Fix issue with stale device lists (#2702)

We were only sending the last entry to the worker, so most likely missed
updates.
This commit is contained in:
Till 2022-09-08 12:03:44 +02:00 committed by GitHub
parent d5876abbe9
commit 42a82091a8
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
2 changed files with 22 additions and 4 deletions

View file

@ -4,6 +4,7 @@ import (
"context"
"encoding/json"
"net/http"
"net/url"
"github.com/gorilla/mux"
"github.com/matrix-org/gomatrix"
@ -235,9 +236,17 @@ func federationClientError(err error) error {
return &api.FederationClientError{
Code: ferr.Code,
}
case *url.Error: // e.g. certificate error, unable to connect
return &api.FederationClientError{
Err: ferr.Error(),
Code: 400,
}
default:
// We don't know what exactly failed, but we probably don't
// want to retry the request immediately in the device list updater
return &api.FederationClientError{
Err: err.Error(),
Code: 400,
}
}
}

View file

@ -167,6 +167,7 @@ func (u *DeviceListUpdater) Start() error {
step = (time.Second * 120) / time.Duration(max)
}
for _, userID := range staleLists {
userID := userID // otherwise we are only sending the last entry
time.AfterFunc(offset, func() {
u.notifyWorkers(userID)
})
@ -396,11 +397,19 @@ userLoop:
if ctx.Err() != nil {
// we've timed out, give up and go to the back of the queue to let another server be processed.
failCount += 1
waitTime = time.Minute * 10
break
}
res, err := u.fedClient.GetUserDevices(ctx, serverName, userID)
if err != nil {
failCount += 1
select {
case <-ctx.Done():
// we've timed out, give up and go to the back of the queue to let another server be processed.
waitTime = time.Minute * 10
break userLoop
default:
}
switch e := err.(type) {
case *fedsenderapi.FederationClientError:
if e.RetryAfter > 0 {
@ -419,7 +428,7 @@ userLoop:
// It probably doesn't make sense to try further users.
if !e.Timeout() {
waitTime = time.Minute * 10
logrus.WithError(e).Error("GetUserDevices returned net.Error")
logger.WithError(e).Error("GetUserDevices returned net.Error")
break userLoop
}
case gomatrix.HTTPError:
@ -427,7 +436,7 @@ userLoop:
// This is to avoid spamming remote servers, which may not be Matrix servers anymore.
if e.Code >= 300 {
waitTime = time.Hour
logrus.WithError(e).Error("GetUserDevices returned gomatrix.HTTPError")
logger.WithError(e).Error("GetUserDevices returned gomatrix.HTTPError")
break userLoop
}
default: