Enforce device list backoffs (#2653)

This ensures that if the device list updater is already backing off from a server, we don't call processServer for that server again just because its name arrived in the channel. Otherwise we can end up hitting a remote server that is offline or misbehaving every second, which also spams the logs.
This commit is contained in:
Neil Alexander 2022-08-19 10:23:09 +01:00 committed by GitHub
parent 365da70a23
commit 5513f182cc
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
2 changed files with 10 additions and 5 deletions

View file

@ -335,8 +335,9 @@ func (u *DeviceListUpdater) worker(ch chan gomatrixserverlib.ServerName) {
retriesMu := &sync.Mutex{} retriesMu := &sync.Mutex{}
// restarter goroutine which will inject failed servers into ch when it is time // restarter goroutine which will inject failed servers into ch when it is time
go func() { go func() {
for {
var serversToRetry []gomatrixserverlib.ServerName var serversToRetry []gomatrixserverlib.ServerName
for {
serversToRetry = serversToRetry[:0] // reuse memory
time.Sleep(time.Second) time.Sleep(time.Second)
retriesMu.Lock() retriesMu.Lock()
now := time.Now() now := time.Now()
@ -355,11 +356,17 @@ func (u *DeviceListUpdater) worker(ch chan gomatrixserverlib.ServerName) {
} }
}() }()
for serverName := range ch { for serverName := range ch {
retriesMu.Lock()
_, exists := retries[serverName]
retriesMu.Unlock()
if exists {
// Don't retry a server that we're already waiting for.
continue
}
waitTime, shouldRetry := u.processServer(serverName) waitTime, shouldRetry := u.processServer(serverName)
if shouldRetry { if shouldRetry {
retriesMu.Lock() retriesMu.Lock()
_, exists := retries[serverName] if _, exists = retries[serverName]; !exists {
if !exists {
retries[serverName] = time.Now().Add(waitTime) retries[serverName] = time.Now().Add(waitTime)
} }
retriesMu.Unlock() retriesMu.Unlock()

View file

@ -144,7 +144,6 @@ Server correctly handles incoming m.device_list_update
If remote user leaves room, changes device and rejoins we see update in sync If remote user leaves room, changes device and rejoins we see update in sync
If remote user leaves room, changes device and rejoins we see update in /keys/changes If remote user leaves room, changes device and rejoins we see update in /keys/changes
If remote user leaves room we no longer receive device updates If remote user leaves room we no longer receive device updates
If a device list update goes missing, the server resyncs on the next one
Server correctly resyncs when client query keys and there is no remote cache Server correctly resyncs when client query keys and there is no remote cache
Server correctly resyncs when server leaves and rejoins a room Server correctly resyncs when server leaves and rejoins a room
Device list doesn't change if remote server is down Device list doesn't change if remote server is down
@ -633,7 +632,6 @@ Test that rejected pushers are removed.
Trying to add push rule with no scope fails with 400 Trying to add push rule with no scope fails with 400
Trying to add push rule with invalid scope fails with 400 Trying to add push rule with invalid scope fails with 400
Forward extremities remain so even after the next events are populated as outliers Forward extremities remain so even after the next events are populated as outliers
If a device list update goes missing, the server resyncs on the next one
uploading self-signing key notifies over federation uploading self-signing key notifies over federation
uploading signed devices gets propagated over federation uploading signed devices gets propagated over federation
Device list doesn't change if remote server is down Device list doesn't change if remote server is down