Add HTTP status code to FederationClientError (#2699)

Also ensures we wait before retrying on more HTTP status codes (any code >= 300, previously only >= 500).
This commit is contained in:
Till 2022-09-07 16:14:09 +02:00 committed by GitHub
parent 7e8c605f98
commit 0d697f6754
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
5 changed files with 27 additions and 12 deletions

View file

@ -5,9 +5,10 @@ import (
"fmt" "fmt"
"time" "time"
"github.com/matrix-org/dendrite/federationapi/types"
"github.com/matrix-org/gomatrix" "github.com/matrix-org/gomatrix"
"github.com/matrix-org/gomatrixserverlib" "github.com/matrix-org/gomatrixserverlib"
"github.com/matrix-org/dendrite/federationapi/types"
) )
// FederationInternalAPI is used to query information from the federation sender. // FederationInternalAPI is used to query information from the federation sender.
@ -108,6 +109,7 @@ type FederationClientError struct {
Err string Err string
RetryAfter time.Duration RetryAfter time.Duration
Blacklisted bool Blacklisted bool
Code int // HTTP Status code from the remote server
} }
func (e FederationClientError) Error() string { func (e FederationClientError) Error() string {

View file

@ -6,10 +6,12 @@ import (
"net/http" "net/http"
"github.com/gorilla/mux" "github.com/gorilla/mux"
"github.com/matrix-org/dendrite/federationapi/api" "github.com/matrix-org/gomatrix"
"github.com/matrix-org/dendrite/internal/httputil"
"github.com/matrix-org/gomatrixserverlib" "github.com/matrix-org/gomatrixserverlib"
"github.com/matrix-org/util" "github.com/matrix-org/util"
"github.com/matrix-org/dendrite/federationapi/api"
"github.com/matrix-org/dendrite/internal/httputil"
) )
// AddRoutes adds the FederationInternalAPI handlers to the http.ServeMux. // AddRoutes adds the FederationInternalAPI handlers to the http.ServeMux.
@ -229,6 +231,10 @@ func federationClientError(err error) error {
return &ferr return &ferr
case *api.FederationClientError: case *api.FederationClientError:
return ferr return ferr
case gomatrix.HTTPError:
return &api.FederationClientError{
Code: ferr.Code,
}
default: default:
return &api.FederationClientError{ return &api.FederationClientError{
Err: err.Error(), Err: err.Error(),

View file

@ -407,10 +407,13 @@ userLoop:
waitTime = e.RetryAfter waitTime = e.RetryAfter
} else if e.Blacklisted { } else if e.Blacklisted {
waitTime = time.Hour * 8 waitTime = time.Hour * 8
} else { break userLoop
} else if e.Code >= 300 {
// We didn't get a real FederationClientError (e.g. in polylith mode, where gomatrix.HTTPErrors
// are "converted" to FederationClientErrors), but we probably shouldn't hit the remote server every $waitTime seconds.
waitTime = time.Hour waitTime = time.Hour
break userLoop
} }
break userLoop
case net.Error: case net.Error:
// Use the default waitTime, if it's a timeout. // Use the default waitTime, if it's a timeout.
// It probably doesn't make sense to try further users. // It probably doesn't make sense to try further users.
@ -420,9 +423,10 @@ userLoop:
break userLoop break userLoop
} }
case gomatrix.HTTPError: case gomatrix.HTTPError:
// The remote server returned an error, give it some time to recover // The remote server returned an error, give it some time to recover.
if e.Code >= 500 { // This is to avoid spamming remote servers, which may not be Matrix servers anymore.
waitTime = time.Minute * 10 if e.Code >= 300 {
waitTime = time.Hour
logrus.WithError(e).Error("GetUserDevices returned gomatrix.HTTPError") logrus.WithError(e).Error("GetUserDevices returned gomatrix.HTTPError")
break userLoop break userLoop
} }
@ -459,9 +463,10 @@ userLoop:
} }
if failCount > 0 { if failCount > 0 {
logger.WithFields(logrus.Fields{ logger.WithFields(logrus.Fields{
"total": len(userIDs), "total": len(userIDs),
"failed": failCount, "failed": failCount,
"skipped": len(userIDs) - failCount, "skipped": len(userIDs) - failCount,
"waittime": waitTime,
}).Warn("Failed to query device keys for some users") }).Warn("Failed to query device keys for some users")
} }
for _, userID := range userIDs { for _, userID := range userIDs {

View file

@ -49,3 +49,6 @@ Notifications can be viewed with GET /notifications
If remote user leaves room we no longer receive device updates If remote user leaves room we no longer receive device updates
Guest users can join guest_access rooms Guest users can join guest_access rooms
# This will fail in HTTP API mode, so blacklisted for now
If a device list update goes missing, the server resyncs on the next one

View file

@ -742,4 +742,3 @@ User in private room doesn't appear in user directory
User joining then leaving public room appears and dissappears from directory User joining then leaving public room appears and dissappears from directory
User in remote room doesn't appear in user directory after server left room User in remote room doesn't appear in user directory after server left room
User in shared private room does appear in user directory until leave User in shared private room does appear in user directory until leave
If a device list update goes missing, the server resyncs on the next one