Make 'Device list doesn't change if remote server is down' pass (#1268)
- As a last resort, query the DB after exhausting all possible remote query endpoints, but keep the field in `failures` so clients can detect that this is stale data.
- Unblock `DeviceListUpdater.Update` on failures rather than timing out.
- Use a mutex when writing directly to `res`, not just for failures.
This commit is contained in:
parent
4c4732a9c9
commit
20c8f252a7
|
@ -342,10 +342,12 @@ func (u *DeviceListUpdater) processServer(serverName gomatrixserverlib.ServerNam
|
||||||
if err != nil {
|
if err != nil {
|
||||||
logger.WithError(err).WithField("user_id", userID).Error("fetched device list but failed to store/emit it")
|
logger.WithError(err).WithField("user_id", userID).Error("fetched device list but failed to store/emit it")
|
||||||
hasFailures = true
|
hasFailures = true
|
||||||
} else {
|
|
||||||
u.clearChannel(userID)
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
for _, userID := range userIDs {
|
||||||
|
// always clear the channel to unblock Update calls regardless of success/failure
|
||||||
|
u.clearChannel(userID)
|
||||||
|
}
|
||||||
return hasFailures
|
return hasFailures
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -318,12 +318,39 @@ func (a *KeyInternalAPI) queryRemoteKeys(
|
||||||
// allows us to wait until all federation servers have been poked
|
// allows us to wait until all federation servers have been poked
|
||||||
var wg sync.WaitGroup
|
var wg sync.WaitGroup
|
||||||
wg.Add(len(domainToDeviceKeys))
|
wg.Add(len(domainToDeviceKeys))
|
||||||
// mutex for failures
|
// mutex for writing directly to res (e.g failures)
|
||||||
var failMu sync.Mutex
|
var respMu sync.Mutex
|
||||||
|
|
||||||
// fan out
|
// fan out
|
||||||
for domain, deviceKeys := range domainToDeviceKeys {
|
for domain, deviceKeys := range domainToDeviceKeys {
|
||||||
go func(serverName string, devKeys map[string][]string) {
|
go a.queryRemoteKeysOnServer(ctx, domain, deviceKeys, &wg, &respMu, timeout, resultCh, res)
|
||||||
|
}
|
||||||
|
|
||||||
|
// Close the result channel when the goroutines have quit so the for .. range exits
|
||||||
|
go func() {
|
||||||
|
wg.Wait()
|
||||||
|
close(resultCh)
|
||||||
|
}()
|
||||||
|
|
||||||
|
for result := range resultCh {
|
||||||
|
for userID, nest := range result.DeviceKeys {
|
||||||
|
res.DeviceKeys[userID] = make(map[string]json.RawMessage)
|
||||||
|
for deviceID, deviceKey := range nest {
|
||||||
|
keyJSON, err := json.Marshal(deviceKey)
|
||||||
|
if err != nil {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
res.DeviceKeys[userID][deviceID] = keyJSON
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func (a *KeyInternalAPI) queryRemoteKeysOnServer(
|
||||||
|
ctx context.Context, serverName string, devKeys map[string][]string, wg *sync.WaitGroup,
|
||||||
|
respMu *sync.Mutex, timeout time.Duration, resultCh chan<- *gomatrixserverlib.RespQueryKeys,
|
||||||
|
res *api.QueryKeysResponse,
|
||||||
|
) {
|
||||||
defer wg.Done()
|
defer wg.Done()
|
||||||
fedCtx, cancel := context.WithTimeout(ctx, timeout)
|
fedCtx, cancel := context.WithTimeout(ctx, timeout)
|
||||||
defer cancel()
|
defer cancel()
|
||||||
|
@ -351,7 +378,9 @@ func (a *KeyInternalAPI) queryRemoteKeys(
|
||||||
}
|
}
|
||||||
// refresh entries from DB: unlike remoteKeysFromDatabase we know we previously had no device info for this
|
// refresh entries from DB: unlike remoteKeysFromDatabase we know we previously had no device info for this
|
||||||
// user so the fact that we're populating all devices here isn't a problem so long as we have devices.
|
// user so the fact that we're populating all devices here isn't a problem so long as we have devices.
|
||||||
|
respMu.Lock()
|
||||||
err = a.populateResponseWithDeviceKeysFromDatabase(ctx, res, userID, nil)
|
err = a.populateResponseWithDeviceKeysFromDatabase(ctx, res, userID, nil)
|
||||||
|
respMu.Unlock()
|
||||||
if err != nil {
|
if err != nil {
|
||||||
logrus.WithFields(logrus.Fields{
|
logrus.WithFields(logrus.Fields{
|
||||||
logrus.ErrorKey: err,
|
logrus.ErrorKey: err,
|
||||||
|
@ -367,36 +396,24 @@ func (a *KeyInternalAPI) queryRemoteKeys(
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
queryKeysResp, err := a.FedClient.QueryKeys(fedCtx, gomatrixserverlib.ServerName(serverName), devKeys)
|
queryKeysResp, err := a.FedClient.QueryKeys(fedCtx, gomatrixserverlib.ServerName(serverName), devKeys)
|
||||||
if err != nil {
|
if err == nil {
|
||||||
failMu.Lock()
|
resultCh <- &queryKeysResp
|
||||||
|
return
|
||||||
|
}
|
||||||
|
respMu.Lock()
|
||||||
res.Failures[serverName] = map[string]interface{}{
|
res.Failures[serverName] = map[string]interface{}{
|
||||||
"message": err.Error(),
|
"message": err.Error(),
|
||||||
}
|
}
|
||||||
failMu.Unlock()
|
|
||||||
return
|
|
||||||
}
|
|
||||||
resultCh <- &queryKeysResp
|
|
||||||
}(domain, deviceKeys)
|
|
||||||
}
|
|
||||||
|
|
||||||
// Close the result channel when the goroutines have quit so the for .. range exits
|
// last ditch, use the cache only. This is good for when clients hit /keys/query and the remote server
|
||||||
go func() {
|
// is down, better to return something than nothing at all. Clients can know about the failure by
|
||||||
wg.Wait()
|
// inspecting the failures map though so they can know it's a cached response.
|
||||||
close(resultCh)
|
for userID, dkeys := range devKeys {
|
||||||
}()
|
// drop the error as it's already a failure at this point
|
||||||
|
_ = a.populateResponseWithDeviceKeysFromDatabase(ctx, res, userID, dkeys)
|
||||||
|
}
|
||||||
|
respMu.Unlock()
|
||||||
|
|
||||||
for result := range resultCh {
|
|
||||||
for userID, nest := range result.DeviceKeys {
|
|
||||||
res.DeviceKeys[userID] = make(map[string]json.RawMessage)
|
|
||||||
for deviceID, deviceKey := range nest {
|
|
||||||
keyJSON, err := json.Marshal(deviceKey)
|
|
||||||
if err != nil {
|
|
||||||
continue
|
|
||||||
}
|
|
||||||
res.DeviceKeys[userID][deviceID] = keyJSON
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
func (a *KeyInternalAPI) populateResponseWithDeviceKeysFromDatabase(
|
func (a *KeyInternalAPI) populateResponseWithDeviceKeysFromDatabase(
|
||||||
|
|
|
@ -148,6 +148,7 @@ Get left notifs in sync and /keys/changes when other user leaves
|
||||||
Can query remote device keys using POST after notification
|
Can query remote device keys using POST after notification
|
||||||
Server correctly resyncs when client query keys and there is no remote cache
|
Server correctly resyncs when client query keys and there is no remote cache
|
||||||
Server correctly resyncs when server leaves and rejoins a room
|
Server correctly resyncs when server leaves and rejoins a room
|
||||||
|
Device list doesn't change if remote server is down
|
||||||
Can add account data
|
Can add account data
|
||||||
Can add account data to room
|
Can add account data to room
|
||||||
Can get account data without syncing
|
Can get account data without syncing
|
||||||
|
|
Loading…
Reference in a new issue