diff --git a/federationapi/queue/destinationqueue.go b/federationapi/queue/destinationqueue.go index 51350916d..12e6db9fa 100644 --- a/federationapi/queue/destinationqueue.go +++ b/federationapi/queue/destinationqueue.go @@ -410,34 +410,49 @@ func (oq *destinationQueue) nextTransaction( defer cancel() relayServers := oq.statistics.KnownRelayServers() - if oq.statistics.AssumedOffline() && len(relayServers) > 0 { - sendMethod = statistics.SendViaRelay - relaySuccess := false - logrus.Infof("Sending to relay servers: %v", relayServers) - // TODO : how to pass through actual userID here?!?!?!?! - userID, userErr := gomatrixserverlib.NewUserID("@user:"+string(oq.destination), false) - if userErr != nil { - return userErr, sendMethod - } - - // Attempt sending to each known relay server. - for _, relayServer := range relayServers { - _, relayErr := oq.client.P2PSendTransactionToRelay(ctx, *userID, t, relayServer) - if relayErr != nil { - err = relayErr - } else { - // If sending to one of the relay servers succeeds, consider the send successful. - relaySuccess = true - } - } - - // Clear the error if sending to any of the relay servers succeeded. - if relaySuccess { - err = nil - } - } else { + hasRelayServers := len(relayServers) > 0 + shouldSendToRelays := oq.statistics.AssumedOffline() && hasRelayServers + if !shouldSendToRelays { sendMethod = statistics.SendDirect _, err = oq.client.SendTransaction(ctx, t) + } else { + // Try sending directly to the destination first in case they came back online. + sendMethod = statistics.SendDirect + _, err = oq.client.SendTransaction(ctx, t) + if err != nil { + // The destination is still offline, try sending to relays. + sendMethod = statistics.SendViaRelay + relaySuccess := false + logrus.Infof("Sending %q to relay servers: %v", t.TransactionID, relayServers) + // TODO : how to pass through actual userID here?!?!?!?! + userID, userErr := gomatrixserverlib.NewUserID("@user:"+string(oq.destination), false) + if userErr != nil { + return userErr, sendMethod + } + + // Attempt sending to each known relay server. + for _, relayServer := range relayServers { + _, relayErr := oq.client.P2PSendTransactionToRelay(ctx, *userID, t, relayServer) + if relayErr != nil { + err = relayErr + } else { + // If sending to one of the relay servers succeeds, consider the send successful. + relaySuccess = true + + // TODO : what about if the dest comes back online but can't see their relay? + // How do I sync with the dest in that case? + // Should change the database to have a "relay success" flag on events and if + // I see the node back online, maybe directly send through the backlog of events + // with "relay success"... could lead to duplicate events, but only those that + // I sent. And will lead to a much more consistent experience. + } + } + + // Clear the error if sending to any of the relay servers succeeded. + if relaySuccess { + err = nil + } + } } switch errResponse := err.(type) { case nil: diff --git a/federationapi/statistics/statistics.go b/federationapi/statistics/statistics.go index 866c09336..e29e3b140 100644 --- a/federationapi/statistics/statistics.go +++ b/federationapi/statistics/statistics.go @@ -164,6 +164,8 @@ func (s *ServerStatistics) Success(method SendMethod) { logrus.WithError(err).Errorf("Failed to remove %q from blacklist", s.serverName) } } + + s.removeAssumedOffline() } } diff --git a/setup/config/config_federationapi.go b/setup/config/config_federationapi.go index 6c198018d..cd7d90562 100644 --- a/setup/config/config_federationapi.go +++ b/setup/config/config_federationapi.go @@ -49,7 +49,7 @@ func (c *FederationAPI) Defaults(opts DefaultOpts) { c.Database.Defaults(10) } c.FederationMaxRetries = 16 - c.P2PFederationRetriesUntilAssumedOffline = 2 + c.P2PFederationRetriesUntilAssumedOffline = 1 c.DisableTLSValidation = false c.DisableHTTPKeepalives = false if opts.Generate {