From c08c7405dbe9d88c1364f6f1f2466db5045506cc Mon Sep 17 00:00:00 2001 From: Till <2353100+S7evinK@users.noreply.github.com> Date: Fri, 7 Jul 2023 13:09:39 +0200 Subject: [PATCH 1/3] Prepare statement on an existing transaction (#3144) This should fix an issue with the database being locked for SQLite. --- internal/sqlutil/migrate.go | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/internal/sqlutil/migrate.go b/internal/sqlutil/migrate.go index a66a75826..735fb4927 100644 --- a/internal/sqlutil/migrate.go +++ b/internal/sqlutil/migrate.go @@ -112,7 +112,13 @@ func (m *Migrator) Up(ctx context.Context) error { func (m *Migrator) insertMigration(ctx context.Context, txn *sql.Tx, migrationName string) error { if m.insertStmt == nil { - stmt, err := m.db.Prepare(insertVersionSQL) + var stmt *sql.Stmt + var err error + if txn == nil { + stmt, err = m.db.PrepareContext(ctx, insertVersionSQL) + } else { + stmt, err = txn.PrepareContext(ctx, insertVersionSQL) + } if err != nil { return fmt.Errorf("unable to prepare insert statement: %w", err) } From e93bdd56fd2c155eaf577e337e565f2054408fd4 Mon Sep 17 00:00:00 2001 From: Neil Date: Fri, 7 Jul 2023 18:59:34 +0100 Subject: [PATCH 2/3] Set max age for roomserver input stream to avoid excessive interior deletes (#3145) If old messages build up in the input stream and do not get processed successfully, this can create a significant drift between the stream first sequence and the consumer ack floors, which results in a slow and expensive start-up when interest-based retention is in use. If a message is sat in the stream for 24 hours, it's probably not going to get processed successfully, so let NATS drop them instead. Dendrite can reconcile by fetching missing events later if it needs to. --------- Co-authored-by: Neil Alexander --- setup/jetstream/nats.go | 23 ++++++++++++++++++++--- setup/jetstream/streams.go | 1 + 2 files changed, 21 insertions(+), 3 deletions(-) diff --git a/setup/jetstream/nats.go b/setup/jetstream/nats.go index e440879c0..8820e86b2 100644 --- a/setup/jetstream/nats.go +++ b/setup/jetstream/nats.go @@ -87,6 +87,7 @@ func (s *NATSInstance) Prepare(process *process.ProcessContext, cfg *config.JetS return js, nc } +// nolint:gocyclo func setupNATS(process *process.ProcessContext, cfg *config.JetStream, nc *natsclient.Conn) (natsclient.JetStreamContext, *natsclient.Conn) { if nc == nil { var err error @@ -126,16 +127,32 @@ func setupNATS(process *process.ProcessContext, cfg *config.JetStream, nc *natsc subjects = []string{name, name + ".>"} } if info != nil { + // If the stream config doesn't match what we expect, try to update + // it. If that doesn't work then try to blow it away and we'll then + // recreate it in the next section. + // Each specific option that we set must be checked by hand, as if + // you DeepEqual the whole config struct, it will always show that + // there's a difference because the NATS Server will return defaults + // in the stream info. switch { case !reflect.DeepEqual(info.Config.Subjects, subjects): fallthrough case info.Config.Retention != stream.Retention: fallthrough case info.Config.Storage != stream.Storage: - if err = s.DeleteStream(name); err != nil { - logrus.WithError(err).Fatal("Unable to delete stream") + fallthrough + case info.Config.MaxAge != stream.MaxAge: + // Try updating the stream first, as many things can be updated + // non-destructively. + if info, err = s.UpdateStream(stream); err != nil { + logrus.WithError(err).Warnf("Unable to update stream %q, recreating...", name) + // We failed to update the stream, this is a last attempt to get + // things working but may result in data loss. + if err = s.DeleteStream(name); err != nil { + logrus.WithError(err).Fatalf("Unable to delete stream %q", name) + } + info = nil } - info = nil } } if info == nil { diff --git a/setup/jetstream/streams.go b/setup/jetstream/streams.go index 590f0cbd9..741407926 100644 --- a/setup/jetstream/streams.go +++ b/setup/jetstream/streams.go @@ -48,6 +48,7 @@ var streams = []*nats.StreamConfig{ Name: InputRoomEvent, Retention: nats.InterestPolicy, Storage: nats.FileStorage, + MaxAge: time.Hour * 24, }, { Name: InputDeviceListUpdate, From eb9e90379d9f19b1b4192248cbf4931874324857 Mon Sep 17 00:00:00 2001 From: Till <2353100+S7evinK@users.noreply.github.com> Date: Fri, 7 Jul 2023 20:37:23 +0200 Subject: [PATCH 3/3] Add event size checks similar to Synapse (#3140) Companion to https://github.com/matrix-org/gomatrixserverlib/pull/400 This tries to mimic the logic found in Synapse, as dropping events can break rooms (and we may end up in endless loops..) --- go.mod | 2 +- go.sum | 8 ++- roomserver/internal/input/input_events.go | 15 +++++ roomserver/internal/input/input_missing.go | 71 ++++++++++++++++++---- sytest-blacklist | 1 + 5 files changed, 82 insertions(+), 15 deletions(-) diff --git a/go.mod b/go.mod index a15d2a60f..f36d68a23 100644 --- a/go.mod +++ b/go.mod @@ -22,7 +22,7 @@ require ( github.com/matrix-org/dugong v0.0.0-20210921133753-66e6b1c67e2e github.com/matrix-org/go-sqlite3-js v0.0.0-20220419092513-28aa791a1c91 github.com/matrix-org/gomatrix v0.0.0-20220926102614-ceba4d9f7530 - github.com/matrix-org/gomatrixserverlib v0.0.0-20230706145103-ad3d32b89246 + github.com/matrix-org/gomatrixserverlib v0.0.0-20230707180038-35f734f7406a github.com/matrix-org/pinecone v0.11.1-0.20230210171230-8c3b24f2649a github.com/matrix-org/util v0.0.0-20221111132719-399730281e66 github.com/mattn/go-sqlite3 v1.14.17 diff --git a/go.sum b/go.sum index 72b675f7e..c499c5e34 100644 --- a/go.sum +++ b/go.sum @@ -113,6 +113,7 @@ github.com/glycerine/go-unsnap-stream v0.0.0-20180323001048-9f0cb55181dd/go.mod github.com/glycerine/goconvey v0.0.0-20180728074245-46e3a41ad493/go.mod h1:Ogl1Tioa0aV7gstGFO7KhffUsb9M4ydbEbbxpcEDc24= github.com/go-errors/errors v1.4.2 h1:J6MZopCL4uSllY1OfXM374weqZFFItUbrImctkmUxIA= github.com/go-gl/glfw v0.0.0-20190409004039-e6da0acd62b1/go.mod h1:vR7hzQXu2zJy9AVAgeJqvqgH9Q5CA+iKCZ2gyEVpxRU= +github.com/go-logfmt/logfmt v0.5.1/go.mod h1:WYhtIu8zTZfxdn5+rREduYbwxfcBr/Vr6KEVveWlfTs= github.com/go-playground/assert/v2 v2.0.1/go.mod h1:VDjEfimB/XKnb+ZQfWdccd7VUvScMdVu0Titje2rxJ4= github.com/go-playground/locales v0.13.0/go.mod h1:taPMhCMXrRLJO55olJkUXHZBHCxTMfnGwq/HNwmWNS8= github.com/go-playground/locales v0.14.0 h1:u50s323jtVGugKlcYeyzC0etD1HifMjqmJqb8WugfUU= @@ -175,12 +176,14 @@ github.com/h2non/filetype v1.1.3/go.mod h1:319b3zT68BvV+WRj7cwy856M2ehB3HqNOt6sy github.com/h2non/parth v0.0.0-20190131123155-b4df798d6542 h1:2VTzZjLZBgl62/EtslCrtky5vbi9dd7HrQPQIx6wqiw= github.com/huandu/xstrings v1.0.0 h1:pO2K/gKgKaat5LdpAhxhluX2GPQMaI3W5FUz/I/UnWk= github.com/huandu/xstrings v1.0.0/go.mod h1:4qWG/gcEcfX4z/mBDHJ++3ReCw9ibxbsNJbcucJdbSo= +github.com/jpillora/backoff v1.0.0/go.mod h1:J/6gKK9jxlEcS3zixgDgUAsiuZ7yrSoa/FX5e0EB2j4= github.com/json-iterator/go v1.1.9/go.mod h1:KdQUCv79m/52Kvf8AW2vK1V8akMuk1QjK/uOdHXbAo4= github.com/json-iterator/go v1.1.12 h1:PV8peI4a0ysnczrg+LtxykD8LfKY9ML6u2jnxaEnrnM= github.com/json-iterator/go v1.1.12/go.mod h1:e30LSqwooZae/UwlEbR2852Gd8hjQvJoHmT4TnhNGBo= github.com/jtolds/gls v4.2.1+incompatible/go.mod h1:QJZ7F/aHp+rZTRtaJ1ow/lLfFfVYBRgL+9YlvaHOwJU= github.com/juju/errors v1.0.0 h1:yiq7kjCLll1BiaRuNY53MGI0+EQ3rF6GB+wvboZDefM= github.com/juju/errors v1.0.0/go.mod h1:B5x9thDqx0wIMH3+aLIMP9HjItInYWObRovoCFM5Qe8= +github.com/julienschmidt/httprouter v1.3.0/go.mod h1:JR6WtHb+2LUe8TCKY3cZOxFyyO8IZAc4RVcycCCAKdM= github.com/jung-kurt/gofpdf v1.0.3-0.20190309125859-24315acbbda5/go.mod h1:7Id9E/uU8ce6rXgefFLlgrJj/GYY22cpxn+r32jIOes= github.com/kardianos/minwinsvc v1.0.2 h1:JmZKFJQrmTGa/WiW+vkJXKmfzdjabuEW4Tirj5lLdR0= github.com/kardianos/minwinsvc v1.0.2/go.mod h1:LUZNYhNmxujx2tR7FbdxqYJ9XDDoCd3MQcl1o//FWl4= @@ -207,8 +210,8 @@ github.com/matrix-org/go-sqlite3-js v0.0.0-20220419092513-28aa791a1c91 h1:s7fexw github.com/matrix-org/go-sqlite3-js v0.0.0-20220419092513-28aa791a1c91/go.mod h1:e+cg2q7C7yE5QnAXgzo512tgFh1RbQLC0+jozuegKgo= github.com/matrix-org/gomatrix v0.0.0-20220926102614-ceba4d9f7530 h1:kHKxCOLcHH8r4Fzarl4+Y3K5hjothkVW5z7T1dUM11U= github.com/matrix-org/gomatrix v0.0.0-20220926102614-ceba4d9f7530/go.mod h1:/gBX06Kw0exX1HrwmoBibFA98yBk/jxKpGVeyQbff+s= -github.com/matrix-org/gomatrixserverlib v0.0.0-20230706145103-ad3d32b89246 h1:gzp7pWLMtU6g39LGch54h+KBzmhKJt6kmJZ+3fIkGvU= -github.com/matrix-org/gomatrixserverlib v0.0.0-20230706145103-ad3d32b89246/go.mod h1:H9V9N3Uqn1bBJqYJNGK1noqtgJTaCEhtTdcH/mp50uU= +github.com/matrix-org/gomatrixserverlib v0.0.0-20230707180038-35f734f7406a h1:PuCOJWHjCEvh4c5UZj2+s3wOWGPL3OJGDrrbEm1UcfU= +github.com/matrix-org/gomatrixserverlib v0.0.0-20230707180038-35f734f7406a/go.mod h1:H9V9N3Uqn1bBJqYJNGK1noqtgJTaCEhtTdcH/mp50uU= github.com/matrix-org/pinecone v0.11.1-0.20230210171230-8c3b24f2649a h1:awrPDf9LEFySxTLKYBMCiObelNx/cBuv/wzllvCCH3A= github.com/matrix-org/pinecone v0.11.1-0.20230210171230-8c3b24f2649a/go.mod h1:HchJX9oKMXaT2xYFs0Ha/6Zs06mxLU8k6F1ODnrGkeQ= github.com/matrix-org/util v0.0.0-20221111132719-399730281e66 h1:6z4KxomXSIGWqhHcfzExgkH3Z3UkIXry4ibJS4Aqz2Y= @@ -241,6 +244,7 @@ github.com/morikuni/aec v1.0.0/go.mod h1:BbKIizmSmc5MMPqRYbxO4ZU0S0+P200+tUnFx7P github.com/mschoch/smat v0.0.0-20160514031455-90eadee771ae/go.mod h1:qAyveg+e4CE+eKJXWVjKXM4ck2QobLqTDytGJbLLhJg= github.com/mschoch/smat v0.2.0 h1:8imxQsjDm8yFEAVBe7azKmKSgzSkZXDuKkSq9374khM= github.com/mschoch/smat v0.2.0/go.mod h1:kc9mz7DoBKqDyiRL7VZN8KvXQMWeTaVnttLRXOlotKw= +github.com/mwitkow/go-conntrack v0.0.0-20190716064945-2f068394615f/go.mod h1:qRWi+5nqEBWmkhHvq77mSJWrCKwh8bxhgT7d/eI7P4U= github.com/nats-io/jwt/v2 v2.4.1 h1:Y35W1dgbbz2SQUYDPCaclXcuqleVmpbRa7646Jf2EX4= github.com/nats-io/jwt/v2 v2.4.1/go.mod h1:24BeQtRwxRV8ruvC4CojXlx/WQ/VjuwlYiH+vu/+ibI= github.com/nats-io/nats-server/v2 v2.9.15 h1:MuwEJheIwpvFgqvbs20W8Ish2azcygjf4Z0liVu2I4c= diff --git a/roomserver/internal/input/input_events.go b/roomserver/internal/input/input_events.go index db3c95502..93f6cc015 100644 --- a/roomserver/internal/input/input_events.go +++ b/roomserver/internal/input/input_events.go @@ -250,6 +250,21 @@ func (r *Inputer) processRoomEvent( // really do anything with the event other than reject it at this point. isRejected = true rejectionErr = fmt.Errorf("missingState.processEventWithMissingState: %w", err) + switch e := err.(type) { + case gomatrixserverlib.EventValidationError: + if e.Persistable && stateSnapshot != nil { + // We retrieved some state and we ended up having to call /state_ids for + // the new event in question (probably because closing the gap by using + // /get_missing_events didn't do what we hoped) so we'll instead overwrite + // the state snapshot with the newly resolved state. + missingPrev = false + input.HasState = true + input.StateEventIDs = make([]string, 0, len(stateSnapshot.StateEvents)) + for _, se := range stateSnapshot.StateEvents { + input.StateEventIDs = append(input.StateEventIDs, se.EventID()) + } + } + } } else if stateSnapshot != nil { // We retrieved some state and we ended up having to call /state_ids for // the new event in question (probably because closing the gap by using diff --git a/roomserver/internal/input/input_missing.go b/roomserver/internal/input/input_missing.go index 7ee84e4c0..5b4c0727b 100644 --- a/roomserver/internal/input/input_missing.go +++ b/roomserver/internal/input/input_missing.go @@ -259,12 +259,20 @@ func (t *missingStateReq) lookupResolvedStateBeforeEvent(ctx context.Context, e // Therefore, we cannot just query /state_ids with this event to get the state before. Instead, we need to query // the state AFTER all the prev_events for this event, then apply state resolution to that to get the state before the event. var states []*respState + var validationError error for _, prevEventID := range e.PrevEventIDs() { // Look up what the state is after the backward extremity. This will either // come from the roomserver, if we know all the required events, or it will // come from a remote server via /state_ids if not. prevState, trustworthy, err := t.lookupStateAfterEvent(ctx, roomVersion, e.RoomID(), prevEventID) - if err != nil { + switch err2 := err.(type) { + case gomatrixserverlib.EventValidationError: + if !err2.Persistable { + return nil, err2 + } + validationError = err2 + case nil: + default: return nil, fmt.Errorf("t.lookupStateAfterEvent: %w", err) } // Append the state onto the collected state. We'll run this through the @@ -311,12 +319,19 @@ func (t *missingStateReq) lookupResolvedStateBeforeEvent(ctx context.Context, e t.roomsMu.Lock(e.RoomID()) resolvedState, err = t.resolveStatesAndCheck(ctx, roomVersion, respStates, e) t.roomsMu.Unlock(e.RoomID()) - if err != nil { + switch err2 := err.(type) { + case gomatrixserverlib.EventValidationError: + if !err2.Persistable { + return nil, err2 + } + validationError = err2 + case nil: + default: return nil, fmt.Errorf("t.resolveStatesAndCheck: %w", err) } } - return resolvedState, nil + return resolvedState, validationError } // lookupStateAfterEvent returns the room state after `eventID`, which is the state before eventID with the state of `eventID` (if it's a state event) @@ -339,8 +354,15 @@ func (t *missingStateReq) lookupStateAfterEvent(ctx context.Context, roomVersion } // fetch the event we're missing and add it to the pile + var validationError error h, err := t.lookupEvent(ctx, roomVersion, roomID, eventID, false) - switch err.(type) { + switch e := err.(type) { + case gomatrixserverlib.EventValidationError: + if !e.Persistable { + logrus.WithContext(ctx).WithError(err).Errorf("Failed to look up event %s", eventID) + return nil, false, e + } + validationError = e case verifySigError: return respState, false, nil case nil: @@ -365,7 +387,7 @@ func (t *missingStateReq) lookupStateAfterEvent(ctx context.Context, roomVersion } } - return respState, false, nil + return respState, false, validationError } func (t *missingStateReq) cacheAndReturn(ev gomatrixserverlib.PDU) gomatrixserverlib.PDU { @@ -481,6 +503,7 @@ func (t *missingStateReq) resolveStatesAndCheck(ctx context.Context, roomVersion return nil, err } // apply the current event + var validationError error retryAllowedState: if err = checkAllowedByState(backwardsExtremity, resolvedStateEvents, func(roomID spec.RoomID, senderID spec.SenderID) (*spec.UserID, error) { return t.inputer.Queryer.QueryUserIDForSender(ctx, roomID, senderID) @@ -488,7 +511,12 @@ retryAllowedState: switch missing := err.(type) { case gomatrixserverlib.MissingAuthEventError: h, err2 := t.lookupEvent(ctx, roomVersion, backwardsExtremity.RoomID(), missing.AuthEventID, true) - switch err2.(type) { + switch e := err2.(type) { + case gomatrixserverlib.EventValidationError: + if !e.Persistable { + return nil, e + } + validationError = e case verifySigError: return &parsedRespState{ AuthEvents: authEventList, @@ -509,7 +537,7 @@ retryAllowedState: return &parsedRespState{ AuthEvents: authEventList, StateEvents: resolvedStateEvents, - }, nil + }, validationError } // get missing events for `e`. If `isGapFilled`=true then `newEvents` contains all the events to inject, @@ -779,7 +807,11 @@ func (t *missingStateReq) lookupMissingStateViaStateIDs(ctx context.Context, roo // Define what we'll do in order to fetch the missing event ID. fetch := func(missingEventID string) { h, herr := t.lookupEvent(ctx, roomVersion, roomID, missingEventID, false) - switch herr.(type) { + switch e := herr.(type) { + case gomatrixserverlib.EventValidationError: + if !e.Persistable { + return + } case verifySigError: return case nil: @@ -869,6 +901,8 @@ func (t *missingStateReq) lookupEvent(ctx context.Context, roomVersion gomatrixs } var event gomatrixserverlib.PDU found := false + var validationError error +serverLoop: for _, serverName := range t.servers { reqctx, cancel := context.WithTimeout(ctx, time.Second*30) defer cancel() @@ -886,12 +920,25 @@ func (t *missingStateReq) lookupEvent(ctx context.Context, roomVersion gomatrixs continue } event, err = verImpl.NewEventFromUntrustedJSON(txn.PDUs[0]) - if err != nil { + switch e := err.(type) { + case gomatrixserverlib.EventValidationError: + // If the event is persistable, e.g. failed validation for exceeding + // byte sizes, we can "accept" the event. + if e.Persistable { + validationError = e + found = true + break serverLoop + } + // If we can't persist the event, we probably can't do so with results + // from other servers, so also break the loop. + break serverLoop + case nil: + found = true + break serverLoop + default: t.log.WithError(err).WithField("missing_event_id", missingEventID).Warnf("Failed to parse event JSON of event returned from /event") continue } - found = true - break } if !found { t.log.WithField("missing_event_id", missingEventID).Warnf("Failed to get missing /event for event ID from %d server(s)", len(t.servers)) @@ -903,7 +950,7 @@ func (t *missingStateReq) lookupEvent(ctx context.Context, roomVersion gomatrixs t.log.WithError(err).Warnf("Couldn't validate signature of event %q from /event", event.EventID()) return nil, verifySigError{event.EventID(), err} } - return t.cacheAndReturn(event), nil + return t.cacheAndReturn(event), validationError } func checkAllowedByState(e gomatrixserverlib.PDU, stateEvents []gomatrixserverlib.PDU, userIDForSender spec.UserIDForSender) error { diff --git a/sytest-blacklist b/sytest-blacklist index 49a3cc870..d6fadc7e1 100644 --- a/sytest-blacklist +++ b/sytest-blacklist @@ -8,6 +8,7 @@ Events in rooms with AS-hosted room aliases are sent to AS server Inviting an AS-hosted user asks the AS server Accesing an AS-hosted room alias asks the AS server If user leaves room, remote user changes device and rejoins we see update in /sync and /keys/changes +New federated private chats get full presence information (SYN-115) # This will fail in HTTP API mode, so blacklisted for now If a device list update goes missing, the server resyncs on the next one