From 7ec70272d28c6b37f8a412dc577ff27da398d1df Mon Sep 17 00:00:00 2001 From: Till <2353100+S7evinK@users.noreply.github.com> Date: Tue, 2 Aug 2022 13:58:08 +0200 Subject: [PATCH 01/12] Disable NATS Server logging, allow self-signed certificates (#2605) * Disable NATS Server logs in CI * Add option to disable TLS validation for NATS --- dendrite-sample.monolith.yaml | 5 +++++ dendrite-sample.polylith.yaml | 5 +++++ setup/config/config_jetstream.go | 6 ++++++ setup/jetstream/nats.go | 10 +++++++++- 4 files changed, 25 insertions(+), 1 deletion(-) diff --git a/dendrite-sample.monolith.yaml b/dendrite-sample.monolith.yaml index cc6c173e8..a34b8af55 100644 --- a/dendrite-sample.monolith.yaml +++ b/dendrite-sample.monolith.yaml @@ -113,6 +113,11 @@ global: addresses: # - localhost:4222 + # Disable the validation of TLS certificates of NATS. This is + # not recommended in production since it may allow NATS traffic + # to be sent to an insecure endpoint. + disable_tls_validation: false + # Persistent directory to store JetStream streams in. This directory should be # preserved across Dendrite restarts. storage_path: ./ diff --git a/dendrite-sample.polylith.yaml b/dendrite-sample.polylith.yaml index 92cab19b1..550611229 100644 --- a/dendrite-sample.polylith.yaml +++ b/dendrite-sample.polylith.yaml @@ -103,6 +103,11 @@ global: addresses: - hostname:4222 + # Disable the validation of TLS certificates of NATS. This is + # not recommended in production since it may allow NATS traffic + # to be sent to an insecure endpoint. + disable_tls_validation: false + # The prefix to use for stream names for this homeserver - really only useful # if you are running more than one Dendrite server on the same NATS deployment. topic_prefix: Dendrite diff --git a/setup/config/config_jetstream.go b/setup/config/config_jetstream.go index e4cfd4d3b..a7827597e 100644 --- a/setup/config/config_jetstream.go +++ b/setup/config/config_jetstream.go @@ -17,6 +17,10 @@ type JetStream struct { TopicPrefix string `yaml:"topic_prefix"` // Keep all storage in memory. This is mostly useful for unit tests. InMemory bool `yaml:"in_memory"` + // Disable logging. This is mostly useful for unit tests. + NoLog bool `yaml:"-"` + // Disables TLS validation. This should NOT be used in production + DisableTLSValidation bool `yaml:"disable_tls_validation"` } func (c *JetStream) Prefixed(name string) string { @@ -32,6 +36,8 @@ func (c *JetStream) Defaults(generate bool) { c.TopicPrefix = "Dendrite" if generate { c.StoragePath = Path("./") + c.NoLog = true + c.DisableTLSValidation = true } } diff --git a/setup/jetstream/nats.go b/setup/jetstream/nats.go index 248b0e656..be216a02a 100644 --- a/setup/jetstream/nats.go +++ b/setup/jetstream/nats.go @@ -1,6 +1,7 @@ package jetstream import ( + "crypto/tls" "fmt" "reflect" "strings" @@ -45,6 +46,7 @@ func (s *NATSInstance) Prepare(process *process.ProcessContext, cfg *config.JetS NoSystemAccount: true, MaxPayload: 16 * 1024 * 1024, NoSigs: true, + NoLog: cfg.NoLog, }) if err != nil { panic(err) @@ -75,7 +77,13 @@ func (s *NATSInstance) Prepare(process *process.ProcessContext, cfg *config.JetS func setupNATS(process *process.ProcessContext, cfg *config.JetStream, nc *natsclient.Conn) (natsclient.JetStreamContext, *natsclient.Conn) { if nc == nil { var err error - nc, err = natsclient.Connect(strings.Join(cfg.Addresses, ",")) + opts := []nats.Option{} + if cfg.DisableTLSValidation { + opts = append(opts, nats.Secure(&tls.Config{ + InsecureSkipVerify: true, + })) + } + nc, err = natsclient.Connect(strings.Join(cfg.Addresses, ","), opts...) if err != nil { logrus.WithError(err).Panic("Unable to connect to NATS") return nil, nil From f4345dafdecacb655ab3985871f28100ececb302 Mon Sep 17 00:00:00 2001 From: Neil Alexander Date: Tue, 2 Aug 2022 13:01:03 +0100 Subject: [PATCH 02/12] Fix data race in `lookupMissingStateViaStateIDs` --- roomserver/internal/input/input_missing.go | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/roomserver/internal/input/input_missing.go b/roomserver/internal/input/input_missing.go index 1fe25e38a..c78e5d79a 100644 --- a/roomserver/internal/input/input_missing.go +++ b/roomserver/internal/input/input_missing.go @@ -750,9 +750,8 @@ func (t *missingStateReq) lookupMissingStateViaStateIDs(ctx context.Context, roo // Define what we'll do in order to fetch the missing event ID. fetch := func(missingEventID string) { - var h *gomatrixserverlib.Event - h, err = t.lookupEvent(ctx, roomVersion, roomID, missingEventID, false) - switch err.(type) { + h, herr := t.lookupEvent(ctx, roomVersion, roomID, missingEventID, false) + switch herr.(type) { case verifySigError: return case nil: @@ -761,7 +760,7 @@ func (t *missingStateReq) lookupMissingStateViaStateIDs(ctx context.Context, roo util.GetLogger(ctx).WithFields(logrus.Fields{ "event_id": missingEventID, "room_id": roomID, - }).Warn("Failed to fetch missing event") + }).WithError(herr).Warn("Failed to fetch missing event") return } haveEventsMutex.Lock() From e384eb683f158950956c8e003b33491ad0905b83 Mon Sep 17 00:00:00 2001 From: Neil Alexander Date: Tue, 2 Aug 2022 15:33:28 +0100 Subject: [PATCH 03/12] Disable flakey test --- sytest-blacklist | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/sytest-blacklist b/sytest-blacklist index bcc345f6e..e0b2767b1 100644 --- a/sytest-blacklist +++ b/sytest-blacklist @@ -49,3 +49,7 @@ Notifications can be viewed with GET /notifications If remote user leaves room we no longer receive device updates Guest users can join guest_access rooms + +# You'll be shocked to discover this is flakey too + +Inbound /v1/send_join rejects joins from other servers From df5d4dc7a36f7fe5ec17f9da81c535d5c01bd505 Mon Sep 17 00:00:00 2001 From: Till <2353100+S7evinK@users.noreply.github.com> Date: Tue, 2 Aug 2022 17:00:16 +0200 Subject: [PATCH 04/12] Delete correct Send-to-Device messages (#2608) * Add send-to-device tests * Update tests, fix message deletion * PR comments --- .../storage/postgres/send_to_device_table.go | 2 +- .../storage/sqlite3/send_to_device_table.go | 2 +- syncapi/storage/storage_test.go | 23 +-- syncapi/syncapi_test.go | 136 ++++++++++++++++++ test/testrig/base.go | 3 +- 5 files changed, 145 insertions(+), 21 deletions(-) diff --git a/syncapi/storage/postgres/send_to_device_table.go b/syncapi/storage/postgres/send_to_device_table.go index 2734fef3e..fd0c1c56b 100644 --- a/syncapi/storage/postgres/send_to_device_table.go +++ b/syncapi/storage/postgres/send_to_device_table.go @@ -58,7 +58,7 @@ const selectSendToDeviceMessagesSQL = ` const deleteSendToDeviceMessagesSQL = ` DELETE FROM syncapi_send_to_device - WHERE user_id = $1 AND device_id = $2 AND id < $3 + WHERE user_id = $1 AND device_id = $2 AND id <= $3 ` const selectMaxSendToDeviceIDSQL = "" + diff --git a/syncapi/storage/sqlite3/send_to_device_table.go b/syncapi/storage/sqlite3/send_to_device_table.go index d05d3fe72..e3aa1b7a1 100644 --- a/syncapi/storage/sqlite3/send_to_device_table.go +++ b/syncapi/storage/sqlite3/send_to_device_table.go @@ -55,7 +55,7 @@ const selectSendToDeviceMessagesSQL = ` const deleteSendToDeviceMessagesSQL = ` DELETE FROM syncapi_send_to_device - WHERE user_id = $1 AND device_id = $2 AND id < $3 + WHERE user_id = $1 AND device_id = $2 AND id <= $3 ` const selectMaxSendToDeviceIDSQL = "" + diff --git a/syncapi/storage/storage_test.go b/syncapi/storage/storage_test.go index df03a33c2..eda5ef3e6 100644 --- a/syncapi/storage/storage_test.go +++ b/syncapi/storage/storage_test.go @@ -416,11 +416,6 @@ func TestSendToDeviceBehaviour(t *testing.T) { t.Fatal("first call should have no updates") } - err = db.CleanSendToDeviceUpdates(context.Background(), alice.ID, deviceID, 100) - if err != nil { - return - } - // Try sending a message. streamPos, err := db.StoreNewSendForDeviceMessage(ctx, alice.ID, deviceID, gomatrixserverlib.SendToDeviceEvent{ Sender: bob.ID, @@ -441,43 +436,35 @@ func TestSendToDeviceBehaviour(t *testing.T) { if count := len(events); count != 1 { t.Fatalf("second call should have one update, got %d", count) } - err = db.CleanSendToDeviceUpdates(context.Background(), alice.ID, deviceID, streamPos) - if err != nil { - return - } // At this point we should still have one message because we haven't progressed the // sync position yet. This is equivalent to the client failing to /sync and retrying // with the same position. - streamPos, events, err = db.SendToDeviceUpdatesForSync(ctx, alice.ID, deviceID, 0, 100) + streamPos, events, err = db.SendToDeviceUpdatesForSync(ctx, alice.ID, deviceID, 0, streamPos) if err != nil { t.Fatal(err) } if len(events) != 1 { t.Fatal("third call should have one update still") } - err = db.CleanSendToDeviceUpdates(context.Background(), alice.ID, deviceID, streamPos+1) + err = db.CleanSendToDeviceUpdates(context.Background(), alice.ID, deviceID, streamPos) if err != nil { return } // At this point we should now have no updates, because we've progressed the sync // position. Therefore the update from before will not be sent again. - _, events, err = db.SendToDeviceUpdatesForSync(ctx, alice.ID, deviceID, streamPos+1, streamPos+2) + _, events, err = db.SendToDeviceUpdatesForSync(ctx, alice.ID, deviceID, streamPos, streamPos+10) if err != nil { t.Fatal(err) } if len(events) != 0 { t.Fatal("fourth call should have no updates") } - err = db.CleanSendToDeviceUpdates(context.Background(), alice.ID, deviceID, streamPos+1) - if err != nil { - return - } // At this point we should still have no updates, because no new updates have been // sent. - _, events, err = db.SendToDeviceUpdatesForSync(ctx, alice.ID, deviceID, streamPos, streamPos+2) + _, events, err = db.SendToDeviceUpdatesForSync(ctx, alice.ID, deviceID, streamPos, streamPos+10) if err != nil { t.Fatal(err) } @@ -491,7 +478,7 @@ func TestSendToDeviceBehaviour(t *testing.T) { streamPos, err = db.StoreNewSendForDeviceMessage(ctx, alice.ID, deviceID, gomatrixserverlib.SendToDeviceEvent{ Sender: bob.ID, Type: "m.type", - Content: json.RawMessage(fmt.Sprintf(`{ "count": %d }`, i)), + Content: json.RawMessage(fmt.Sprintf(`{"count":%d}`, i)), }) if err != nil { t.Fatal(err) diff --git a/syncapi/syncapi_test.go b/syncapi/syncapi_test.go index 3ce7c64b7..b10864ff5 100644 --- a/syncapi/syncapi_test.go +++ b/syncapi/syncapi_test.go @@ -3,11 +3,14 @@ package syncapi import ( "context" "encoding/json" + "fmt" "net/http" "net/http/httptest" + "reflect" "testing" "time" + "github.com/matrix-org/dendrite/clientapi/producers" keyapi "github.com/matrix-org/dendrite/keyserver/api" "github.com/matrix-org/dendrite/roomserver/api" rsapi "github.com/matrix-org/dendrite/roomserver/api" @@ -311,6 +314,139 @@ func testSyncAPIUpdatePresenceImmediately(t *testing.T, dbType test.DBType) { } +func TestSendToDevice(t *testing.T) { + test.WithAllDatabases(t, testSendToDevice) +} + +func testSendToDevice(t *testing.T, dbType test.DBType) { + user := test.NewUser(t) + alice := userapi.Device{ + ID: "ALICEID", + UserID: user.ID, + AccessToken: "ALICE_BEARER_TOKEN", + DisplayName: "Alice", + AccountType: userapi.AccountTypeUser, + } + + base, close := testrig.CreateBaseDendrite(t, dbType) + defer close() + + jsctx, _ := base.NATS.Prepare(base.ProcessContext, &base.Cfg.Global.JetStream) + defer jetstream.DeleteAllStreams(jsctx, &base.Cfg.Global.JetStream) + + AddPublicRoutes(base, &syncUserAPI{accounts: []userapi.Device{alice}}, &syncRoomserverAPI{}, &syncKeyAPI{}) + + producer := producers.SyncAPIProducer{ + TopicSendToDeviceEvent: base.Cfg.Global.JetStream.Prefixed(jetstream.OutputSendToDeviceEvent), + JetStream: jsctx, + } + + msgCounter := 0 + + testCases := []struct { + name string + since string + want []string + sendMessagesCount int + }{ + { + name: "initial sync, no messages", + want: []string{}, + }, + { + name: "initial sync, one new message", + sendMessagesCount: 1, + want: []string{ + "message 1", + }, + }, + { + name: "initial sync, two new messages", // we didn't advance the since token, so we'll receive two messages + sendMessagesCount: 1, + want: []string{ + "message 1", + "message 2", + }, + }, + { + name: "incremental sync, one message", // this deletes message 1, as we advanced the since token + since: types.StreamingToken{SendToDevicePosition: 1}.String(), + want: []string{ + "message 2", + }, + }, + { + name: "failed incremental sync, one message", // didn't advance since, so still the same message + since: types.StreamingToken{SendToDevicePosition: 1}.String(), + want: []string{ + "message 2", + }, + }, + { + name: "incremental sync, no message", // this should delete message 2 + since: types.StreamingToken{SendToDevicePosition: 2}.String(), // next_batch from previous sync + want: []string{}, + }, + { + name: "incremental sync, three new messages", + since: types.StreamingToken{SendToDevicePosition: 2}.String(), + sendMessagesCount: 3, + want: []string{ + "message 3", // message 2 was deleted in the previous test + "message 4", + "message 5", + }, + }, + { + name: "initial sync, three messages", // we expect three messages, as we didn't go beyond "2" + want: []string{ + "message 3", + "message 4", + "message 5", + }, + }, + { + name: "incremental sync, no messages", // advance the sync token, no new messages + since: types.StreamingToken{SendToDevicePosition: 5}.String(), + want: []string{}, + }, + } + + ctx := context.Background() + for _, tc := range testCases { + // Send to-device messages of type "m.dendrite.test" with content `{"dummy":"message $counter"}` + for i := 0; i < tc.sendMessagesCount; i++ { + msgCounter++ + msg := map[string]string{ + "dummy": fmt.Sprintf("message %d", msgCounter), + } + if err := producer.SendToDevice(ctx, user.ID, user.ID, alice.ID, "m.dendrite.test", msg); err != nil { + t.Fatalf("unable to send to device message: %v", err) + } + } + time.Sleep((time.Millisecond * 15) * time.Duration(tc.sendMessagesCount)) // wait a bit, so the messages can be processed + // Execute a /sync request, recording the response + w := httptest.NewRecorder() + base.PublicClientAPIMux.ServeHTTP(w, test.NewRequest(t, "GET", "/_matrix/client/v3/sync", test.WithQueryParams(map[string]string{ + "access_token": alice.AccessToken, + "since": tc.since, + }))) + + // Extract the to_device.events, # gets all values of an array, in this case a string slice with "message $counter" entries + events := gjson.Get(w.Body.String(), "to_device.events.#.content.dummy").Array() + got := make([]string, len(events)) + for i := range events { + got[i] = events[i].String() + } + + // Ensure the messages we received are as we expect them to be + if !reflect.DeepEqual(got, tc.want) { + t.Logf("[%s|since=%s]: Sync: %s", tc.name, tc.since, w.Body.String()) + t.Fatalf("[%s|since=%s]: got: %+v, want: %+v", tc.name, tc.since, got, tc.want) + } + } +} + func toNATSMsgs(t *testing.T, base *base.BaseDendrite, input []*gomatrixserverlib.HeaderedEvent) []*nats.Msg { result := make([]*nats.Msg, len(input)) for i, ev := range input { diff --git a/test/testrig/base.go b/test/testrig/base.go index facb49f3e..d13c43129 100644 --- a/test/testrig/base.go +++ b/test/testrig/base.go @@ -32,11 +32,11 @@ func CreateBaseDendrite(t *testing.T, dbType test.DBType) (*base.BaseDendrite, f var cfg config.Dendrite cfg.Defaults(false) cfg.Global.JetStream.InMemory = true - switch dbType { case test.DBTypePostgres: cfg.Global.Defaults(true) // autogen a signing key cfg.MediaAPI.Defaults(true) // autogen a media path + cfg.Global.ServerName = "test" // use a distinct prefix else concurrent postgres/sqlite runs will clash since NATS will use // the file system event with InMemory=true :( cfg.Global.JetStream.TopicPrefix = fmt.Sprintf("Test_%d_", dbType) @@ -50,6 +50,7 @@ func CreateBaseDendrite(t *testing.T, dbType test.DBType) (*base.BaseDendrite, f return base.NewBaseDendrite(&cfg, "Test", base.DisableMetrics), close case test.DBTypeSQLite: cfg.Defaults(true) // sets a sqlite db per component + cfg.Global.ServerName = "test" // use a distinct prefix else concurrent postgres/sqlite runs will clash since NATS will use // the file system event with InMemory=true :( cfg.Global.JetStream.TopicPrefix = fmt.Sprintf("Test_%d_", dbType) From ac2dbb3513b2e6ff24624f30a2a8b1bd51bdabc5 Mon Sep 17 00:00:00 2001 From: Till <2353100+S7evinK@users.noreply.github.com> Date: Wed, 3 Aug 2022 10:55:21 +0200 Subject: [PATCH 05/12] Add Cache-Control header to media endpoints (#2612) * Add Cache-Control header * Raise rate_limiting threshold to 20 --- dendrite-sample.monolith.yaml | 2 +- dendrite-sample.polylith.yaml | 2 +- mediaapi/routing/routing.go | 3 +++ 3 files changed, 5 insertions(+), 2 deletions(-) diff --git a/dendrite-sample.monolith.yaml b/dendrite-sample.monolith.yaml index a34b8af55..816c4cae9 100644 --- a/dendrite-sample.monolith.yaml +++ b/dendrite-sample.monolith.yaml @@ -192,7 +192,7 @@ client_api: # and appservice users are exempt from rate limiting by default. rate_limiting: enabled: true - threshold: 5 + threshold: 20 cooloff_ms: 500 exempt_user_ids: # - "@user:domain.com" diff --git a/dendrite-sample.polylith.yaml b/dendrite-sample.polylith.yaml index 550611229..4784dbaff 100644 --- a/dendrite-sample.polylith.yaml +++ b/dendrite-sample.polylith.yaml @@ -195,7 +195,7 @@ client_api: # and appservice users are exempt from rate limiting by default. rate_limiting: enabled: true - threshold: 5 + threshold: 20 cooloff_ms: 500 exempt_user_ids: # - "@user:domain.com" diff --git a/mediaapi/routing/routing.go b/mediaapi/routing/routing.go index 196908184..9dcfa955f 100644 --- a/mediaapi/routing/routing.go +++ b/mediaapi/routing/routing.go @@ -149,6 +149,9 @@ func makeDownloadAPI( } } + // Cache media for at least one day. + w.Header().Set("Cache-Control", "public,max-age=86400,s-maxage=86400") + Download( w, req, From f7f2453a859e9a2b2cccec5b0f47d558bc9ca507 Mon Sep 17 00:00:00 2001 From: Neil Alexander Date: Wed, 3 Aug 2022 10:35:57 +0100 Subject: [PATCH 06/12] Test Go 1.19 in CI --- .github/workflows/dendrite.yml | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) diff --git a/.github/workflows/dendrite.yml b/.github/workflows/dendrite.yml index 4cbfb380f..0d1970efd 100644 --- a/.github/workflows/dendrite.yml +++ b/.github/workflows/dendrite.yml @@ -97,7 +97,7 @@ jobs: strategy: fail-fast: false matrix: - go: ["1.18"] + go: ["1.18", "1.19"] steps: - uses: actions/checkout@v3 - name: Setup go @@ -127,7 +127,7 @@ jobs: strategy: fail-fast: false matrix: - go: ["1.18"] + go: ["1.18", "1.19"] goos: ["linux"] goarch: ["amd64", "386"] steps: @@ -160,7 +160,7 @@ jobs: runs-on: ubuntu-latest strategy: matrix: - go: ["1.18"] + go: ["1.18", "1.19"] goos: ["windows"] goarch: ["amd64"] steps: @@ -384,7 +384,14 @@ jobs: integration-tests-done: name: Integration tests passed - needs: [initial-tests-done, upgrade_test, upgrade_test_direct, sytest, complement] + needs: + [ + initial-tests-done, + upgrade_test, + upgrade_test_direct, + sytest, + complement, + ] runs-on: ubuntu-latest if: ${{ !cancelled() }} # Run this even if prior jobs were skipped steps: From 376391d1c7e309e4a09998c0717ec8adc70fe1a4 Mon Sep 17 00:00:00 2001 From: Neil Alexander Date: Wed, 3 Aug 2022 10:38:36 +0100 Subject: [PATCH 07/12] Update Pinecone --- go.mod | 11 ++++++----- go.sum | 25 ++++++++++++++----------- 2 files changed, 20 insertions(+), 16 deletions(-) diff --git a/go.mod b/go.mod index 5b2d8670d..3559c5bb1 100644 --- a/go.mod +++ b/go.mod @@ -22,7 +22,7 @@ require ( github.com/matrix-org/go-sqlite3-js v0.0.0-20220419092513-28aa791a1c91 github.com/matrix-org/gomatrix v0.0.0-20210324163249-be2af5ef2e16 github.com/matrix-org/gomatrixserverlib v0.0.0-20220725104114-b6003e522771 - github.com/matrix-org/pinecone v0.0.0-20220708135211-1ce778fcde6a + github.com/matrix-org/pinecone v0.0.0-20220803093810-b7a830c08fb9 github.com/matrix-org/util v0.0.0-20200807132607-55161520e1d4 github.com/mattn/go-sqlite3 v1.14.13 github.com/nats-io/nats-server/v2 v2.8.5-0.20220731184415-903a06a5b4ee @@ -45,7 +45,7 @@ require ( golang.org/x/crypto v0.0.0-20220525230936-793ad666bf5e golang.org/x/image v0.0.0-20220413100746-70e8d0d3baa9 golang.org/x/mobile v0.0.0-20220518205345-8578da9835fd - golang.org/x/net v0.0.0-20220524220425-1d687d428aca + golang.org/x/net v0.0.0-20220624214902-1bab6f366d9e golang.org/x/term v0.0.0-20220526004731-065cf7ba2467 gopkg.in/h2non/bimg.v1 v1.1.9 gopkg.in/yaml.v2 v2.4.0 @@ -73,10 +73,11 @@ require ( github.com/juju/errors v0.0.0-20220203013757-bd733f3c86b9 // indirect github.com/juju/testing v0.0.0-20220203020004-a0ff61f03494 // indirect github.com/klauspost/compress v1.15.9 // indirect - github.com/lucas-clemente/quic-go v0.26.0 // indirect + github.com/lucas-clemente/quic-go v0.28.1 // indirect github.com/marten-seemann/qtls-go1-16 v0.1.5 // indirect - github.com/marten-seemann/qtls-go1-17 v0.1.1 // indirect - github.com/marten-seemann/qtls-go1-18 v0.1.1 // indirect + github.com/marten-seemann/qtls-go1-17 v0.1.2 // indirect + github.com/marten-seemann/qtls-go1-18 v0.1.2 // indirect + github.com/marten-seemann/qtls-go1-19 v0.1.0-beta.1 // indirect github.com/matttproud/golang_protobuf_extensions v1.0.2-0.20181231171920-c182affec369 // indirect github.com/miekg/dns v1.1.49 // indirect github.com/minio/highwayhash v1.0.2 // indirect diff --git a/go.sum b/go.sum index 6321adada..2c8bb4f18 100644 --- a/go.sum +++ b/go.sum @@ -321,8 +321,8 @@ github.com/leodido/go-urn v1.2.0 h1:hpXL4XnriNwQ/ABnpepYM/1vCLWNDfUNts8dX3xTG6Y= github.com/leodido/go-urn v1.2.0/go.mod h1:+8+nEpDfqqsY+g338gtMEUOtuK+4dEMhiQEgxpxOKII= github.com/lib/pq v1.10.5 h1:J+gdV2cUmX7ZqL2B0lFcW0m+egaHC2V3lpO8nWxyYiQ= github.com/lib/pq v1.10.5/go.mod h1:AlVN5x4E4T544tWzH6hKfbfQvm3HdbOxrmggDNAPY9o= -github.com/lucas-clemente/quic-go v0.26.0 h1:ALBQXr9UJ8A1LyzvceX4jd9QFsHvlI0RR6BkV16o00A= -github.com/lucas-clemente/quic-go v0.26.0/go.mod h1:AzgQoPda7N+3IqMMMkywBKggIFo2KT6pfnlrQ2QieeI= +github.com/lucas-clemente/quic-go v0.28.1 h1:Uo0lvVxWg5la9gflIF9lwa39ONq85Xq2D91YNEIslzU= +github.com/lucas-clemente/quic-go v0.28.1/go.mod h1:oGz5DKK41cJt5+773+BSO9BXDsREY4HLf7+0odGAPO0= github.com/lunixbochs/vtclean v1.0.0/go.mod h1:pHhQNgMf3btfWnGBVipUOjRYhoOsdGqdm/+2c2E2WMI= github.com/lxn/walk v0.0.0-20210112085537-c389da54e794/go.mod h1:E23UucZGqpuUANJooIbHWCufXvOcT6E7Stq81gU+CSQ= github.com/lxn/win v0.0.0-20210218163916-a377121e959e/go.mod h1:KxxjdtRkfNoYDCUP5ryK7XJJNTnpC8atvtmTheChOtk= @@ -330,10 +330,12 @@ github.com/mailru/easyjson v0.0.0-20190312143242-1de009706dbe/go.mod h1:C1wdFJiN github.com/marten-seemann/qpack v0.2.1/go.mod h1:F7Gl5L1jIgN1D11ucXefiuJS9UMVP2opoCp2jDKb7wc= github.com/marten-seemann/qtls-go1-16 v0.1.5 h1:o9JrYPPco/Nukd/HpOHMHZoBDXQqoNtUCmny98/1uqQ= github.com/marten-seemann/qtls-go1-16 v0.1.5/go.mod h1:gNpI2Ol+lRS3WwSOtIUUtRwZEQMXjYK+dQSBFbethAk= -github.com/marten-seemann/qtls-go1-17 v0.1.1 h1:DQjHPq+aOzUeh9/lixAGunn6rIOQyWChPSI4+hgW7jc= -github.com/marten-seemann/qtls-go1-17 v0.1.1/go.mod h1:C2ekUKcDdz9SDWxec1N/MvcXBpaX9l3Nx67XaR84L5s= -github.com/marten-seemann/qtls-go1-18 v0.1.1 h1:qp7p7XXUFL7fpBvSS1sWD+uSqPvzNQK43DH+/qEkj0Y= -github.com/marten-seemann/qtls-go1-18 v0.1.1/go.mod h1:mJttiymBAByA49mhlNZZGrH5u1uXYZJ+RW28Py7f4m4= +github.com/marten-seemann/qtls-go1-17 v0.1.2 h1:JADBlm0LYiVbuSySCHeY863dNkcpMmDR7s0bLKJeYlQ= +github.com/marten-seemann/qtls-go1-17 v0.1.2/go.mod h1:C2ekUKcDdz9SDWxec1N/MvcXBpaX9l3Nx67XaR84L5s= +github.com/marten-seemann/qtls-go1-18 v0.1.2 h1:JH6jmzbduz0ITVQ7ShevK10Av5+jBEKAHMntXmIV7kM= +github.com/marten-seemann/qtls-go1-18 v0.1.2/go.mod h1:mJttiymBAByA49mhlNZZGrH5u1uXYZJ+RW28Py7f4m4= +github.com/marten-seemann/qtls-go1-19 v0.1.0-beta.1 h1:7m/WlWcSROrcK5NxuXaxYD32BZqe/LEEnBrWcH/cOqQ= +github.com/marten-seemann/qtls-go1-19 v0.1.0-beta.1/go.mod h1:5HTDWtVudo/WFsHKRNuOhWlbdjrfs5JHrYb0wIJqGpI= github.com/matrix-org/dugong v0.0.0-20210921133753-66e6b1c67e2e h1:DP5RC0Z3XdyBEW5dKt8YPeN6vZbm6OzVaGVp7f1BQRM= github.com/matrix-org/dugong v0.0.0-20210921133753-66e6b1c67e2e/go.mod h1:NgPCr+UavRGH6n5jmdX8DuqFZ4JiCWIJoZiuhTRLSUg= github.com/matrix-org/go-sqlite3-js v0.0.0-20220419092513-28aa791a1c91 h1:s7fexw2QV3YD/fRrzEDPNGgTlJlvXY0EHHnT87wF3OA= @@ -343,8 +345,8 @@ github.com/matrix-org/gomatrix v0.0.0-20210324163249-be2af5ef2e16 h1:ZtO5uywdd5d github.com/matrix-org/gomatrix v0.0.0-20210324163249-be2af5ef2e16/go.mod h1:/gBX06Kw0exX1HrwmoBibFA98yBk/jxKpGVeyQbff+s= github.com/matrix-org/gomatrixserverlib v0.0.0-20220725104114-b6003e522771 h1:ZIPHFIPNDS9dmEbPEiJbNmyCGJtn9exfpLC7JOcn/bE= github.com/matrix-org/gomatrixserverlib v0.0.0-20220725104114-b6003e522771/go.mod h1:jX38yp3SSLJNftBg3PXU1ayd0PCLIiDHQ4xAc9DIixk= -github.com/matrix-org/pinecone v0.0.0-20220708135211-1ce778fcde6a h1:DdG8vXMlZ65EAtc4V+3t7zHZ2Gqs24pSnyXS+4BRHUs= -github.com/matrix-org/pinecone v0.0.0-20220708135211-1ce778fcde6a/go.mod h1:ulJzsVOTssIVp1j/m5eI//4VpAGDkMt5NrRuAVX7wpc= +github.com/matrix-org/pinecone v0.0.0-20220803093810-b7a830c08fb9 h1:ed8yvWhTLk7+sNeK/eOZRTvESFTOHDRevoRoyeqPtvY= +github.com/matrix-org/pinecone v0.0.0-20220803093810-b7a830c08fb9/go.mod h1:P4MqPf+u83OPulPJ+XTbSDbbWrdFYNY4LZ/B1PIduFE= github.com/matrix-org/util v0.0.0-20190711121626-527ce5ddefc7/go.mod h1:vVQlW/emklohkZnOPwD3LrZUBqdfsbiyO3p1lNV8F6U= github.com/matrix-org/util v0.0.0-20200807132607-55161520e1d4 h1:eCEHXWDv9Rm335MSuB49mFUK44bwZPFSDde3ORE3syk= github.com/matrix-org/util v0.0.0-20200807132607-55161520e1d4/go.mod h1:vVQlW/emklohkZnOPwD3LrZUBqdfsbiyO3p1lNV8F6U= @@ -549,7 +551,6 @@ go.opencensus.io v0.22.0/go.mod h1:+kGneAE2xo2IficOXnaByMWTGM9T73dGwxeWcUqIpI8= go.opencensus.io v0.22.2/go.mod h1:yxeiOL68Rb0Xd1ddK5vPZ/oVn4vY4Ynel7k9FzqtOIw= go.opencensus.io v0.22.3/go.mod h1:yxeiOL68Rb0Xd1ddK5vPZ/oVn4vY4Ynel7k9FzqtOIw= go.opencensus.io v0.22.4/go.mod h1:yxeiOL68Rb0Xd1ddK5vPZ/oVn4vY4Ynel7k9FzqtOIw= -go.uber.org/atomic v1.7.0/go.mod h1:fEN4uk6kAWBTFdckzkM89CLk9XfWZrxpCo0nPH17wJc= go.uber.org/atomic v1.9.0 h1:ECmE8Bn/WFTYwEW/bpKD3M8VtR/zQVbavAoalC1PYyE= go.uber.org/atomic v1.9.0/go.mod h1:fEN4uk6kAWBTFdckzkM89CLk9XfWZrxpCo0nPH17wJc= go4.org v0.0.0-20180809161055-417644f6feb5/go.mod h1:MkTOUMDaeVYJUOUsaDXIhWPZYa1yOyC1qaOBpL57BhE= @@ -659,8 +660,8 @@ golang.org/x/net v0.0.0-20210805182204-aaa1db679c0d/go.mod h1:9nx3DQGgdP8bBQD5qx golang.org/x/net v0.0.0-20210927181540-4e4d966f7476/go.mod h1:9nx3DQGgdP8bBQD5qxJ1jj9UTztislL4KSBs9R2vV5Y= golang.org/x/net v0.0.0-20211011170408-caeb26a5c8c0/go.mod h1:9nx3DQGgdP8bBQD5qxJ1jj9UTztislL4KSBs9R2vV5Y= golang.org/x/net v0.0.0-20211101193420-4a448f8816b3/go.mod h1:9nx3DQGgdP8bBQD5qxJ1jj9UTztislL4KSBs9R2vV5Y= -golang.org/x/net v0.0.0-20220524220425-1d687d428aca h1:xTaFYiPROfpPhqrfTIDXj0ri1SpfueYT951s4bAuDO8= -golang.org/x/net v0.0.0-20220524220425-1d687d428aca/go.mod h1:CfG3xpIq0wQ8r1q4Su4UZFWDARRcnwPjda9FqA0JpMk= +golang.org/x/net v0.0.0-20220624214902-1bab6f366d9e h1:TsQ7F31D3bUCLeqPT0u+yjp1guoArKaNKmCr22PYgTQ= +golang.org/x/net v0.0.0-20220624214902-1bab6f366d9e/go.mod h1:XRhObCWvk6IyKnWLug+ECip1KBveYUHfp+8e9klMJ9c= golang.org/x/oauth2 v0.0.0-20180821212333-d2e6202438be/go.mod h1:N/0e6XlmueqKjAGxoOufVs8QHGRruUQn6yWY3a++T0U= golang.org/x/oauth2 v0.0.0-20181017192945-9dcd33a902f4/go.mod h1:N/0e6XlmueqKjAGxoOufVs8QHGRruUQn6yWY3a++T0U= golang.org/x/oauth2 v0.0.0-20181203162652-d668ce993890/go.mod h1:N/0e6XlmueqKjAGxoOufVs8QHGRruUQn6yWY3a++T0U= @@ -750,6 +751,7 @@ golang.org/x/sys v0.0.0-20220405052023-b1e9470b6e64/go.mod h1:oPkhp1MJrh7nUepCBc golang.org/x/sys v0.0.0-20220520151302-bc2c85ada10a h1:dGzPydgVsqGcTRVwiLJ1jVbufYwmzD3LfVPLKsKg+0k= golang.org/x/sys v0.0.0-20220520151302-bc2c85ada10a/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo= +golang.org/x/term v0.0.0-20210927222741-03fcf44c2211/go.mod h1:jbD1KX2456YbFQfuXm/mYQcufACuNUgVhRMnK/tPxf8= golang.org/x/term v0.0.0-20220526004731-065cf7ba2467 h1:CBpWXWQpIRjzmkkA+M7q9Fqnwd2mZr3AFqexg8YTfoM= golang.org/x/term v0.0.0-20220526004731-065cf7ba2467/go.mod h1:jbD1KX2456YbFQfuXm/mYQcufACuNUgVhRMnK/tPxf8= golang.org/x/text v0.0.0-20170915032832-14c0d48ead0c/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= @@ -758,6 +760,7 @@ golang.org/x/text v0.3.1-0.20180807135948-17ff2d5776d2/go.mod h1:NqM8EUOU14njkJ3 golang.org/x/text v0.3.2/go.mod h1:bEr9sfX3Q8Zfm5fL9x+3itogRgK3+ptLWKqgva+5dAk= golang.org/x/text v0.3.3/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ= golang.org/x/text v0.3.6/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ= +golang.org/x/text v0.3.7/go.mod h1:u+2+/6zg+i71rQMx5EYifcz6MCKuco9NR6JIITiCfzQ= golang.org/x/text v0.3.8-0.20211004125949-5bd84dd9b33b h1:NXqSWXSRUSCaFuvitrWtU169I3876zRTalMRbfd6LL0= golang.org/x/text v0.3.8-0.20211004125949-5bd84dd9b33b/go.mod h1:EFNZuWvGYxIRUEX+K8UmCFwYmZjqcrnq15ZuVldZkZ0= golang.org/x/time v0.0.0-20180412165947-fbb02b2291d2/go.mod h1:tRJNPiyCQ0inRvYxbN9jk5I+vvW/OXSQhTDSoE431IQ= From bbff41b44bff2dbc53867cc0fd94ce8f31fd511a Mon Sep 17 00:00:00 2001 From: Neil Alexander Date: Wed, 3 Aug 2022 10:50:45 +0100 Subject: [PATCH 08/12] Disable stack protector on Linux CI build pipelines for now (to avoid `relocation target __stack_chk_fail_local not defined` errors) --- .github/workflows/dendrite.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/workflows/dendrite.yml b/.github/workflows/dendrite.yml index 0d1970efd..19ebd760c 100644 --- a/.github/workflows/dendrite.yml +++ b/.github/workflows/dendrite.yml @@ -151,6 +151,7 @@ jobs: GOOS: ${{ matrix.goos }} GOARCH: ${{ matrix.goarch }} CGO_ENABLED: 1 + CGO_CFLAGS: -fno-stack-protector run: go build -trimpath -v -o "bin/" ./cmd/... # build for Windows 64-bit From 2250768be16bd0e6b3a6a72b5e55eb3e2ad6e3c6 Mon Sep 17 00:00:00 2001 From: Neil Alexander Date: Wed, 3 Aug 2022 17:14:21 +0100 Subject: [PATCH 09/12] Remove roominfo cache (#2615) * Remove roominfo cache It's the source of a number of race conditions which are seemingly causing bugs and CI failures. * Make the linter less sad --- internal/caching/cache_roominfo.go | 33 ---------------------- internal/caching/cache_roomservernids.go | 1 - internal/caching/caches.go | 1 - internal/caching/impl_ristretto.go | 7 ----- roomserver/storage/shared/storage.go | 35 ++++++------------------ 5 files changed, 9 insertions(+), 68 deletions(-) delete mode 100644 internal/caching/cache_roominfo.go diff --git a/internal/caching/cache_roominfo.go b/internal/caching/cache_roominfo.go deleted file mode 100644 index 5dfed3c85..000000000 --- a/internal/caching/cache_roominfo.go +++ /dev/null @@ -1,33 +0,0 @@ -package caching - -import ( - "github.com/matrix-org/dendrite/roomserver/types" -) - -// WARNING: This cache is mutable because it's entirely possible that -// the IsStub or StateSnaphotNID fields can change, even though the -// room version and room NID fields will not. This is only safe because -// the RoomInfoCache is used ONLY within the roomserver and because it -// will be kept up-to-date by the latest events updater. It MUST NOT be -// used from other components as we currently have no way to invalidate -// the cache in downstream components. - -// RoomInfosCache contains the subset of functions needed for -// a room Info cache. It must only be used from the roomserver only -// It is not safe for use from other components. -type RoomInfoCache interface { - GetRoomInfo(roomID string) (roomInfo *types.RoomInfo, ok bool) - StoreRoomInfo(roomID string, roomInfo *types.RoomInfo) -} - -// GetRoomInfo must only be called from the roomserver only. It is not -// safe for use from other components. -func (c Caches) GetRoomInfo(roomID string) (*types.RoomInfo, bool) { - return c.RoomInfos.Get(roomID) -} - -// StoreRoomInfo must only be called from the roomserver only. It is not -// safe for use from other components. -func (c Caches) StoreRoomInfo(roomID string, roomInfo *types.RoomInfo) { - c.RoomInfos.Set(roomID, roomInfo) -} diff --git a/internal/caching/cache_roomservernids.go b/internal/caching/cache_roomservernids.go index f27154f19..88a5b28bc 100644 --- a/internal/caching/cache_roomservernids.go +++ b/internal/caching/cache_roomservernids.go @@ -7,7 +7,6 @@ import ( type RoomServerCaches interface { RoomServerNIDsCache RoomVersionCache - RoomInfoCache RoomServerEventsCache EventStateKeyCache } diff --git a/internal/caching/caches.go b/internal/caching/caches.go index f13f743d3..78c9ab7ee 100644 --- a/internal/caching/caches.go +++ b/internal/caching/caches.go @@ -29,7 +29,6 @@ type Caches struct { RoomServerRoomIDs Cache[types.RoomNID, string] // room NID -> room ID RoomServerEvents Cache[int64, *gomatrixserverlib.Event] // event NID -> event RoomServerStateKeys Cache[types.EventStateKeyNID, string] // event NID -> event state key - RoomInfos Cache[string, *types.RoomInfo] // room ID -> room info FederationPDUs Cache[int64, *gomatrixserverlib.HeaderedEvent] // queue NID -> PDU FederationEDUs Cache[int64, *gomatrixserverlib.EDU] // queue NID -> EDU SpaceSummaryRooms Cache[string, gomatrixserverlib.MSC2946SpacesResponse] // room ID -> space response diff --git a/internal/caching/impl_ristretto.go b/internal/caching/impl_ristretto.go index fdbbb4d72..fc0c8cc0f 100644 --- a/internal/caching/impl_ristretto.go +++ b/internal/caching/impl_ristretto.go @@ -35,7 +35,6 @@ const ( roomNIDsCache roomIDsCache roomEventsCache - roomInfosCache federationPDUsCache federationEDUsCache spaceSummaryRoomsCache @@ -106,12 +105,6 @@ func NewRistrettoCache(maxCost config.DataUnit, maxAge time.Duration, enableProm Prefix: eventStateKeyCache, MaxAge: maxAge, }, - RoomInfos: &RistrettoCachePartition[string, *types.RoomInfo]{ // room ID -> room info - cache: cache, - Prefix: roomInfosCache, - Mutable: true, - MaxAge: maxAge, - }, FederationPDUs: &RistrettoCostedCachePartition[int64, *gomatrixserverlib.HeaderedEvent]{ // queue NID -> PDU &RistrettoCachePartition[int64, *gomatrixserverlib.HeaderedEvent]{ cache: cache, diff --git a/roomserver/storage/shared/storage.go b/roomserver/storage/shared/storage.go index 9e6a4142c..cbf9c8b20 100644 --- a/roomserver/storage/shared/storage.go +++ b/roomserver/storage/shared/storage.go @@ -156,30 +156,15 @@ func (d *Database) RoomInfo(ctx context.Context, roomID string) (*types.RoomInfo } func (d *Database) roomInfo(ctx context.Context, txn *sql.Tx, roomID string) (*types.RoomInfo, error) { - roomInfo, ok := d.Cache.GetRoomInfo(roomID) - if ok && roomInfo != nil && !roomInfo.IsStub() { - // The data that's in the cache is not stubby, so return it. - return roomInfo, nil - } - // At this point we either don't have an entry in the cache, or - // it is stubby, so let's check the roomserver_rooms table again. - roomInfoFromDB, err := d.RoomsTable.SelectRoomInfo(ctx, txn, roomID) + roomInfo, err := d.RoomsTable.SelectRoomInfo(ctx, txn, roomID) if err != nil { return nil, err } - // If we have a stubby cache entry already, update it and return - // the reference to the cache entry. if roomInfo != nil { - roomInfo.CopyFrom(roomInfoFromDB) - return roomInfo, nil + d.Cache.StoreRoomServerRoomID(roomInfo.RoomNID, roomID) + d.Cache.StoreRoomVersion(roomID, roomInfo.RoomVersion) } - // Otherwise, try to admit the data into the cache and return the - // new reference from the database. - if roomInfoFromDB != nil { - d.Cache.StoreRoomServerRoomID(roomInfoFromDB.RoomNID, roomID) - d.Cache.StoreRoomInfo(roomID, roomInfoFromDB) - } - return roomInfoFromDB, err + return roomInfo, err } func (d *Database) AddState( @@ -504,8 +489,8 @@ func (d *Database) events( fetchNIDList := make([]types.RoomNID, 0, len(uniqueRoomNIDs)) for n := range uniqueRoomNIDs { if roomID, ok := d.Cache.GetRoomServerRoomID(n); ok { - if roomInfo, ok := d.Cache.GetRoomInfo(roomID); ok { - roomVersions[n] = roomInfo.RoomVersion + if roomVersion, ok := d.Cache.GetRoomVersion(roomID); ok { + roomVersions[n] = roomVersion continue } } @@ -762,9 +747,6 @@ func (d *Database) MissingAuthPrevEvents( func (d *Database) assignRoomNID( ctx context.Context, roomID string, roomVersion gomatrixserverlib.RoomVersion, ) (types.RoomNID, error) { - if roomInfo, ok := d.Cache.GetRoomInfo(roomID); ok { - return roomInfo.RoomNID, nil - } // Check if we already have a numeric ID in the database. roomNID, err := d.RoomsTable.SelectRoomNID(ctx, nil, roomID) if err == sql.ErrNoRows { @@ -837,8 +819,9 @@ func extractRoomVersionFromCreateEvent(event *gomatrixserverlib.Event) ( // "servers should not apply or send redactions to clients until both the redaction event and original event have been seen, and are valid." // https://matrix.org/docs/spec/rooms/v3#authorization-rules-for-events // These cases are: -// - This is a redaction event, redact the event it references if we know about it. -// - This is a normal event which may have been previously redacted. +// - This is a redaction event, redact the event it references if we know about it. +// - This is a normal event which may have been previously redacted. +// // In the first case, check if we have the referenced event then apply the redaction, else store it // in the redactions table with validated=FALSE. In the second case, check if there is a redaction for it: // if there is then apply the redactions and set validated=TRUE. From 9fe509b18da997e294813fcc5f46a45b7f6e6784 Mon Sep 17 00:00:00 2001 From: Till <2353100+S7evinK@users.noreply.github.com> Date: Wed, 3 Aug 2022 18:35:17 +0200 Subject: [PATCH 10/12] Fix syncapi shared users query & device lists (#2614) * Fix query issue, only add "changed" users if we actually share a room * Avoid log spam if context is done * Undo changes to filterSharedUsers * Add logging again.. * Fix SQLite shared users query * Change query to include invited users --- keyserver/internal/internal.go | 11 +++++ syncapi/internal/keychange.go | 44 +++++++++--------- syncapi/internal/keychange_test.go | 1 + .../postgres/current_room_state_table.go | 4 +- .../sqlite3/current_room_state_table.go | 45 ++++++++++--------- 5 files changed, 62 insertions(+), 43 deletions(-) diff --git a/keyserver/internal/internal.go b/keyserver/internal/internal.go index c146b2aa0..91f011517 100644 --- a/keyserver/internal/internal.go +++ b/keyserver/internal/internal.go @@ -18,6 +18,7 @@ import ( "bytes" "context" "encoding/json" + "errors" "fmt" "sync" "time" @@ -314,6 +315,11 @@ func (a *KeyInternalAPI) QueryKeys(ctx context.Context, req *api.QueryKeysReques for targetKeyID := range masterKey.Keys { sigMap, err := a.DB.CrossSigningSigsForTarget(ctx, req.UserID, targetUserID, targetKeyID) if err != nil { + // Stop executing the function if the context was canceled/the deadline was exceeded, + // as we can't continue without a valid context. + if errors.Is(err, context.Canceled) || errors.Is(err, context.DeadlineExceeded) { + return + } logrus.WithError(err).Errorf("a.DB.CrossSigningSigsForTarget failed") continue } @@ -335,6 +341,11 @@ func (a *KeyInternalAPI) QueryKeys(ctx context.Context, req *api.QueryKeysReques for targetKeyID, key := range forUserID { sigMap, err := a.DB.CrossSigningSigsForTarget(ctx, req.UserID, targetUserID, gomatrixserverlib.KeyID(targetKeyID)) if err != nil { + // Stop executing the function if the context was canceled/the deadline was exceeded, + // as we can't continue without a valid context. + if errors.Is(err, context.Canceled) || errors.Is(err, context.DeadlineExceeded) { + return + } logrus.WithError(err).Errorf("a.DB.CrossSigningSigsForTarget failed") continue } diff --git a/syncapi/internal/keychange.go b/syncapi/internal/keychange.go index 03df9285c..4bf54cae0 100644 --- a/syncapi/internal/keychange.go +++ b/syncapi/internal/keychange.go @@ -25,10 +25,9 @@ import ( "github.com/matrix-org/dendrite/syncapi/types" "github.com/matrix-org/gomatrixserverlib" "github.com/matrix-org/util" + "github.com/sirupsen/logrus" ) -const DeviceListLogName = "dl" - // DeviceOTKCounts adds one-time key counts to the /sync response func DeviceOTKCounts(ctx context.Context, keyAPI keyapi.SyncKeyAPI, userID, deviceID string, res *types.Response) error { var queryRes keyapi.QueryOneTimeKeysResponse @@ -93,18 +92,13 @@ func DeviceListCatchup( queryRes.UserIDs = append(queryRes.UserIDs, joinUserIDs...) queryRes.UserIDs = append(queryRes.UserIDs, leaveUserIDs...) queryRes.UserIDs = util.UniqueStrings(queryRes.UserIDs) - var sharedUsersMap map[string]int - sharedUsersMap, queryRes.UserIDs = filterSharedUsers(ctx, db, userID, queryRes.UserIDs) - util.GetLogger(ctx).Debugf( - "QueryKeyChanges request off=%d,to=%d response off=%d uids=%v", - offset, toOffset, queryRes.Offset, queryRes.UserIDs, - ) + sharedUsersMap := filterSharedUsers(ctx, db, userID, queryRes.UserIDs) userSet := make(map[string]bool) for _, userID := range res.DeviceLists.Changed { userSet[userID] = true } - for _, userID := range queryRes.UserIDs { - if !userSet[userID] { + for userID, count := range sharedUsersMap { + if !userSet[userID] && count > 0 { res.DeviceLists.Changed = append(res.DeviceLists.Changed, userID) hasNew = true userSet[userID] = true @@ -113,7 +107,7 @@ func DeviceListCatchup( // Finally, add in users who have joined or left. // TODO: This is sub-optimal because we will add users to `changed` even if we already shared a room with them. for _, userID := range joinUserIDs { - if !userSet[userID] { + if !userSet[userID] && sharedUsersMap[userID] > 0 { res.DeviceLists.Changed = append(res.DeviceLists.Changed, userID) hasNew = true userSet[userID] = true @@ -126,6 +120,13 @@ func DeviceListCatchup( } } + util.GetLogger(ctx).WithFields(logrus.Fields{ + "user_id": userID, + "from": offset, + "to": toOffset, + "response_offset": queryRes.Offset, + }).Debugf("QueryKeyChanges request result: %+v", res.DeviceLists) + return types.StreamPosition(queryRes.Offset), hasNew, nil } @@ -220,24 +221,27 @@ func TrackChangedUsers( // it down to include only users who the requesting user shares a room with. func filterSharedUsers( ctx context.Context, db storage.SharedUsers, userID string, usersWithChangedKeys []string, -) (map[string]int, []string) { +) map[string]int { sharedUsersMap := make(map[string]int, len(usersWithChangedKeys)) - for _, userID := range usersWithChangedKeys { - sharedUsersMap[userID] = 0 + for _, changedUserID := range usersWithChangedKeys { + sharedUsersMap[changedUserID] = 0 + if changedUserID == userID { + // We forcibly put ourselves in this list because we should be notified about our own device updates + // and if we are in 0 rooms then we don't technically share any room with ourselves so we wouldn't + // be notified about key changes. + sharedUsersMap[userID] = 1 + } } sharedUsers, err := db.SharedUsers(ctx, userID, usersWithChangedKeys) if err != nil { + util.GetLogger(ctx).WithError(err).Errorf("db.SharedUsers failed: %s", err) // default to all users so we do needless queries rather than miss some important device update - return nil, usersWithChangedKeys + return sharedUsersMap } for _, userID := range sharedUsers { sharedUsersMap[userID]++ } - // We forcibly put ourselves in this list because we should be notified about our own device updates - // and if we are in 0 rooms then we don't technically share any room with ourselves so we wouldn't - // be notified about key changes. - sharedUsersMap[userID] = 1 - return sharedUsersMap, sharedUsers + return sharedUsersMap } func joinedRooms(res *types.Response, userID string) []string { diff --git a/syncapi/internal/keychange_test.go b/syncapi/internal/keychange_test.go index 79ed440e7..6bfc91edd 100644 --- a/syncapi/internal/keychange_test.go +++ b/syncapi/internal/keychange_test.go @@ -129,6 +129,7 @@ type wantCatchup struct { } func assertCatchup(t *testing.T, hasNew bool, syncResponse *types.Response, want wantCatchup) { + t.Helper() if hasNew != want.hasNew { t.Errorf("got hasNew=%v want %v", hasNew, want.hasNew) } diff --git a/syncapi/storage/postgres/current_room_state_table.go b/syncapi/storage/postgres/current_room_state_table.go index d13b7be41..58f404511 100644 --- a/syncapi/storage/postgres/current_room_state_table.go +++ b/syncapi/storage/postgres/current_room_state_table.go @@ -112,7 +112,7 @@ const selectEventsWithEventIDsSQL = "" + const selectSharedUsersSQL = "" + "SELECT state_key FROM syncapi_current_room_state WHERE room_id = ANY(" + " SELECT room_id FROM syncapi_current_room_state WHERE state_key = $1 AND membership='join'" + - ") AND state_key = ANY($2) AND membership='join';" + ") AND state_key = ANY($2) AND membership IN ('join', 'invite');" type currentRoomStateStatements struct { upsertRoomStateStmt *sql.Stmt @@ -407,7 +407,7 @@ func (s *currentRoomStateStatements) SelectSharedUsers( ctx context.Context, txn *sql.Tx, userID string, otherUserIDs []string, ) ([]string, error) { stmt := sqlutil.TxStmt(txn, s.selectSharedUsersStmt) - rows, err := stmt.QueryContext(ctx, userID, otherUserIDs) + rows, err := stmt.QueryContext(ctx, userID, pq.Array(otherUserIDs)) if err != nil { return nil, err } diff --git a/syncapi/storage/sqlite3/current_room_state_table.go b/syncapi/storage/sqlite3/current_room_state_table.go index e19298aee..3a10b2325 100644 --- a/syncapi/storage/sqlite3/current_room_state_table.go +++ b/syncapi/storage/sqlite3/current_room_state_table.go @@ -94,9 +94,9 @@ const selectEventsWithEventIDsSQL = "" + " FROM syncapi_current_room_state WHERE event_id IN ($1)" const selectSharedUsersSQL = "" + - "SELECT state_key FROM syncapi_current_room_state WHERE room_id = ANY(" + + "SELECT state_key FROM syncapi_current_room_state WHERE room_id IN(" + " SELECT room_id FROM syncapi_current_room_state WHERE state_key = $1 AND membership='join'" + - ") AND state_key IN ($2) AND membership='join';" + ") AND state_key IN ($2) AND membership IN ('join', 'invite');" type currentRoomStateStatements struct { db *sql.DB @@ -420,25 +420,28 @@ func (s *currentRoomStateStatements) SelectStateEvent( func (s *currentRoomStateStatements) SelectSharedUsers( ctx context.Context, txn *sql.Tx, userID string, otherUserIDs []string, ) ([]string, error) { - query := strings.Replace(selectSharedUsersSQL, "($2)", sqlutil.QueryVariadicOffset(len(otherUserIDs), 1), 1) - stmt, err := s.db.Prepare(query) - if err != nil { - return nil, fmt.Errorf("SelectSharedUsers s.db.Prepare: %w", err) - } - defer internal.CloseAndLogIfError(ctx, stmt, "SelectSharedUsers: stmt.close() failed") - rows, err := sqlutil.TxStmt(txn, stmt).QueryContext(ctx, userID, otherUserIDs) - if err != nil { - return nil, err - } - defer internal.CloseAndLogIfError(ctx, rows, "selectSharedUsersStmt: rows.close() failed") - var stateKey string - result := make([]string, 0, len(otherUserIDs)) - for rows.Next() { - if err := rows.Scan(&stateKey); err != nil { - return nil, err - } - result = append(result, stateKey) + params := make([]interface{}, len(otherUserIDs)+1) + params[0] = userID + for k, v := range otherUserIDs { + params[k+1] = v } - return result, rows.Err() + + result := make([]string, 0, len(otherUserIDs)) + query := strings.Replace(selectSharedUsersSQL, "($2)", sqlutil.QueryVariadicOffset(len(otherUserIDs), 1), 1) + err := sqlutil.RunLimitedVariablesQuery( + ctx, query, s.db, params, sqlutil.SQLite3MaxVariables, + func(rows *sql.Rows) error { + var stateKey string + for rows.Next() { + if err := rows.Scan(&stateKey); err != nil { + return err + } + result = append(result, stateKey) + } + return nil + }, + ) + + return result, err } From 3bf5ae5ffef0ebc140f55320658d9b07bc58e848 Mon Sep 17 00:00:00 2001 From: Neil Alexander Date: Wed, 3 Aug 2022 17:37:27 +0100 Subject: [PATCH 11/12] Try more servers when calling `/state_ids` (#2610) * Try more servers when calling `/state_ids` * More logging * Maybe fix concurrent map write * Revert "Maybe fix concurrent map write" This reverts commit da0dbb836207a911afe77e6f6d63c4809669693c. * Enforce a limit of 20s per server, 5 mins total --- roomserver/internal/input/input_missing.go | 20 ++++++++++++++++++-- 1 file changed, 18 insertions(+), 2 deletions(-) diff --git a/roomserver/internal/input/input_missing.go b/roomserver/internal/input/input_missing.go index c78e5d79a..0dd2b64c0 100644 --- a/roomserver/internal/input/input_missing.go +++ b/roomserver/internal/input/input_missing.go @@ -326,8 +326,10 @@ func (t *missingStateReq) lookupStateAfterEvent(ctx context.Context, roomVersion return respState, true, nil } + logrus.WithContext(ctx).Warnf("State for event %s not available locally, falling back to federation (via %d servers)", eventID, len(t.servers)) respState, err := t.lookupStateBeforeEvent(ctx, roomVersion, roomID, eventID) if err != nil { + logrus.WithContext(ctx).WithError(err).Errorf("Failed to look up state before event %s", eventID) return nil, false, fmt.Errorf("t.lookupStateBeforeEvent: %w", err) } @@ -339,6 +341,7 @@ func (t *missingStateReq) lookupStateAfterEvent(ctx context.Context, roomVersion case nil: // do nothing default: + logrus.WithContext(ctx).WithError(err).Errorf("Failed to look up event %s", eventID) return nil, false, fmt.Errorf("t.lookupEvent: %w", err) } h = t.cacheAndReturn(h) @@ -662,9 +665,22 @@ func (t *missingStateReq) lookupMissingStateViaStateIDs(ctx context.Context, roo util.GetLogger(ctx).WithField("room_id", roomID).Infof("lookupMissingStateViaStateIDs %s", eventID) // fetch the state event IDs at the time of the event - stateIDs, err := t.federation.LookupStateIDs(ctx, t.origin, roomID, eventID) + var stateIDs gomatrixserverlib.RespStateIDs + var err error + count := 0 + totalctx, totalcancel := context.WithTimeout(ctx, time.Minute*5) + for _, serverName := range t.servers { + reqctx, reqcancel := context.WithTimeout(totalctx, time.Second*20) + stateIDs, err = t.federation.LookupStateIDs(reqctx, serverName, roomID, eventID) + reqcancel() + if err == nil { + break + } + count++ + } + totalcancel() if err != nil { - return nil, err + return nil, fmt.Errorf("t.federation.LookupStateIDs tried %d server(s), last error: %w", count, err) } // work out which auth/state IDs are missing wantIDs := append(stateIDs.StateEventIDs, stateIDs.AuthEventIDs...) From a2bed259dd765bd0b9781cd65594343d22c07986 Mon Sep 17 00:00:00 2001 From: Neil Alexander Date: Wed, 3 Aug 2022 17:42:13 +0100 Subject: [PATCH 12/12] Version 0.9.1 (#2616) * Version 0.9.1 * Update CHANGES.md --- CHANGES.md | 14 ++++++++++++++ internal/version.go | 2 +- 2 files changed, 15 insertions(+), 1 deletion(-) diff --git a/CHANGES.md b/CHANGES.md index 0ae927446..5dd8da362 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -1,5 +1,19 @@ # Changelog +## Dendrite 0.9.1 (2022-08-03) + +### Fixes + +* Upgrades a dependency which caused issues building Dendrite with Go 1.19 +* The roomserver will no longer give up prematurely after failing to call `/state_ids` +* Removes the faulty room info cache, which caused of a number of race conditions and occasional bugs (including when creating and joining rooms) +* The media endpoint now sets the `Cache-Control` header correctly to prevent web-based clients from hitting media endpoints excessively +* The sync API will now advance the PDU stream position correctly in all cases (contributed by [sergekh2](https://github.com/sergekh2)) +* The sync API will now delete the correct range of send-to-device messages when advancing the stream position +* The device list `changed` key in the `/sync` response should now return the correct users +* A data race when looking up missing state has been fixed +* The `/send_join` API is now applying stronger validation to the received membership event + ## Dendrite 0.9.0 (2022-08-01) ### Features diff --git a/internal/version.go b/internal/version.go index 6d29a68ee..38d0864e7 100644 --- a/internal/version.go +++ b/internal/version.go @@ -17,7 +17,7 @@ var build string const ( VersionMajor = 0 VersionMinor = 9 - VersionPatch = 0 + VersionPatch = 1 VersionTag = "" // example: "rc1" )