diff --git a/docs/administration/5_troubleshooting.md b/docs/administration/5_troubleshooting.md new file mode 100644 index 000000000..14df2e3fb --- /dev/null +++ b/docs/administration/5_troubleshooting.md @@ -0,0 +1,81 @@ +--- +title: Troubleshooting +parent: Administration +permalink: /administration/troubleshooting +--- + +# Troubleshooting + +If your Dendrite installation is acting strangely, there are a few things you should +check before seeking help. + +## 1. Logs + +Dendrite, by default, will log all warnings and errors to stdout, in addition to any +other locations configured in the `dendrite.yaml` configuration file. Often there will +be clues in the logs. + +You can increase this log level to the more verbose `debug` level if necessary by adding +this to the config and restarting Dendrite: + +``` +logging: +- type: std + level: debug +``` + +Look specifically for lines that contain `level=error` or `level=warning`. + +## 2. Federation tester + +If you are experiencing problems federating with other homeservers, you should check +that the [Federation Tester](https://federationtester.matrix.org) is passing for your +server. + +Common reasons that it may not pass include: + +1. Incorrect DNS configuration; +2. Misconfigured DNS SRV entries or well-known files; +3. Invalid TLS/SSL certificates; +4. Reverse proxy configuration issues (if applicable). + +Correct any errors if shown and re-run the federation tester to check the results. + +## 3. System time + +Matrix relies heavily on TLS which requires the system time to be correct. If the clock +drifts then you may find that federation no longer works reliably (or at all) and clients may +struggle to connect to your Dendrite server. + +Ensure that your system time is correct and consider syncing to a reliable NTP source. + +## 4. 
Database connections + +If you are using the PostgreSQL database, you should ensure that Dendrite's configured +number of database connections does not exceed the maximum allowed by PostgreSQL. + +Open your `postgresql.conf` configuration file and check the value of `max_connections` +(which is typically `100` by default). Then open your `dendrite.yaml` configuration file +and ensure that: + +1. If you are using the `global.database` section, that `max_open_conns` does not exceed + that number; +2. If you are **not** using the `global.database` section, that the sum total of all + `max_open_conns` across all `database` blocks does not exceed that number. + +## 5. File descriptors + +Dendrite requires a sufficient number of file descriptors for every connection it makes +to a remote server, every connection to the database engine and every file it is reading +or writing to at a given time (media, logs etc). We recommend ensuring that the limit is +no lower than 65535 for Dendrite. + +Dendrite will check at startup if there are a sufficient number of available descriptors. +If there aren't, you will see a log line like this: + +``` +level=warning msg="IMPORTANT: Process file descriptor limit is currently 65535, it is recommended to raise the limit for Dendrite to at least 65535 to avoid issues" +``` + +Follow the [Optimisation](../installation/10_optimisation.md) instructions to correct the +available number of file descriptors. 
diff --git a/go.mod b/go.mod index 2a2a037c1..1e62e1d93 100644 --- a/go.mod +++ b/go.mod @@ -25,7 +25,7 @@ require ( github.com/matrix-org/dugong v0.0.0-20210921133753-66e6b1c67e2e github.com/matrix-org/go-sqlite3-js v0.0.0-20220419092513-28aa791a1c91 github.com/matrix-org/gomatrix v0.0.0-20210324163249-be2af5ef2e16 - github.com/matrix-org/gomatrixserverlib v0.0.0-20220711125303-3bb2e997a44c + github.com/matrix-org/gomatrixserverlib v0.0.0-20220713083127-fc2ea1e62e46 github.com/matrix-org/pinecone v0.0.0-20220708135211-1ce778fcde6a github.com/matrix-org/util v0.0.0-20200807132607-55161520e1d4 github.com/mattn/go-sqlite3 v1.14.13 diff --git a/go.sum b/go.sum index 98549f702..86656ccd5 100644 --- a/go.sum +++ b/go.sum @@ -341,8 +341,8 @@ github.com/matrix-org/go-sqlite3-js v0.0.0-20220419092513-28aa791a1c91/go.mod h1 github.com/matrix-org/gomatrix v0.0.0-20190528120928-7df988a63f26/go.mod h1:3fxX6gUjWyI/2Bt7J1OLhpCzOfO/bB3AiX0cJtEKud0= github.com/matrix-org/gomatrix v0.0.0-20210324163249-be2af5ef2e16 h1:ZtO5uywdd5dLDCud4r0r55eP4j9FuUNpl60Gmntcop4= github.com/matrix-org/gomatrix v0.0.0-20210324163249-be2af5ef2e16/go.mod h1:/gBX06Kw0exX1HrwmoBibFA98yBk/jxKpGVeyQbff+s= -github.com/matrix-org/gomatrixserverlib v0.0.0-20220711125303-3bb2e997a44c h1:mt30TDK8kXKV+nCmVfnqoXsh842N+74kvZw7DXuS/JQ= -github.com/matrix-org/gomatrixserverlib v0.0.0-20220711125303-3bb2e997a44c/go.mod h1:jX38yp3SSLJNftBg3PXU1ayd0PCLIiDHQ4xAc9DIixk= +github.com/matrix-org/gomatrixserverlib v0.0.0-20220713083127-fc2ea1e62e46 h1:5X/kXY3nwqKOwwrE9tnMKrjbsi3PHigQYvrvDBSntO8= +github.com/matrix-org/gomatrixserverlib v0.0.0-20220713083127-fc2ea1e62e46/go.mod h1:jX38yp3SSLJNftBg3PXU1ayd0PCLIiDHQ4xAc9DIixk= github.com/matrix-org/pinecone v0.0.0-20220708135211-1ce778fcde6a h1:DdG8vXMlZ65EAtc4V+3t7zHZ2Gqs24pSnyXS+4BRHUs= github.com/matrix-org/pinecone v0.0.0-20220708135211-1ce778fcde6a/go.mod h1:ulJzsVOTssIVp1j/m5eI//4VpAGDkMt5NrRuAVX7wpc= github.com/matrix-org/util v0.0.0-20190711121626-527ce5ddefc7/go.mod 
h1:vVQlW/emklohkZnOPwD3LrZUBqdfsbiyO3p1lNV8F6U= diff --git a/internal/caching/cache_roominfo.go b/internal/caching/cache_roominfo.go index d03a61077..5dfed3c85 100644 --- a/internal/caching/cache_roominfo.go +++ b/internal/caching/cache_roominfo.go @@ -16,18 +16,18 @@ import ( // a room Info cache. It must only be used from the roomserver only // It is not safe for use from other components. type RoomInfoCache interface { - GetRoomInfo(roomID string) (roomInfo types.RoomInfo, ok bool) - StoreRoomInfo(roomID string, roomInfo types.RoomInfo) + GetRoomInfo(roomID string) (roomInfo *types.RoomInfo, ok bool) + StoreRoomInfo(roomID string, roomInfo *types.RoomInfo) } // GetRoomInfo must only be called from the roomserver only. It is not // safe for use from other components. -func (c Caches) GetRoomInfo(roomID string) (types.RoomInfo, bool) { +func (c Caches) GetRoomInfo(roomID string) (*types.RoomInfo, bool) { return c.RoomInfos.Get(roomID) } // StoreRoomInfo must only be called from the roomserver only. It is not // safe for use from other components. 
-func (c Caches) StoreRoomInfo(roomID string, roomInfo types.RoomInfo) { +func (c Caches) StoreRoomInfo(roomID string, roomInfo *types.RoomInfo) { c.RoomInfos.Set(roomID, roomInfo) } diff --git a/internal/caching/caches.go b/internal/caching/caches.go index 14b232dd0..e7914ce7d 100644 --- a/internal/caching/caches.go +++ b/internal/caching/caches.go @@ -28,7 +28,7 @@ type Caches struct { RoomServerRoomNIDs Cache[string, types.RoomNID] // room ID -> room NID RoomServerRoomIDs Cache[int64, string] // room NID -> room ID RoomServerEvents Cache[int64, *gomatrixserverlib.Event] // event NID -> event - RoomInfos Cache[string, types.RoomInfo] // room ID -> room info + RoomInfos Cache[string, *types.RoomInfo] // room ID -> room info FederationPDUs Cache[int64, *gomatrixserverlib.HeaderedEvent] // queue NID -> PDU FederationEDUs Cache[int64, *gomatrixserverlib.EDU] // queue NID -> EDU SpaceSummaryRooms Cache[string, gomatrixserverlib.MSC2946SpacesResponse] // room ID -> space response diff --git a/internal/caching/impl_ristretto.go b/internal/caching/impl_ristretto.go index 6d625b552..677218b5e 100644 --- a/internal/caching/impl_ristretto.go +++ b/internal/caching/impl_ristretto.go @@ -100,7 +100,7 @@ func NewRistrettoCache(maxCost config.DataUnit, maxAge time.Duration, enableProm MaxAge: maxAge, }, }, - RoomInfos: &RistrettoCachePartition[string, types.RoomInfo]{ // room ID -> room info + RoomInfos: &RistrettoCachePartition[string, *types.RoomInfo]{ // room ID -> room info cache: cache, Prefix: roomInfosCache, Mutable: true, diff --git a/roomserver/storage/shared/room_updater.go b/roomserver/storage/shared/room_updater.go index 8f4e011bf..c35ac653c 100644 --- a/roomserver/storage/shared/room_updater.go +++ b/roomserver/storage/shared/room_updater.go @@ -225,13 +225,12 @@ func (u *RoomUpdater) SetLatestEvents( if err := u.d.RoomsTable.UpdateLatestEventNIDs(u.ctx, txn, roomNID, eventNIDs, lastEventNIDSent, currentStateSnapshotNID); err != nil { return 
fmt.Errorf("u.d.RoomsTable.updateLatestEventNIDs: %w", err) } - if roomID, ok := u.d.Cache.GetRoomServerRoomID(roomNID); ok { - if roomInfo, ok := u.d.Cache.GetRoomInfo(roomID); ok { - roomInfo.StateSnapshotNID = currentStateSnapshotNID - roomInfo.IsStub = false - u.d.Cache.StoreRoomInfo(roomID, roomInfo) - } - } + + // Since it's entirely possible that this types.RoomInfo came from the + // cache, we should make sure to update that entry so that the next run + // works from live data. + u.roomInfo.StateSnapshotNID = currentStateSnapshotNID + u.roomInfo.IsStub = false return nil }) } diff --git a/roomserver/storage/shared/storage.go b/roomserver/storage/shared/storage.go index 692af1f6c..d8d5f67c8 100644 --- a/roomserver/storage/shared/storage.go +++ b/roomserver/storage/shared/storage.go @@ -139,13 +139,13 @@ func (d *Database) RoomInfo(ctx context.Context, roomID string) (*types.RoomInfo } func (d *Database) roomInfo(ctx context.Context, txn *sql.Tx, roomID string) (*types.RoomInfo, error) { - if roomInfo, ok := d.Cache.GetRoomInfo(roomID); ok { - return &roomInfo, nil + if roomInfo, ok := d.Cache.GetRoomInfo(roomID); ok && roomInfo != nil { + return roomInfo, nil } roomInfo, err := d.RoomsTable.SelectRoomInfo(ctx, txn, roomID) if err == nil && roomInfo != nil { d.Cache.StoreRoomServerRoomID(roomInfo.RoomNID, roomID) - d.Cache.StoreRoomInfo(roomID, *roomInfo) + d.Cache.StoreRoomInfo(roomID, roomInfo) } return roomInfo, err } diff --git a/sytest-whitelist b/sytest-whitelist index ea25c75d0..2a145291f 100644 --- a/sytest-whitelist +++ b/sytest-whitelist @@ -719,4 +719,5 @@ registration is idempotent, with username specified Setting state twice is idempotent Joining room twice is idempotent Inbound federation can return missing events for shared visibility -Inbound federation ignores redactions from invalid servers room > v3 \ No newline at end of file +Inbound federation ignores redactions from invalid servers room > v3 +Newly joined room includes presence in 
incremental sync \ No newline at end of file