From 8a5cd945b3b303034a718f01e179e0823ba0cc9a Mon Sep 17 00:00:00 2001 From: Kegan Dougal Date: Tue, 7 Sep 2021 10:29:06 +0100 Subject: [PATCH] Add more logs To help debug the migration issue in #1924 along with manual data-loss-inducing fixes. Also log the origin server on processed txns to help debug buggy server origins. --- federationapi/routing/send.go | 2 +- .../2021041615092700_state_blocks_refactor.go | 24 +++++++++++++++++++ 2 files changed, 25 insertions(+), 1 deletion(-) diff --git a/federationapi/routing/send.go b/federationapi/routing/send.go index 2b2828266..aacc8a919 100644 --- a/federationapi/routing/send.go +++ b/federationapi/routing/send.go @@ -345,7 +345,7 @@ func (t *txnReq) processTransaction(ctx context.Context) (*gomatrixserverlib.Res } if c := len(results); c > 0 { - util.GetLogger(ctx).Infof("Processed %d PDUs from transaction %q", c, t.TransactionID) + util.GetLogger(ctx).Infof("Processed %d PDUs from %v in transaction %q", c, t.Origin, t.TransactionID) } return &gomatrixserverlib.RespSend{PDUs: results}, nil } diff --git a/roomserver/storage/postgres/deltas/2021041615092700_state_blocks_refactor.go b/roomserver/storage/postgres/deltas/2021041615092700_state_blocks_refactor.go index 6b72de51a..96de729d8 100644 --- a/roomserver/storage/postgres/deltas/2021041615092700_state_blocks_refactor.go +++ b/roomserver/storage/postgres/deltas/2021041615092700_state_blocks_refactor.go @@ -256,12 +256,36 @@ func UpStateBlocksRefactor(tx *sql.Tx) error { return fmt.Errorf("assertion query failed: %s", err) } if count > 0 { + var debugEventID, debugRoomID string + var debugEventTypeNID, debugStateKeyNID, debugSnapNID, debugDepth int64 + err = tx.QueryRow( + `SELECT event_id, event_type_nid, event_state_key_nid, state_snapshot_nid, depth, room_id FROM roomserver_events + JOIN roomserver_rooms ON roomserver_rooms.room_nid = roomserver_events.room_nid WHERE state_snapshot_nid < $1 AND state_snapshot_nid != 0`, maxsnapshotid, + ).Scan(&debugEventID, &debugEventTypeNID, &debugStateKeyNID, &debugSnapNID, &debugDepth, &debugRoomID) + logrus.Errorf( + "Affected row: event_id=%v room_id=%v type=%v state_key=%v snapshot=%v depth=%v", + debugEventID, debugRoomID, debugEventTypeNID, debugStateKeyNID, debugSnapNID, debugDepth, + ) + logrus.Errorf("To fix this manually, run this query first then retry the migration: "+ + "UPDATE roomserver_events SET state_snapshot_nid=0 WHERE event_id='%v'", debugEventID) return fmt.Errorf("%d events exist in roomserver_events which have not been converted to a new state_snapshot_nid; this is a bug, please report", count) } if err = tx.QueryRow(`SELECT COUNT(*) FROM roomserver_rooms WHERE state_snapshot_nid < $1 AND state_snapshot_nid != 0`, maxsnapshotid).Scan(&count); err != nil { return fmt.Errorf("assertion query failed: %s", err) } if count > 0 { + var debugRoomID string + var debugSnapNID, debugLastEventNID int64 + err = tx.QueryRow( + `SELECT room_id, state_snapshot_nid, last_event_sent_nid FROM roomserver_rooms WHERE state_snapshot_nid < $1 AND state_snapshot_nid != 0`, maxsnapshotid, + ).Scan(&debugRoomID, &debugSnapNID, &debugLastEventNID) + logrus.Errorf( + "Affected row: room_id=%v snapshot=%v last_sent=%v", + debugRoomID, debugSnapNID, debugLastEventNID, + ) + logrus.Errorf("To fix this manually, run this query first then retry the migration: "+ + "UPDATE roomserver_rooms SET state_snapshot_nid=0 WHERE room_id='%v'", debugRoomID) + logrus.Errorf("Running this UPDATE will cause the room in question to become unavailable on this server. Leave and re-join the room afterwards.") return fmt.Errorf("%d rooms exist in roomserver_rooms which have not been converted to a new state_snapshot_nid; this is a bug, please report", count) }