Add more logs (#2005)
* Add more logs. These help debug the migration issue in #1924 and include manual, data-loss-inducing fixes. Also log the origin server on processed transactions to help debug buggy server origins. * Fix query.
This commit is contained in:
parent
51b119107c
commit
7dc8fb1fe7
|
@ -345,7 +345,7 @@ func (t *txnReq) processTransaction(ctx context.Context) (*gomatrixserverlib.Res
|
|||
}
|
||||
|
||||
if c := len(results); c > 0 {
|
||||
util.GetLogger(ctx).Infof("Processed %d PDUs from transaction %q", c, t.TransactionID)
|
||||
util.GetLogger(ctx).Infof("Processed %d PDUs from %v in transaction %q", c, t.Origin, t.TransactionID)
|
||||
}
|
||||
return &gomatrixserverlib.RespSend{PDUs: results}, nil
|
||||
}
|
||||
|
|
|
@ -220,7 +220,6 @@ func UpStateBlocksRefactor(tx *sql.Tx) error {
|
|||
index := stateSnapshotData{snapshot.StateSnapshotNID, snapshot.RoomNID}
|
||||
newsnapshots[index] = append(newsnapshots[index], blocknid)
|
||||
}
|
||||
|
||||
for snapshotdata, newblocks := range newsnapshots {
|
||||
var newblocksarray pq.Int64Array
|
||||
for _, b := range newblocks {
|
||||
|
@ -229,11 +228,11 @@ func UpStateBlocksRefactor(tx *sql.Tx) error {
|
|||
|
||||
var newNID types.StateSnapshotNID
|
||||
err = tx.QueryRow(`
|
||||
INSERT INTO roomserver_state_snapshots (state_snapshot_hash, room_nid, state_block_nids)
|
||||
VALUES ($1, $2, $3)
|
||||
ON CONFLICT (state_snapshot_hash) DO UPDATE SET room_nid=$2
|
||||
RETURNING state_snapshot_nid
|
||||
`, newblocks.Hash(), snapshotdata.RoomNID, newblocksarray).Scan(&newNID)
|
||||
INSERT INTO roomserver_state_snapshots (state_snapshot_hash, room_nid, state_block_nids)
|
||||
VALUES ($1, $2, $3)
|
||||
ON CONFLICT (state_snapshot_hash) DO UPDATE SET room_nid=$2
|
||||
RETURNING state_snapshot_nid
|
||||
`, newblocks.Hash(), snapshotdata.RoomNID, newblocksarray).Scan(&newNID)
|
||||
if err != nil {
|
||||
return fmt.Errorf("tx.QueryRow.Scan (insert new snapshot): %w", err)
|
||||
}
|
||||
|
@ -252,16 +251,49 @@ func UpStateBlocksRefactor(tx *sql.Tx) error {
|
|||
// If we do, this is a problem if Dendrite tries to load the snapshot as it will not exist
|
||||
// in roomserver_state_snapshots
|
||||
var count int64
|
||||
|
||||
if err = tx.QueryRow(`SELECT COUNT(*) FROM roomserver_events WHERE state_snapshot_nid < $1 AND state_snapshot_nid != 0`, maxsnapshotid).Scan(&count); err != nil {
|
||||
return fmt.Errorf("assertion query failed: %s", err)
|
||||
}
|
||||
if count > 0 {
|
||||
var debugEventID, debugRoomID string
|
||||
var debugEventTypeNID, debugStateKeyNID, debugSnapNID, debugDepth int64
|
||||
err = tx.QueryRow(
|
||||
`SELECT event_id, event_type_nid, event_state_key_nid, roomserver_events.state_snapshot_nid, depth, room_id FROM roomserver_events
|
||||
JOIN roomserver_rooms ON roomserver_rooms.room_nid = roomserver_events.room_nid WHERE roomserver_events.state_snapshot_nid < $1 AND roomserver_events.state_snapshot_nid != 0`, maxsnapshotid,
|
||||
).Scan(&debugEventID, &debugEventTypeNID, &debugStateKeyNID, &debugSnapNID, &debugDepth, &debugRoomID)
|
||||
if err != nil {
|
||||
logrus.Errorf("cannot extract debug info: %v", err)
|
||||
} else {
|
||||
logrus.Errorf(
|
||||
"Affected row: event_id=%v room_id=%v type=%v state_key=%v snapshot=%v depth=%v",
|
||||
debugEventID, debugRoomID, debugEventTypeNID, debugStateKeyNID, debugSnapNID, debugDepth,
|
||||
)
|
||||
logrus.Errorf("To fix this manually, run this query first then retry the migration: "+
|
||||
"UPDATE roomserver_events SET state_snapshot_nid=0 WHERE event_id='%v'", debugEventID)
|
||||
}
|
||||
return fmt.Errorf("%d events exist in roomserver_events which have not been converted to a new state_snapshot_nid; this is a bug, please report", count)
|
||||
}
|
||||
if err = tx.QueryRow(`SELECT COUNT(*) FROM roomserver_rooms WHERE state_snapshot_nid < $1 AND state_snapshot_nid != 0`, maxsnapshotid).Scan(&count); err != nil {
|
||||
return fmt.Errorf("assertion query failed: %s", err)
|
||||
}
|
||||
if count > 0 {
|
||||
var debugRoomID string
|
||||
var debugSnapNID, debugLastEventNID int64
|
||||
err = tx.QueryRow(
|
||||
`SELECT room_id, state_snapshot_nid, last_event_sent_nid FROM roomserver_rooms WHERE state_snapshot_nid < $1 AND state_snapshot_nid != 0`, maxsnapshotid,
|
||||
).Scan(&debugRoomID, &debugSnapNID, &debugLastEventNID)
|
||||
if err != nil {
|
||||
logrus.Errorf("cannot extract debug info: %v", err)
|
||||
} else {
|
||||
logrus.Errorf(
|
||||
"Affected row: room_id=%v snapshot=%v last_sent=%v",
|
||||
debugRoomID, debugSnapNID, debugLastEventNID,
|
||||
)
|
||||
logrus.Errorf("To fix this manually, run this query first then retry the migration: "+
|
||||
"UPDATE roomserver_rooms SET state_snapshot_nid=0 WHERE room_id='%v'", debugRoomID)
|
||||
logrus.Errorf("Running this UPDATE will cause the room in question to become unavailable on this server. Leave and re-join the room afterwards.")
|
||||
}
|
||||
return fmt.Errorf("%d rooms exist in roomserver_rooms which have not been converted to a new state_snapshot_nid; this is a bug, please report", count)
|
||||
}
|
||||
|
||||
|
|
Loading…
Reference in a new issue