diff --git a/roomserver/internal/helpers/auth.go b/roomserver/internal/helpers/auth.go index 648c50cf6..935a045df 100644 --- a/roomserver/internal/helpers/auth.go +++ b/roomserver/internal/helpers/auth.go @@ -39,7 +39,7 @@ func CheckForSoftFail( var authStateEntries []types.StateEntry var err error if rewritesState { - authStateEntries, err = db.StateEntriesForEventIDs(ctx, stateEventIDs) + authStateEntries, err = db.StateEntriesForEventIDs(ctx, stateEventIDs, true) if err != nil { return true, fmt.Errorf("StateEntriesForEventIDs failed: %w", err) } @@ -97,7 +97,7 @@ func CheckAuthEvents( authEventIDs []string, ) ([]types.EventNID, error) { // Grab the numeric IDs for the supplied auth state events from the database. - authStateEntries, err := db.StateEntriesForEventIDs(ctx, authEventIDs) + authStateEntries, err := db.StateEntriesForEventIDs(ctx, authEventIDs, true) if err != nil { return nil, fmt.Errorf("db.StateEntriesForEventIDs: %w", err) } diff --git a/roomserver/internal/input/input_events.go b/roomserver/internal/input/input_events.go index 53ccd5973..0ece9d145 100644 --- a/roomserver/internal/input/input_events.go +++ b/roomserver/internal/input/input_events.go @@ -301,7 +301,7 @@ func (r *Inputer) processRoomEvent( // bother doing this if the event was already rejected as it just ends up // burning CPU time. historyVisibility := gomatrixserverlib.HistoryVisibilityShared // Default to shared. - if rejectionErr == nil && !isRejected && !softfail { + if input.Kind != api.KindOutlier && rejectionErr == nil && !isRejected && !softfail { var err error historyVisibility, rejectionErr, err = r.processStateBefore(ctx, input, missingPrev) if err != nil { @@ -356,6 +356,8 @@ func (r *Inputer) processRoomEvent( // We stop here if the event is rejected: We've stored it but won't update forward extremities or notify anyone about it. if isRejected || softfail { logger.WithError(rejectionErr).WithFields(logrus.Fields{ + "room_id": event.RoomID(), + "event_id": event.EventID(), "soft_fail": softfail, "missing_prev": missingPrev, }).Warn("Stored rejected event") @@ -661,7 +663,7 @@ func (r *Inputer) calculateAndSetState( // We've been told what the state at the event is so we don't need to calculate it. // Check that those state events are in the database and store the state. var entries []types.StateEntry - if entries, err = r.DB.StateEntriesForEventIDs(ctx, input.StateEventIDs); err != nil { + if entries, err = r.DB.StateEntriesForEventIDs(ctx, input.StateEventIDs, true); err != nil { return fmt.Errorf("updater.StateEntriesForEventIDs: %w", err) } entries = types.DeduplicateStateEntries(entries) diff --git a/roomserver/internal/perform/perform_backfill.go b/roomserver/internal/perform/perform_backfill.go index 298ba04f6..aecff8b88 100644 --- a/roomserver/internal/perform/perform_backfill.go +++ b/roomserver/internal/perform/perform_backfill.go @@ -140,11 +140,11 @@ func (r *Backfiller) backfillViaFederation(ctx context.Context, req *api.Perform continue } var entries []types.StateEntry - if entries, err = r.DB.StateEntriesForEventIDs(ctx, stateIDs); err != nil { + if entries, err = r.DB.StateEntriesForEventIDs(ctx, stateIDs, true); err != nil { // attempt to fetch the missing events r.fetchAndStoreMissingEvents(ctx, info.RoomVersion, requester, stateIDs) // try again - entries, err = r.DB.StateEntriesForEventIDs(ctx, stateIDs) + entries, err = r.DB.StateEntriesForEventIDs(ctx, stateIDs, true) if err != nil { logrus.WithError(err).WithField("event_id", ev.EventID()).Error("backfillViaFederation: failed to get state entries for event") return err diff --git a/roomserver/storage/interface.go b/roomserver/storage/interface.go index 5c068873a..43e8da7bb 100644 --- a/roomserver/storage/interface.go +++ b/roomserver/storage/interface.go @@ -79,7 +79,7 @@ type Database interface { // Look up the state entries for a list of string event IDs // Returns an error if the there is an error talking to the database // Returns a types.MissingEventError if the event IDs aren't in the database. - StateEntriesForEventIDs(ctx context.Context, eventIDs []string) ([]types.StateEntry, error) + StateEntriesForEventIDs(ctx context.Context, eventIDs []string, excludeRejected bool) ([]types.StateEntry, error) // Look up the string event state keys for a list of numeric event state keys // Returns an error if there was a problem talking to the database. EventStateKeys(ctx context.Context, eventStateKeyNIDs []types.EventStateKeyNID) (map[types.EventStateKeyNID]string, error) diff --git a/roomserver/storage/postgres/events_table.go b/roomserver/storage/postgres/events_table.go index c7748d2be..a310c3963 100644 --- a/roomserver/storage/postgres/events_table.go +++ b/roomserver/storage/postgres/events_table.go @@ -88,6 +88,14 @@ const bulkSelectStateEventByIDSQL = "" + " WHERE event_id = ANY($1)" + " ORDER BY event_type_nid, event_state_key_nid ASC" +// Bulk lookup of events by string ID that aren't excluded. +// Sort by the numeric IDs for event type and state key. +// This means we can use binary search to lookup entries by type and state key. +const bulkSelectStateEventByIDExcludingRejectedSQL = "" + + "SELECT event_type_nid, event_state_key_nid, event_nid FROM roomserver_events" + + " WHERE event_id = ANY($1) AND is_rejected = FALSE" + + " ORDER BY event_type_nid, event_state_key_nid ASC" + // Bulk look up of events by event NID, optionally filtering by the event type // or event state key NIDs if provided. (The CARDINALITY check will return true // if the provided arrays are empty, ergo no filtering). @@ -140,23 +148,24 @@ const selectEventRejectedSQL = "" + "SELECT is_rejected FROM roomserver_events WHERE room_nid = $1 AND event_id = $2" type eventStatements struct { - insertEventStmt *sql.Stmt - selectEventStmt *sql.Stmt - bulkSelectStateEventByIDStmt *sql.Stmt - bulkSelectStateEventByNIDStmt *sql.Stmt - bulkSelectStateAtEventByIDStmt *sql.Stmt - updateEventStateStmt *sql.Stmt - selectEventSentToOutputStmt *sql.Stmt - updateEventSentToOutputStmt *sql.Stmt - selectEventIDStmt *sql.Stmt - bulkSelectStateAtEventAndReferenceStmt *sql.Stmt - bulkSelectEventReferenceStmt *sql.Stmt - bulkSelectEventIDStmt *sql.Stmt - bulkSelectEventNIDStmt *sql.Stmt - bulkSelectUnsentEventNIDStmt *sql.Stmt - selectMaxEventDepthStmt *sql.Stmt - selectRoomNIDsForEventNIDsStmt *sql.Stmt - selectEventRejectedStmt *sql.Stmt + insertEventStmt *sql.Stmt + selectEventStmt *sql.Stmt + bulkSelectStateEventByIDStmt *sql.Stmt + bulkSelectStateEventByIDExcludingRejectedStmt *sql.Stmt + bulkSelectStateEventByNIDStmt *sql.Stmt + bulkSelectStateAtEventByIDStmt *sql.Stmt + updateEventStateStmt *sql.Stmt + selectEventSentToOutputStmt *sql.Stmt + updateEventSentToOutputStmt *sql.Stmt + selectEventIDStmt *sql.Stmt + bulkSelectStateAtEventAndReferenceStmt *sql.Stmt + bulkSelectEventReferenceStmt *sql.Stmt + bulkSelectEventIDStmt *sql.Stmt + bulkSelectEventNIDStmt *sql.Stmt + bulkSelectUnsentEventNIDStmt *sql.Stmt + selectMaxEventDepthStmt *sql.Stmt + selectRoomNIDsForEventNIDsStmt *sql.Stmt + selectEventRejectedStmt *sql.Stmt } func CreateEventsTable(db *sql.DB) error { @@ -171,6 +180,7 @@ func PrepareEventsTable(db *sql.DB) (tables.Events, error) { {&s.insertEventStmt, insertEventSQL}, {&s.selectEventStmt, selectEventSQL}, {&s.bulkSelectStateEventByIDStmt, bulkSelectStateEventByIDSQL}, + {&s.bulkSelectStateEventByIDExcludingRejectedStmt, bulkSelectStateEventByIDExcludingRejectedSQL}, {&s.bulkSelectStateEventByNIDStmt, bulkSelectStateEventByNIDSQL}, {&s.bulkSelectStateAtEventByIDStmt, bulkSelectStateAtEventByIDSQL}, {&s.updateEventStateStmt, updateEventStateSQL}, @@ -221,11 +231,18 @@ func (s *eventStatements) SelectEvent( } // bulkSelectStateEventByID lookups a list of state events by event ID. -// If any of the requested events are missing from the database it returns a types.MissingEventError +// If not excluding rejected events, and any of the requested events are missing from +// the database it returns a types.MissingEventError. If excluding rejected events, +// the events will be silently omitted without error. func (s *eventStatements) BulkSelectStateEventByID( - ctx context.Context, txn *sql.Tx, eventIDs []string, + ctx context.Context, txn *sql.Tx, eventIDs []string, excludeRejected bool, ) ([]types.StateEntry, error) { - stmt := sqlutil.TxStmt(txn, s.bulkSelectStateEventByIDStmt) + var stmt *sql.Stmt + if excludeRejected { + stmt = sqlutil.TxStmt(txn, s.bulkSelectStateEventByIDExcludingRejectedStmt) + } else { + stmt = sqlutil.TxStmt(txn, s.bulkSelectStateEventByIDStmt) + } rows, err := stmt.QueryContext(ctx, pq.StringArray(eventIDs)) if err != nil { return nil, err @@ -235,10 +252,10 @@ func (s *eventStatements) BulkSelectStateEventByID( // because of the unique constraint on event IDs. // So we can allocate an array of the correct size now. // We might get fewer results than IDs so we adjust the length of the slice before returning it. - results := make([]types.StateEntry, len(eventIDs)) + results := make([]types.StateEntry, 0, len(eventIDs)) i := 0 for ; rows.Next(); i++ { - result := &results[i] + var result types.StateEntry if err = rows.Scan( &result.EventTypeNID, &result.EventStateKeyNID, @@ -246,11 +263,12 @@ func (s *eventStatements) BulkSelectStateEventByID( ); err != nil { return nil, err } + results = append(results, result) } if err = rows.Err(); err != nil { return nil, err } - if i != len(eventIDs) { + if !excludeRejected && i != len(eventIDs) { // If there are fewer rows returned than IDs then we were asked to lookup event IDs we don't have. // We don't know which ones were missing because we don't return the string IDs in the query. // However it should be possible debug this by replaying queries or entries from the input kafka logs. diff --git a/roomserver/storage/shared/storage.go b/roomserver/storage/shared/storage.go index 4f92adf1f..f35592a76 100644 --- a/roomserver/storage/shared/storage.go +++ b/roomserver/storage/shared/storage.go @@ -113,9 +113,9 @@ func (d *Database) eventStateKeyNIDs( } func (d *Database) StateEntriesForEventIDs( - ctx context.Context, eventIDs []string, + ctx context.Context, eventIDs []string, excludeRejected bool, ) ([]types.StateEntry, error) { - return d.EventsTable.BulkSelectStateEventByID(ctx, nil, eventIDs) + return d.EventsTable.BulkSelectStateEventByID(ctx, nil, eventIDs, excludeRejected) } func (d *Database) StateEntriesForTuples( diff --git a/roomserver/storage/sqlite3/events_table.go b/roomserver/storage/sqlite3/events_table.go index 174e3a9a7..943f256eb 100644 --- a/roomserver/storage/sqlite3/events_table.go +++ b/roomserver/storage/sqlite3/events_table.go @@ -65,6 +65,14 @@ const bulkSelectStateEventByIDSQL = "" + " WHERE event_id IN ($1)" + " ORDER BY event_type_nid, event_state_key_nid ASC" +// Bulk lookup of events by string ID that aren't rejected. +// Sort by the numeric IDs for event type and state key. +// This means we can use binary search to lookup entries by type and state key. +const bulkSelectStateEventByIDExcludingRejectedSQL = "" + + "SELECT event_type_nid, event_state_key_nid, event_nid FROM roomserver_events" + + " WHERE event_id IN ($1) AND is_rejected = 0" + + " ORDER BY event_type_nid, event_state_key_nid ASC" + const bulkSelectStateEventByNIDSQL = "" + "SELECT event_type_nid, event_state_key_nid, event_nid FROM roomserver_events" + " WHERE event_nid IN ($1)" @@ -113,19 +121,20 @@ const selectEventRejectedSQL = "" + "SELECT is_rejected FROM roomserver_events WHERE room_nid = $1 AND event_id = $2" type eventStatements struct { - db *sql.DB - insertEventStmt *sql.Stmt - selectEventStmt *sql.Stmt - bulkSelectStateEventByIDStmt *sql.Stmt - bulkSelectStateAtEventByIDStmt *sql.Stmt - updateEventStateStmt *sql.Stmt - selectEventSentToOutputStmt *sql.Stmt - updateEventSentToOutputStmt *sql.Stmt - selectEventIDStmt *sql.Stmt - bulkSelectStateAtEventAndReferenceStmt *sql.Stmt - bulkSelectEventReferenceStmt *sql.Stmt - bulkSelectEventIDStmt *sql.Stmt - selectEventRejectedStmt *sql.Stmt + db *sql.DB + insertEventStmt *sql.Stmt + selectEventStmt *sql.Stmt + bulkSelectStateEventByIDStmt *sql.Stmt + bulkSelectStateEventByIDExcludingRejectedStmt *sql.Stmt + bulkSelectStateAtEventByIDStmt *sql.Stmt + updateEventStateStmt *sql.Stmt + selectEventSentToOutputStmt *sql.Stmt + updateEventSentToOutputStmt *sql.Stmt + selectEventIDStmt *sql.Stmt + bulkSelectStateAtEventAndReferenceStmt *sql.Stmt + bulkSelectEventReferenceStmt *sql.Stmt + bulkSelectEventIDStmt *sql.Stmt + selectEventRejectedStmt *sql.Stmt //bulkSelectEventNIDStmt *sql.Stmt //bulkSelectUnsentEventNIDStmt *sql.Stmt //selectRoomNIDsForEventNIDsStmt *sql.Stmt @@ -145,6 +154,7 @@ func PrepareEventsTable(db *sql.DB) (tables.Events, error) { {&s.insertEventStmt, insertEventSQL}, {&s.selectEventStmt, selectEventSQL}, {&s.bulkSelectStateEventByIDStmt, bulkSelectStateEventByIDSQL}, + {&s.bulkSelectStateEventByIDExcludingRejectedStmt, bulkSelectStateEventByIDExcludingRejectedSQL}, {&s.bulkSelectStateAtEventByIDStmt, bulkSelectStateAtEventByIDSQL}, {&s.updateEventStateStmt, updateEventStateSQL}, {&s.updateEventSentToOutputStmt, updateEventSentToOutputSQL}, @@ -194,16 +204,24 @@ func (s *eventStatements) SelectEvent( } // bulkSelectStateEventByID lookups a list of state events by event ID. -// If any of the requested events are missing from the database it returns a types.MissingEventError +// If not excluding rejected events, and any of the requested events are missing from +// the database it returns a types.MissingEventError. If excluding rejected events, +// the events will be silently omitted without error. func (s *eventStatements) BulkSelectStateEventByID( - ctx context.Context, txn *sql.Tx, eventIDs []string, + ctx context.Context, txn *sql.Tx, eventIDs []string, excludeRejected bool, ) ([]types.StateEntry, error) { /////////////// + var sql string + if excludeRejected { + sql = bulkSelectStateEventByIDExcludingRejectedSQL + } else { + sql = bulkSelectStateEventByIDSQL + } iEventIDs := make([]interface{}, len(eventIDs)) for k, v := range eventIDs { iEventIDs[k] = v } - selectOrig := strings.Replace(bulkSelectStateEventByIDSQL, "($1)", sqlutil.QueryVariadic(len(iEventIDs)), 1) + selectOrig := strings.Replace(sql, "($1)", sqlutil.QueryVariadic(len(iEventIDs)), 1) selectPrep, err := s.db.Prepare(selectOrig) if err != nil { return nil, err @@ -221,10 +239,10 @@ func (s *eventStatements) BulkSelectStateEventByID( // because of the unique constraint on event IDs. // So we can allocate an array of the correct size now. // We might get fewer results than IDs so we adjust the length of the slice before returning it. - results := make([]types.StateEntry, len(eventIDs)) + results := make([]types.StateEntry, 0, len(eventIDs)) i := 0 for ; rows.Next(); i++ { - result := &results[i] + var result types.StateEntry if err = rows.Scan( &result.EventTypeNID, &result.EventStateKeyNID, @@ -232,8 +250,9 @@ func (s *eventStatements) BulkSelectStateEventByID( ); err != nil { return nil, err } + results = append(results, result) } - if i != len(eventIDs) { + if !excludeRejected && i != len(eventIDs) { // If there are fewer rows returned than IDs then we were asked to lookup event IDs we don't have. // We don't know which ones were missing because we don't return the string IDs in the query. // However it should be possible debug this by replaying queries or entries from the input kafka logs. diff --git a/roomserver/storage/tables/events_table_test.go b/roomserver/storage/tables/events_table_test.go index 74502a31f..107af4784 100644 --- a/roomserver/storage/tables/events_table_test.go +++ b/roomserver/storage/tables/events_table_test.go @@ -102,7 +102,7 @@ func Test_EventsTable(t *testing.T) { }) } - stateEvents, err := tab.BulkSelectStateEventByID(ctx, nil, eventIDs) + stateEvents, err := tab.BulkSelectStateEventByID(ctx, nil, eventIDs, false) assert.NoError(t, err) assert.Equal(t, len(stateEvents), len(eventIDs)) nids := make([]types.EventNID, 0, len(stateEvents)) diff --git a/roomserver/storage/tables/interface.go b/roomserver/storage/tables/interface.go index ed67c43d8..68d30f994 100644 --- a/roomserver/storage/tables/interface.go +++ b/roomserver/storage/tables/interface.go @@ -46,7 +46,7 @@ type Events interface { SelectEvent(ctx context.Context, txn *sql.Tx, eventID string) (types.EventNID, types.StateSnapshotNID, error) // bulkSelectStateEventByID lookups a list of state events by event ID. // If any of the requested events are missing from the database it returns a types.MissingEventError - BulkSelectStateEventByID(ctx context.Context, txn *sql.Tx, eventIDs []string) ([]types.StateEntry, error) + BulkSelectStateEventByID(ctx context.Context, txn *sql.Tx, eventIDs []string, excludeRejected bool) ([]types.StateEntry, error) BulkSelectStateEventByNID(ctx context.Context, txn *sql.Tx, eventNIDs []types.EventNID, stateKeyTuples []types.StateKeyTuple) ([]types.StateEntry, error) // BulkSelectStateAtEventByID lookups the state at a list of events by event ID. // If any of the requested events are missing from the database it returns a types.MissingEventError.