diff --git a/roomserver/internal/input/input.go b/roomserver/internal/input/input.go index d6ad77fe6..d76e354ba 100644 --- a/roomserver/internal/input/input.go +++ b/roomserver/internal/input/input.go @@ -102,7 +102,7 @@ func (r *Inputer) Start() error { _ = msg.InProgress() // resets the acknowledgement wait timer defer eventsInProgress.Delete(index) defer roomserverInputBackpressure.With(prometheus.Labels{"room_id": roomID}).Dec() - action, err := r.processRoomEventUsingUpdater(context.Background(), roomID, &inputRoomEvent) + retry, err := r.processRoomEventUsingUpdater(context.Background(), roomID, &inputRoomEvent) if err != nil { if !errors.Is(err, context.DeadlineExceeded) && !errors.Is(err, context.Canceled) { sentry.CaptureException(err) @@ -113,10 +113,9 @@ func (r *Inputer) Start() error { "type": inputRoomEvent.Event.Type(), }).Warn("Roomserver failed to process async event") } - switch action { - case retryLater: + if retry { _ = msg.Nak() - case doNotRetry: + } else { _ = msg.Ack() } }) @@ -138,41 +137,35 @@ func (r *Inputer) Start() error { return err } -type retryAction int - -const ( - doNotRetry retryAction = iota - retryLater -) - // processRoomEventUsingUpdater opens up a room updater and tries to -// process the event. It returns whether or not we should positively -// or negatively acknowledge the event (i.e. for NATS) and an error -// if it occurred. +// process the event. It returns two values: the bool signifying whether +// we should retry later if possible (i.e. using NATS, because we couldn't +// commit the transaction) and an error signifying anything else that may +// have gone wrong. func (r *Inputer) processRoomEventUsingUpdater( ctx context.Context, roomID string, inputRoomEvent *api.InputRoomEvent, -) (retryAction, error) { +) (bool, error) { roomInfo, err := r.DB.RoomInfo(ctx, roomID) if err != nil { - return doNotRetry, fmt.Errorf("r.DB.RoomInfo: %w", err) + return false, fmt.Errorf("r.DB.RoomInfo: %w", err) } updater, err := r.DB.GetRoomUpdater(ctx, roomInfo) if err != nil { - return retryLater, fmt.Errorf("r.DB.GetRoomUpdater: %w", err) + return true, fmt.Errorf("r.DB.GetRoomUpdater: %w", err) } commit, err := r.processRoomEvent(ctx, updater, inputRoomEvent) if commit { if cerr := updater.Commit(); cerr != nil { - return retryLater, fmt.Errorf("updater.Commit: %w", cerr) + return true, fmt.Errorf("updater.Commit: %w", cerr) } } else { if rerr := updater.Rollback(); rerr != nil { - return retryLater, fmt.Errorf("updater.Rollback: %w", rerr) + return true, fmt.Errorf("updater.Rollback: %w", rerr) } } - return doNotRetry, err + return false, err } // InputRoomEvents implements api.RoomserverInternalAPI diff --git a/roomserver/storage/interface.go b/roomserver/storage/interface.go index 90365eefd..a9851e05b 100644 --- a/roomserver/storage/interface.go +++ b/roomserver/storage/interface.go @@ -24,8 +24,8 @@ import ( ) type Database interface { - // Do we support transactional isolation on this database engine? - SupportsTransactionalIsolation() bool + // Do we support processing input events for more than one room at a time? + SupportsConcurrentRoomInputs() bool // RoomInfo returns room information for the given room ID, or nil if there is no room. RoomInfo(ctx context.Context, roomID string) (*types.RoomInfo, error) // Store the room state at an event in the database diff --git a/roomserver/storage/shared/room_updater.go b/roomserver/storage/shared/room_updater.go index ec44323f5..bb9f5dc62 100644 --- a/roomserver/storage/shared/room_updater.go +++ b/roomserver/storage/shared/room_updater.go @@ -25,16 +25,7 @@ func rollback(txn *sql.Tx) { txn.Rollback() // nolint: errcheck } -func NewRoomUpdater(ctx context.Context, d *Database, roomInfo *types.RoomInfo) (*RoomUpdater, error) { - var txn *sql.Tx - if d.SupportsTransactionalIsolation() { - var err error - txn, err = d.DB.Begin() - if err != nil { - return nil, err - } - } - +func NewRoomUpdater(ctx context.Context, d *Database, txn *sql.Tx, roomInfo *types.RoomInfo) (*RoomUpdater, error) { // If the roomInfo is nil then that means that the room doesn't exist // yet, so we can't do `SelectLatestEventsNIDsForUpdate` because that // would involve locking a row on the table that doesn't exist. Instead diff --git a/roomserver/storage/shared/storage.go b/roomserver/storage/shared/storage.go index e6962e170..2df88534d 100644 --- a/roomserver/storage/shared/storage.go +++ b/roomserver/storage/shared/storage.go @@ -45,7 +45,7 @@ type Database struct { GetRoomUpdaterFn func(ctx context.Context, roomInfo *types.RoomInfo) (*RoomUpdater, error) } -func (d *Database) SupportsTransactionalIsolation() bool { +func (d *Database) SupportsConcurrentRoomInputs() bool { return true } @@ -447,7 +447,7 @@ func (d *Database) events( if err != nil { return nil, err } - eventIDs, _ := d.EventsTable.BulkSelectEventID(ctx, txn, eventNIDs) + eventIDs, _ := d.EventsTable.BulkSelectEventID(ctx, nil, eventNIDs) if err != nil { eventIDs = map[types.EventNID]string{} } @@ -501,15 +501,16 @@ func (d *Database) MembershipUpdater( ctx context.Context, roomID, targetUserID string, targetLocal bool, roomVersion gomatrixserverlib.RoomVersion, ) (*MembershipUpdater, error) { - var txn *sql.Tx - var err error - if d.SupportsTransactionalIsolation() { - txn, err = d.DB.Begin() - if err != nil { - return nil, err - } + txn, err := d.DB.Begin() + if err != nil { + return nil, err } - return NewMembershipUpdater(ctx, d, txn, roomID, targetUserID, targetLocal, roomVersion) + var updater *MembershipUpdater + _ = d.Writer.Do(d.DB, txn, func(txn *sql.Tx) error { + updater, err = NewMembershipUpdater(ctx, d, txn, roomID, targetUserID, targetLocal, roomVersion) + return err + }) + return updater, err } func (d *Database) GetRoomUpdater( @@ -518,7 +519,16 @@ func (d *Database) GetRoomUpdater( if d.GetRoomUpdaterFn != nil { return d.GetRoomUpdaterFn(ctx, roomInfo) } - return NewRoomUpdater(ctx, d, roomInfo) + txn, err := d.DB.Begin() + if err != nil { + return nil, err + } + var updater *RoomUpdater + _ = d.Writer.Do(d.DB, txn, func(txn *sql.Tx) error { + updater, err = NewRoomUpdater(ctx, d, txn, roomInfo) + return err + }) + return updater, err } func (d *Database) StoreEvent( diff --git a/roomserver/storage/sqlite3/storage.go b/roomserver/storage/sqlite3/storage.go index 3e6b9ce00..325c253b5 100644 --- a/roomserver/storage/sqlite3/storage.go +++ b/roomserver/storage/sqlite3/storage.go @@ -23,6 +23,7 @@ import ( "github.com/matrix-org/dendrite/internal/sqlutil" "github.com/matrix-org/dendrite/roomserver/storage/shared" "github.com/matrix-org/dendrite/roomserver/storage/sqlite3/deltas" + "github.com/matrix-org/dendrite/roomserver/types" "github.com/matrix-org/dendrite/setup/config" "github.com/matrix-org/gomatrixserverlib" ) @@ -192,7 +193,7 @@ func (d *Database) prepare(db *sql.DB, cache caching.RoomServerCaches) error { return nil } -func (d *Database) SupportsTransactionalIsolation() bool { +func (d *Database) SupportsConcurrentRoomInputs() bool { // This isn't supported in SQLite mode yet because of issues with // database locks. // TODO: Look at this again - the problem is probably to do with @@ -200,6 +201,18 @@ func (d *Database) SupportsTransactionalIsolation() bool { return false } +func (d *Database) GetRoomUpdater( + ctx context.Context, roomInfo *types.RoomInfo, +) (*shared.RoomUpdater, error) { + // TODO: Do not use transactions. We should be holding open this transaction but we cannot have + // multiple write transactions on sqlite. The code will perform additional + // write transactions independent of this one which will consistently cause + // 'database is locked' errors. As sqlite doesn't support multi-process on the + // same DB anyway, and we only execute updates sequentially, the only worries + // are for rolling back when things go wrong. (atomicity) + return shared.NewRoomUpdater(ctx, &d.Database, nil, roomInfo) +} + func (d *Database) MembershipUpdater( ctx context.Context, roomID, targetUserID string, targetLocal bool, roomVersion gomatrixserverlib.RoomVersion,