From 76e4ebaf78a698eb775ccb358fb5c253084747b9 Mon Sep 17 00:00:00 2001 From: Thibaut CHARLES Date: Wed, 7 Aug 2019 12:12:09 +0200 Subject: [PATCH] State events filtering database api (#438) This PR adds a gomatrixserverlib.Filter parameter to functions handling the syncapi_current_room_state table. It does not implement any filtering logic inside the syncapi IncrementalSync/CompleteSync functions, just the APIs for future use. Default filters are provided as placeholders in IncrementalSync/CompleteSync, so behaviour should be unchanged (except the default 20 event limit) SQL table will be changed. You can upgrade an existing database using: ``` ALTER TABLE syncapi_current_room_state ADD COLUMN IF NOT EXISTS sender text; UPDATE syncapi_current_room_state SET sender=(event_json::json->>'sender'); ALTER TABLE syncapi_current_room_state ALTER COLUMN sender SET NOT NULL; ALTER TABLE syncapi_current_room_state ADD COLUMN IF NOT EXISTS contains_url bool; UPDATE syncapi_current_room_state SET contains_url=(event_json::json->>'content')::json->>'url' IS NOT NULL; ALTER TABLE syncapi_current_room_state ALTER COLUMN contains_url SET NOT NULL; ``` Note: This depends on #436 (and includes all its commits). I'm not sure if Github will remove the duplicated commits once #436 is merged. --- syncapi/routing/state.go | 5 ++- syncapi/storage/current_room_state_table.go | 42 ++++++++++++++++++--- syncapi/storage/filtering.go | 36 ++++++++++++++++++ syncapi/storage/output_room_events_table.go | 41 ++++++++++++++++++-- syncapi/storage/syncserver.go | 31 ++++++++++----- 5 files changed, 134 insertions(+), 21 deletions(-) create mode 100644 syncapi/storage/filtering.go diff --git a/syncapi/routing/state.go b/syncapi/routing/state.go index 5571a0525..87a93d194 100644 --- a/syncapi/routing/state.go +++ b/syncapi/routing/state.go @@ -44,7 +44,10 @@ func OnIncomingStateRequest(req *http.Request, db *storage.SyncServerDatasource, // TODO(#287): Auth request and handle the case where the user has left (where // we should return the state at the poin they left) - stateEvents, err := db.GetStateEventsForRoom(req.Context(), roomID) + stateFilterPart := gomatrixserverlib.DefaultFilterPart() + // TODO: stateFilterPart should not limit the number of state events (or only limits abusive number of events) + + stateEvents, err := db.GetStateEventsForRoom(req.Context(), roomID, &stateFilterPart) if err != nil { return httputil.LogThenError(req, err) } diff --git a/syncapi/storage/current_room_state_table.go b/syncapi/storage/current_room_state_table.go index 852bfd760..88e7a76c3 100644 --- a/syncapi/storage/current_room_state_table.go +++ b/syncapi/storage/current_room_state_table.go @@ -17,6 +17,7 @@ package storage import ( "context" "database/sql" + "encoding/json" "github.com/lib/pq" "github.com/matrix-org/dendrite/common" @@ -32,6 +33,10 @@ CREATE TABLE IF NOT EXISTS syncapi_current_room_state ( event_id TEXT NOT NULL, -- The state event type e.g 'm.room.member' type TEXT NOT NULL, + -- The 'sender' property of the event. + sender TEXT NOT NULL, + -- true if the event content contains a url key + contains_url BOOL NOT NULL, -- The state_key value for this state event e.g '' state_key TEXT NOT NULL, -- The JSON for the event. Stored as TEXT because this should be valid UTF-8. @@ -46,16 +51,16 @@ CREATE TABLE IF NOT EXISTS syncapi_current_room_state ( CONSTRAINT syncapi_room_state_unique UNIQUE (room_id, type, state_key) ); -- for event deletion -CREATE UNIQUE INDEX IF NOT EXISTS syncapi_event_id_idx ON syncapi_current_room_state(event_id); +CREATE UNIQUE INDEX IF NOT EXISTS syncapi_event_id_idx ON syncapi_current_room_state(event_id, room_id, type, sender, contains_url); -- for querying membership states of users CREATE INDEX IF NOT EXISTS syncapi_membership_idx ON syncapi_current_room_state(type, state_key, membership) WHERE membership IS NOT NULL AND membership != 'leave'; ` const upsertRoomStateSQL = "" + - "INSERT INTO syncapi_current_room_state (room_id, event_id, type, state_key, event_json, membership, added_at)" + - " VALUES ($1, $2, $3, $4, $5, $6, $7)" + + "INSERT INTO syncapi_current_room_state (room_id, event_id, type, sender, contains_url, state_key, event_json, membership, added_at)" + + " VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9)" + " ON CONFLICT ON CONSTRAINT syncapi_room_state_unique" + - " DO UPDATE SET event_id = $2, event_json = $5, membership = $6, added_at = $7" + " DO UPDATE SET event_id = $2, sender=$4, contains_url=$5, event_json = $7, membership = $8, added_at = $9" const deleteRoomStateByEventIDSQL = "" + "DELETE FROM syncapi_current_room_state WHERE event_id = $1" @@ -64,7 +69,13 @@ const selectRoomIDsWithMembershipSQL = "" + "SELECT room_id FROM syncapi_current_room_state WHERE type = 'm.room.member' AND state_key = $1 AND membership = $2" const selectCurrentStateSQL = "" + - "SELECT event_json FROM syncapi_current_room_state WHERE room_id = $1" + "SELECT event_json FROM syncapi_current_room_state WHERE room_id = $1" + + " AND ( $2::text[] IS NULL OR sender = ANY($2) )" + + " AND ( $3::text[] IS NULL OR NOT(sender = ANY($3)) )" + + " AND ( $4::text[] IS NULL OR type LIKE ANY($4) )" + + " AND ( $5::text[] IS NULL OR NOT(type LIKE ANY($5)) )" + + " AND ( $6::bool IS NULL OR contains_url = $6 )" + + " LIMIT $7" const selectJoinedUsersSQL = "" + "SELECT room_id, state_key FROM syncapi_current_room_state WHERE type = 'm.room.member' AND membership = 'join'" @@ -166,9 +177,17 @@ func (s *currentRoomStateStatements) selectRoomIDsWithMembership( // CurrentState returns all the current state events for the given room. func (s *currentRoomStateStatements) selectCurrentState( ctx context.Context, txn *sql.Tx, roomID string, + stateFilterPart *gomatrixserverlib.FilterPart, ) ([]gomatrixserverlib.Event, error) { stmt := common.TxStmt(txn, s.selectCurrentStateStmt) - rows, err := stmt.QueryContext(ctx, roomID) + rows, err := stmt.QueryContext(ctx, roomID, + pq.StringArray(stateFilterPart.Senders), + pq.StringArray(stateFilterPart.NotSenders), + pq.StringArray(filterConvertTypeWildcardToSQL(stateFilterPart.Types)), + pq.StringArray(filterConvertTypeWildcardToSQL(stateFilterPart.NotTypes)), + stateFilterPart.ContainsURL, + stateFilterPart.Limit, + ) if err != nil { return nil, err } @@ -189,12 +208,23 @@ func (s *currentRoomStateStatements) upsertRoomState( ctx context.Context, txn *sql.Tx, event gomatrixserverlib.Event, membership *string, addedAt int64, ) error { + // Parse content as JSON and search for an "url" key + containsURL := false + var content map[string]interface{} + if json.Unmarshal(event.Content(), &content) != nil { + // Set containsURL to true if url is present + _, containsURL = content["url"] + } + + // upsert state event stmt := common.TxStmt(txn, s.upsertRoomStateStmt) _, err := stmt.ExecContext( ctx, event.RoomID(), event.EventID(), event.Type(), + event.Sender(), + containsURL, *event.StateKey(), event.JSON(), membership, diff --git a/syncapi/storage/filtering.go b/syncapi/storage/filtering.go new file mode 100644 index 000000000..27b0b888a --- /dev/null +++ b/syncapi/storage/filtering.go @@ -0,0 +1,36 @@ +// Copyright 2017 Thibaut CHARLES +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package storage + +import ( + "strings" +) + +// filterConvertWildcardToSQL converts wildcards as defined in +// https://matrix.org/docs/spec/client_server/r0.3.0.html#post-matrix-client-r0-user-userid-filter +// to SQL wildcards that can be used with LIKE() +func filterConvertTypeWildcardToSQL(values []string) []string { + if values == nil { + // Return nil instead of []string{} so IS NULL can work correctly when + // the return value is passed into SQL queries + return nil + } + + ret := make([]string, len(values)) + for i := range values { + ret[i] = strings.Replace(values[i], "*", "%", -1) + } + return ret +} diff --git a/syncapi/storage/output_room_events_table.go b/syncapi/storage/output_room_events_table.go index 34632aedf..8fbeb18c9 100644 --- a/syncapi/storage/output_room_events_table.go +++ b/syncapi/storage/output_room_events_table.go @@ -17,6 +17,7 @@ package storage import ( "context" "database/sql" + "encoding/json" "sort" "github.com/matrix-org/dendrite/roomserver/api" @@ -43,6 +44,12 @@ CREATE TABLE IF NOT EXISTS syncapi_output_room_events ( room_id TEXT NOT NULL, -- The JSON for the event. Stored as TEXT because this should be valid UTF-8. event_json TEXT NOT NULL, + -- The event type e.g 'm.room.member'. + type TEXT NOT NULL, + -- The 'sender' property of the event. + sender TEXT NOT NULL, + -- true if the event content contains a url key. + contains_url BOOL NOT NULL, -- A list of event IDs which represent a delta of added/removed room state. This can be NULL -- if there is no delta. add_state_ids TEXT[], @@ -56,8 +63,8 @@ CREATE UNIQUE INDEX IF NOT EXISTS syncapi_event_id_idx ON syncapi_output_room_ev const insertEventSQL = "" + "INSERT INTO syncapi_output_room_events (" + - " room_id, event_id, event_json, add_state_ids, remove_state_ids, device_id, transaction_id" + - ") VALUES ($1, $2, $3, $4, $5, $6, $7) RETURNING id" + "room_id, event_id, event_json, type, sender, contains_url, add_state_ids, remove_state_ids, device_id, transaction_id" + + ") VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9, $10) RETURNING id" const selectEventsSQL = "" + "SELECT id, event_json FROM syncapi_output_room_events WHERE event_id = ANY($1)" @@ -75,7 +82,13 @@ const selectStateInRangeSQL = "" + "SELECT id, event_json, add_state_ids, remove_state_ids" + " FROM syncapi_output_room_events" + " WHERE (id > $1 AND id <= $2) AND (add_state_ids IS NOT NULL OR remove_state_ids IS NOT NULL)" + - " ORDER BY id ASC" + " AND ( $3::text[] IS NULL OR sender = ANY($3) )" + + " AND ( $4::text[] IS NULL OR NOT(sender = ANY($4)) )" + + " AND ( $5::text[] IS NULL OR type LIKE ANY($5) )" + + " AND ( $6::text[] IS NULL OR NOT(type LIKE ANY($6)) )" + + " AND ( $7::bool IS NULL OR contains_url = $7 )" + + " ORDER BY id ASC" + + " LIMIT $8" type outputRoomEventsStatements struct { insertEventStmt *sql.Stmt @@ -113,10 +126,19 @@ func (s *outputRoomEventsStatements) prepare(db *sql.DB) (err error) { // two positions, only the most recent state is returned. func (s *outputRoomEventsStatements) selectStateInRange( ctx context.Context, txn *sql.Tx, oldPos, newPos int64, + stateFilterPart *gomatrixserverlib.FilterPart, ) (map[string]map[string]bool, map[string]streamEvent, error) { stmt := common.TxStmt(txn, s.selectStateInRangeStmt) - rows, err := stmt.QueryContext(ctx, oldPos, newPos) + rows, err := stmt.QueryContext( + ctx, oldPos, newPos, + pq.StringArray(stateFilterPart.Senders), + pq.StringArray(stateFilterPart.NotSenders), + pq.StringArray(filterConvertTypeWildcardToSQL(stateFilterPart.Types)), + pq.StringArray(filterConvertTypeWildcardToSQL(stateFilterPart.NotTypes)), + stateFilterPart.ContainsURL, + stateFilterPart.Limit, + ) if err != nil { return nil, nil, err } @@ -205,12 +227,23 @@ func (s *outputRoomEventsStatements) insertEvent( txnID = &transactionID.TransactionID } + // Parse content as JSON and search for an "url" key + containsURL := false + var content map[string]interface{} + if json.Unmarshal(event.Content(), &content) != nil { + // Set containsURL to true if url is present + _, containsURL = content["url"] + } + stmt := common.TxStmt(txn, s.insertEventStmt) err = stmt.QueryRowContext( ctx, event.RoomID(), event.EventID(), event.JSON(), + event.Type(), + event.Sender(), + containsURL, pq.StringArray(addState), pq.StringArray(removeState), deviceID, diff --git a/syncapi/storage/syncserver.go b/syncapi/storage/syncserver.go index ebec6c3e1..c57a90256 100644 --- a/syncapi/storage/syncserver.go +++ b/syncapi/storage/syncserver.go @@ -185,10 +185,10 @@ func (d *SyncServerDatasource) GetStateEvent( // Returns an empty slice if no state events could be found for this room. // Returns an error if there was an issue with the retrieval. func (d *SyncServerDatasource) GetStateEventsForRoom( - ctx context.Context, roomID string, + ctx context.Context, roomID string, stateFilterPart *gomatrixserverlib.FilterPart, ) (stateEvents []gomatrixserverlib.Event, err error) { err = common.WithTransaction(d.db, func(txn *sql.Tx) error { - stateEvents, err = d.roomstate.selectCurrentState(ctx, txn, roomID) + stateEvents, err = d.roomstate.selectCurrentState(ctx, txn, roomID, stateFilterPart) return err }) return @@ -245,6 +245,8 @@ func (d *SyncServerDatasource) addPDUDeltaToResponse( var succeeded bool defer common.EndTransaction(txn, &succeeded) + stateFilterPart := gomatrixserverlib.DefaultFilterPart() // TODO: use filter provided in request + // Work out which rooms to return in the response. This is done by getting not only the currently // joined rooms, but also which rooms have membership transitions for this user between the 2 PDU stream positions. // This works out what the 'state' key should be for each room as well as which membership block @@ -252,9 +254,13 @@ func (d *SyncServerDatasource) addPDUDeltaToResponse( var deltas []stateDelta var joinedRoomIDs []string if !wantFullState { - deltas, joinedRoomIDs, err = d.getStateDeltas(ctx, &device, txn, fromPos, toPos, device.UserID) + deltas, joinedRoomIDs, err = d.getStateDeltas( + ctx, &device, txn, fromPos, toPos, device.UserID, &stateFilterPart, + ) } else { - deltas, joinedRoomIDs, err = d.getStateDeltasForFullStateSync(ctx, &device, txn, fromPos, toPos, device.UserID) + deltas, joinedRoomIDs, err = d.getStateDeltasForFullStateSync( + ctx, &device, txn, fromPos, toPos, device.UserID, &stateFilterPart, + ) } if err != nil { return nil, err @@ -404,10 +410,12 @@ func (d *SyncServerDatasource) getResponseWithPDUsForCompleteSync( return } + stateFilterPart := gomatrixserverlib.DefaultFilterPart() // TODO: use filter provided in request + // Build up a /sync response. Add joined rooms. for _, roomID := range joinedRoomIDs { var stateEvents []gomatrixserverlib.Event - stateEvents, err = d.roomstate.selectCurrentState(ctx, txn, roomID) + stateEvents, err = d.roomstate.selectCurrentState(ctx, txn, roomID, &stateFilterPart) if err != nil { return } @@ -733,6 +741,7 @@ func (d *SyncServerDatasource) fetchMissingStateEvents( func (d *SyncServerDatasource) getStateDeltas( ctx context.Context, device *authtypes.Device, txn *sql.Tx, fromPos, toPos int64, userID string, + stateFilterPart *gomatrixserverlib.FilterPart, ) ([]stateDelta, []string, error) { // Implement membership change algorithm: https://github.com/matrix-org/synapse/blob/v0.19.3/synapse/handlers/sync.py#L821 // - Get membership list changes for this user in this sync response @@ -745,7 +754,7 @@ func (d *SyncServerDatasource) getStateDeltas( var deltas []stateDelta // get all the state events ever between these two positions - stateNeeded, eventMap, err := d.events.selectStateInRange(ctx, txn, fromPos, toPos) + stateNeeded, eventMap, err := d.events.selectStateInRange(ctx, txn, fromPos, toPos, stateFilterPart) if err != nil { return nil, nil, err } @@ -765,7 +774,7 @@ func (d *SyncServerDatasource) getStateDeltas( if membership == gomatrixserverlib.Join { // send full room state down instead of a delta var s []streamEvent - s, err = d.currentStateStreamEventsForRoom(ctx, txn, roomID) + s, err = d.currentStateStreamEventsForRoom(ctx, txn, roomID, stateFilterPart) if err != nil { return nil, nil, err } @@ -807,6 +816,7 @@ func (d *SyncServerDatasource) getStateDeltas( func (d *SyncServerDatasource) getStateDeltasForFullStateSync( ctx context.Context, device *authtypes.Device, txn *sql.Tx, fromPos, toPos int64, userID string, + stateFilterPart *gomatrixserverlib.FilterPart, ) ([]stateDelta, []string, error) { joinedRoomIDs, err := d.roomstate.selectRoomIDsWithMembership(ctx, txn, userID, gomatrixserverlib.Join) if err != nil { @@ -818,7 +828,7 @@ func (d *SyncServerDatasource) getStateDeltasForFullStateSync( // Add full states for all joined rooms for _, joinedRoomID := range joinedRoomIDs { - s, stateErr := d.currentStateStreamEventsForRoom(ctx, txn, joinedRoomID) + s, stateErr := d.currentStateStreamEventsForRoom(ctx, txn, joinedRoomID, stateFilterPart) if stateErr != nil { return nil, nil, stateErr } @@ -830,7 +840,7 @@ func (d *SyncServerDatasource) getStateDeltasForFullStateSync( } // Get all the state events ever between these two positions - stateNeeded, eventMap, err := d.events.selectStateInRange(ctx, txn, fromPos, toPos) + stateNeeded, eventMap, err := d.events.selectStateInRange(ctx, txn, fromPos, toPos, stateFilterPart) if err != nil { return nil, nil, err } @@ -861,8 +871,9 @@ func (d *SyncServerDatasource) getStateDeltasForFullStateSync( func (d *SyncServerDatasource) currentStateStreamEventsForRoom( ctx context.Context, txn *sql.Tx, roomID string, + stateFilterPart *gomatrixserverlib.FilterPart, ) ([]streamEvent, error) { - allState, err := d.roomstate.selectCurrentState(ctx, txn, roomID) + allState, err := d.roomstate.selectCurrentState(ctx, txn, roomID, stateFilterPart) if err != nil { return nil, err }