Implement core redaction logic (#1185)

* Implement core redaction logic

- Add a new `redactions_table.go` which tracks the mapping of
  the redaction event ID and the redacted event ID
- Mark redactions as 'validated' when we have both events.
- When redactions are validated, add `unsigned.redacted_because`
  and modify the `eventJSON` accordingly.

Note: We currently do NOT redact the event content - it's gated
behind a feature flag - until we have tested redactions a bit more.

* Linting
This commit is contained in:
Kegsay 2020-07-06 17:49:15 +01:00 committed by GitHub
parent 65084c5052
commit 52c77cb4b4
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
8 changed files with 414 additions and 4 deletions

View file

@ -44,7 +44,7 @@ CREATE TABLE IF NOT EXISTS roomserver_event_json (
const insertEventJSONSQL = "" + const insertEventJSONSQL = "" +
"INSERT INTO roomserver_event_json (event_nid, event_json) VALUES ($1, $2)" + "INSERT INTO roomserver_event_json (event_nid, event_json) VALUES ($1, $2)" +
" ON CONFLICT DO NOTHING" " ON CONFLICT (event_nid) DO UPDATE SET event_json=$2"
// Bulk event JSON lookup by numeric event ID. // Bulk event JSON lookup by numeric event ID.
// Sort by the numeric event ID. // Sort by the numeric event ID.

View file

@ -0,0 +1,121 @@
// Copyright 2020 The Matrix.org Foundation C.I.C.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package postgres
import (
"context"
"database/sql"
"github.com/matrix-org/dendrite/internal/sqlutil"
"github.com/matrix-org/dendrite/roomserver/storage/shared"
"github.com/matrix-org/dendrite/roomserver/storage/tables"
)
// redactionsSchema creates the roomserver_redactions table, keyed by the redaction event ID, together
// with an index on redacts_event_id (the column selectRedactionEventSQL filters on).
// It is executed once by NewPostgresRedactionsTable; every statement uses IF NOT EXISTS.
const redactionsSchema = `
-- Stores information about the redacted state of events.
-- We need to track redactions rather than blindly updating the event JSON table on receipt of a redaction
-- because we might receive the redaction BEFORE we receive the event which it redacts (think backfill).
CREATE TABLE IF NOT EXISTS roomserver_redactions (
redaction_event_id TEXT PRIMARY KEY,
redacts_event_id TEXT NOT NULL,
-- Initially FALSE, set to TRUE when the redaction has been validated according to rooms v3+ spec
-- https://matrix.org/docs/spec/rooms/v3#authorization-rules-for-events
validated BOOLEAN NOT NULL
);
CREATE INDEX IF NOT EXISTS roomserver_redactions_redacts_event_id ON roomserver_redactions(redacts_event_id);
`
// insertRedactionSQL adds a new row. $1 = redaction event ID, $2 = ID of the event being redacted,
// $3 = validated flag. There is no ON CONFLICT clause, so re-inserting an existing
// redaction_event_id (the primary key) fails.
const insertRedactionSQL = "" +
"INSERT INTO roomserver_redactions (redaction_event_id, redacts_event_id, validated)" +
" VALUES ($1, $2, $3)"
// selectRedactedEventSQL looks a row up by the ID of the REDACTION event ($1), i.e. it answers
// "which event does this redaction redact?".
const selectRedactedEventSQL = "" +
"SELECT redaction_event_id, redacts_event_id, validated FROM roomserver_redactions" +
" WHERE redaction_event_id = $1"
// selectRedactionEventSQL looks a row up by the ID of the event BEING REDACTED ($1), i.e. it answers
// "which redaction targets this event?".
const selectRedactionEventSQL = "" +
"SELECT redaction_event_id, redacts_event_id, validated FROM roomserver_redactions" +
" WHERE redacts_event_id = $1"
// markRedactionValidatedSQL sets the validated flag ($2) on the row for redaction event ID $1.
const markRedactionValidatedSQL = "" +
" UPDATE roomserver_redactions SET validated = $2 WHERE redaction_event_id = $1"
// redactionStatements holds the prepared statements for the roomserver_redactions table.
// NewPostgresRedactionsTable prepares each field from the SQL constant of the matching name.
type redactionStatements struct {
insertRedactionStmt *sql.Stmt
selectRedactedEventStmt *sql.Stmt
selectRedactionEventStmt *sql.Stmt
markRedactionValidatedStmt *sql.Stmt
}
// NewPostgresRedactionsTable runs the redactions schema against db and then prepares every
// statement used by the table. A schema failure yields a nil table; the outcome of statement
// preparation is returned alongside the table value.
func NewPostgresRedactionsTable(db *sql.DB) (tables.Redactions, error) {
	if _, schemaErr := db.Exec(redactionsSchema); schemaErr != nil {
		return nil, schemaErr
	}

	table := &redactionStatements{}
	toPrepare := shared.StatementList{
		{&table.insertRedactionStmt, insertRedactionSQL},
		{&table.selectRedactedEventStmt, selectRedactedEventSQL},
		{&table.selectRedactionEventStmt, selectRedactionEventSQL},
		{&table.markRedactionValidatedStmt, markRedactionValidatedSQL},
	}
	prepareErr := toPrepare.Prepare(db)
	return table, prepareErr
}
// InsertRedaction writes info as a new row of the redactions table, running inside txn
// when one is supplied (via sqlutil.TxStmt).
func (s *redactionStatements) InsertRedaction(
	ctx context.Context, txn *sql.Tx, info tables.RedactionInfo,
) error {
	_, err := sqlutil.TxStmt(txn, s.insertRedactionStmt).ExecContext(
		ctx,
		info.RedactionEventID,
		info.RedactsEventID,
		info.Validated,
	)
	return err
}
// SelectRedactedEvent fetches the redaction row whose redaction_event_id equals redactionEventID.
// It returns (nil, nil) when no such row exists.
func (s *redactionStatements) SelectRedactedEvent(
	ctx context.Context, txn *sql.Tx, redactionEventID string,
) (info *tables.RedactionInfo, err error) {
	var row tables.RedactionInfo
	err = sqlutil.TxStmt(txn, s.selectRedactedEventStmt).
		QueryRowContext(ctx, redactionEventID).
		Scan(&row.RedactionEventID, &row.RedactsEventID, &row.Validated)
	if err == sql.ErrNoRows {
		// "no match" is not an error for callers
		return nil, nil
	}
	return &row, err
}
// SelectRedactionEvent fetches the redaction row whose redacts_event_id equals redactedEventID.
// It returns (nil, nil) when no such row exists.
func (s *redactionStatements) SelectRedactionEvent(
	ctx context.Context, txn *sql.Tx, redactedEventID string,
) (info *tables.RedactionInfo, err error) {
	var row tables.RedactionInfo
	err = sqlutil.TxStmt(txn, s.selectRedactionEventStmt).
		QueryRowContext(ctx, redactedEventID).
		Scan(&row.RedactionEventID, &row.RedactsEventID, &row.Validated)
	if err == sql.ErrNoRows {
		// "no match" is not an error for callers
		return nil, nil
	}
	return &row, err
}
// MarkRedactionValidated updates the validated flag of the row identified by redactionEventID,
// running inside txn when one is supplied.
func (s *redactionStatements) MarkRedactionValidated(
	ctx context.Context, txn *sql.Tx, redactionEventID string, validated bool,
) error {
	update := sqlutil.TxStmt(txn, s.markRedactionValidatedStmt)
	if _, err := update.ExecContext(ctx, redactionEventID, validated); err != nil {
		return err
	}
	return nil
}

View file

@ -91,6 +91,10 @@ func Open(dataSourceName string, dbProperties sqlutil.DbProperties) (*Database,
if err != nil { if err != nil {
return nil, err return nil, err
} }
redactions, err := NewPostgresRedactionsTable(db)
if err != nil {
return nil, err
}
d.Database = shared.Database{ d.Database = shared.Database{
DB: db, DB: db,
EventTypesTable: eventTypes, EventTypesTable: eventTypes,
@ -106,6 +110,7 @@ func Open(dataSourceName string, dbProperties sqlutil.DbProperties) (*Database,
InvitesTable: invites, InvitesTable: invites,
MembershipTable: membership, MembershipTable: membership,
PublishedTable: published, PublishedTable: published,
RedactionsTable: redactions,
} }
return &d, nil return &d, nil
} }

View file

@ -4,14 +4,27 @@ import (
"context" "context"
"database/sql" "database/sql"
"encoding/json" "encoding/json"
"fmt"
"github.com/matrix-org/dendrite/internal/sqlutil" "github.com/matrix-org/dendrite/internal/sqlutil"
"github.com/matrix-org/dendrite/roomserver/api" "github.com/matrix-org/dendrite/roomserver/api"
"github.com/matrix-org/dendrite/roomserver/storage/tables" "github.com/matrix-org/dendrite/roomserver/storage/tables"
"github.com/matrix-org/dendrite/roomserver/types" "github.com/matrix-org/dendrite/roomserver/types"
"github.com/matrix-org/gomatrixserverlib" "github.com/matrix-org/gomatrixserverlib"
"github.com/tidwall/gjson"
) )
// redactionsArePermanent controls whether a validated redaction strips the redacted event's content
// from the stored event JSON (true) or only at read time (false).
//
// Ideally, when we have both events we should redact the event JSON and forget about the redaction, but we
// currently don't because the redaction code is brand new. Once we are more certain that redactions don't
// misbehave and aren't vulnerable to attacks from remote servers (e.g. a server bypassing event auth rules
// shouldn't be able to redact our data), we should flip this to true. That will mean redactions /actually
// delete information irretrievably/, which will be necessary for compliance with the law. Note that
// downstream components (syncapi) WILL delete information in their database on receipt of a redaction.
// Also note that we still modify the event JSON to set the field unsigned.redacted_because - we just
// don't clear out the content fields yet.
//
// While this is false, handleRedactions leaves the stored content intact and Events() calls
// applyRedactions so that callers still receive redacted events.
//
// If this hasn't been done by 09/2020 this should be flipped to true.
const redactionsArePermanent = false
type Database struct { type Database struct {
DB *sql.DB DB *sql.DB
EventsTable tables.Events EventsTable tables.Events
@ -27,6 +40,7 @@ type Database struct {
InvitesTable tables.Invites InvitesTable tables.Invites
MembershipTable tables.Membership MembershipTable tables.Membership
PublishedTable tables.Published PublishedTable tables.Published
RedactionsTable tables.Redactions
} }
func (d *Database) EventTypeNIDs( func (d *Database) EventTypeNIDs(
@ -298,6 +312,9 @@ func (d *Database) Events(
return nil, err return nil, err
} }
} }
if !redactionsArePermanent {
d.applyRedactions(results)
}
return results, nil return results, nil
} }
@ -403,7 +420,7 @@ func (d *Database) StoreEvent(
return err return err
} }
return nil return d.handleRedactions(ctx, txn, eventNID, event)
}) })
if err != nil { if err != nil {
return 0, types.StateAtEvent{}, err return 0, types.StateAtEvent{}, err
@ -500,3 +517,125 @@ func extractRoomVersionFromCreateEvent(event gomatrixserverlib.Event) (
} }
return roomVersion, err return roomVersion, err
} }
// handleRedactions runs at the end of StoreEvent and applies a redaction once both halves of it are
// known, as the spec requires:
// "servers should not apply or send redactions to clients until both the redaction event and original event have been seen, and are valid."
// https://matrix.org/docs/spec/rooms/v3#authorization-rules-for-events
//
// The event being stored is either:
//   - a redaction event, in which case the event it references is the other half, or
//   - a normal event, which may be the target of a redaction that is already recorded.
//
// loadRedactionPair resolves both halves. Nothing happens if either half is missing or the pair was
// already validated. Otherwise the redacted event gains an `unsigned.redacted_because` field holding the
// redaction event, its stored JSON is overwritten, and the redaction is marked validated=TRUE. The content
// itself is only stripped here when redactionsArePermanent is set.
//
// The `unsigned.redacted_because` field is what applyRedactions keys off when events are loaded, which keeps
// the hot-path of reading events quick as there is no need to cross-reference other tables when loading.
func (d *Database) handleRedactions(ctx context.Context, txn *sql.Tx, eventNID types.EventNID, event gomatrixserverlib.Event) error {
	redactionEvent, redactedEvent, validated, err := d.loadRedactionPair(ctx, txn, eventNID, event)
	switch {
	case err != nil:
		return err
	case validated, redactedEvent == nil, redactionEvent == nil:
		// already applied, or we do not hold both halves of the pair yet
		return nil
	}

	// flag the target as redacted by embedding the redaction event in its unsigned section
	if err = redactedEvent.SetUnsignedField("redacted_because", redactionEvent); err != nil {
		return err
	}
	if redactionsArePermanent {
		redactedEvent.Event = redactedEvent.Redact()
	}

	// replace the stored JSON for the redacted event with the updated copy
	if err = d.EventJSONTable.InsertEventJSON(ctx, txn, redactedEvent.EventNID, redactedEvent.JSON()); err != nil {
		return err
	}
	return d.RedactionsTable.MarkRedactionValidated(ctx, txn, redactionEvent.EventID(), true)
}
// loadRedactionPair returns the redaction event, the event it redacts, and whether that pairing has
// already been validated. Both events are nil (with a nil error) when the pair is not complete yet.
//
// event/eventNID describe the event currently being stored:
//   - If it is a redaction event (m.room.redaction without a state key), the redaction is recorded in the
//     redactions table with validated=FALSE the first time it is seen, so that the pairing can be completed
//     later if the target event arrives afterwards (e.g. via backfill). The target event is then loaded if
//     we already have it. Redactions with no target, or which target themselves, are ignored.
//   - Otherwise the redactions table is consulted for a redaction that targets this event, and that
//     redaction event is loaded.
//
// NOTE(review): EventNIDs and Events are called without txn; confirm they can observe the rows they need
// while the enclosing transaction is still open.
// nolint:gocyclo
func (d *Database) loadRedactionPair(
	ctx context.Context, txn *sql.Tx, eventNID types.EventNID, event gomatrixserverlib.Event,
) (*types.Event, *types.Event, bool, error) {
	var redactionEvent, redactedEvent *types.Event
	var info *tables.RedactionInfo
	var nids map[string]types.EventNID
	var evs []types.Event
	var err error
	isRedactionEvent := event.Type() == gomatrixserverlib.MRoomRedaction && event.StateKey() == nil
	if isRedactionEvent {
		redacts := event.Redacts()
		if redacts == "" || redacts == event.EventID() {
			// a redaction with no target, or one which redacts itself, has nothing to pair with
			return nil, nil, false, nil
		}
		redactionEvent = &types.Event{
			EventNID: eventNID,
			Event:    event,
		}
		// have we recorded this redaction before?
		info, err = d.RedactionsTable.SelectRedactedEvent(ctx, txn, event.EventID())
		if err != nil {
			return nil, nil, false, err
		}
		if info == nil {
			// First sighting of this redaction: remember it as unvalidated so the pairing can be
			// completed whenever the event it redacts is stored.
			info = &tables.RedactionInfo{
				Validated:        false,
				RedactionEventID: event.EventID(),
				RedactsEventID:   redacts,
			}
			if err = d.RedactionsTable.InsertRedaction(ctx, txn, *info); err != nil {
				return nil, nil, false, err
			}
		}
		// find the redacted event if we have it
		nids, err = d.EventNIDs(ctx, []string{info.RedactsEventID})
		if err != nil {
			return nil, nil, false, err
		}
		redactedNID, ok := nids[info.RedactsEventID]
		if !ok {
			// we don't have the redacted event yet; the stored row lets us pair up later
			return nil, nil, false, nil
		}
		evs, err = d.Events(ctx, []types.EventNID{redactedNID})
		if err != nil {
			return nil, nil, false, err
		}
		if len(evs) != 1 {
			return nil, nil, false, fmt.Errorf("redaction: missing event being redacted: %+v", info)
		}
		redactedEvent = &evs[0]
	} else {
		redactedEvent = &types.Event{
			EventNID: eventNID,
			Event:    event,
		}
		// find the redaction event if one exists
		info, err = d.RedactionsTable.SelectRedactionEvent(ctx, txn, event.EventID())
		if err != nil {
			return nil, nil, false, err
		}
		if info == nil {
			// this event is not redacted
			return nil, nil, false, nil
		}
		// A row only exists once the redaction event itself has been stored, so failing to find it
		// here is an inconsistency rather than a "not yet received" condition.
		nids, err = d.EventNIDs(ctx, []string{info.RedactionEventID})
		if err != nil {
			return nil, nil, false, err
		}
		if len(nids) == 0 {
			return nil, nil, false, fmt.Errorf("redaction: missing redaction event NID: %+v", info)
		}
		evs, err = d.Events(ctx, []types.EventNID{nids[info.RedactionEventID]})
		if err != nil {
			return nil, nil, false, err
		}
		if len(evs) != 1 {
			return nil, nil, false, fmt.Errorf("redaction: missing redaction event: %+v", info)
		}
		redactionEvent = &evs[0]
	}
	return redactionEvent, redactedEvent, info.Validated, nil
}
// applyRedactions replaces, in place, every event whose unsigned section carries a
// `redacted_because` key with its redacted form. Events without that key are left untouched.
func (d *Database) applyRedactions(events []types.Event) {
	for idx := range events {
		ev := &events[idx]
		if !gjson.GetBytes(ev.Unsigned(), "redacted_because").Exists() {
			continue
		}
		ev.Event = ev.Redact()
	}
}

View file

@ -35,8 +35,7 @@ const eventJSONSchema = `
` `
const insertEventJSONSQL = ` const insertEventJSONSQL = `
INSERT INTO roomserver_event_json (event_nid, event_json) VALUES ($1, $2) INSERT OR REPLACE INTO roomserver_event_json (event_nid, event_json) VALUES ($1, $2)
ON CONFLICT DO NOTHING
` `
// Bulk event JSON lookup by numeric event ID. // Bulk event JSON lookup by numeric event ID.

View file

@ -0,0 +1,120 @@
// Copyright 2020 The Matrix.org Foundation C.I.C.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package sqlite3
import (
"context"
"database/sql"
"github.com/matrix-org/dendrite/internal/sqlutil"
"github.com/matrix-org/dendrite/roomserver/storage/shared"
"github.com/matrix-org/dendrite/roomserver/storage/tables"
)
// redactionsSchema creates the roomserver_redactions table, keyed by the redaction event ID, together
// with an index on redacts_event_id. The index matters because selectRedactionEventSQL filters on
// redacts_event_id, which is not covered by the primary key. Every statement uses IF NOT EXISTS, so
// the schema can be re-applied to an existing database.
const redactionsSchema = `
-- Stores information about the redacted state of events.
-- We need to track redactions rather than blindly updating the event JSON table on receipt of a redaction
-- because we might receive the redaction BEFORE we receive the event which it redacts (think backfill).
CREATE TABLE IF NOT EXISTS roomserver_redactions (
redaction_event_id TEXT PRIMARY KEY,
redacts_event_id TEXT NOT NULL,
-- Initially FALSE, set to TRUE when the redaction has been validated according to rooms v3+ spec
-- https://matrix.org/docs/spec/rooms/v3#authorization-rules-for-events
validated BOOLEAN NOT NULL
);
CREATE INDEX IF NOT EXISTS roomserver_redactions_redacts_event_id ON roomserver_redactions(redacts_event_id);
`
// insertRedactionSQL adds a new row. $1 = redaction event ID, $2 = ID of the event being redacted,
// $3 = validated flag. There is no conflict clause, so re-inserting an existing
// redaction_event_id (the primary key) fails.
const insertRedactionSQL = "" +
"INSERT INTO roomserver_redactions (redaction_event_id, redacts_event_id, validated)" +
" VALUES ($1, $2, $3)"
// selectRedactedEventSQL looks a row up by the ID of the REDACTION event ($1), i.e. it answers
// "which event does this redaction redact?".
const selectRedactedEventSQL = "" +
"SELECT redaction_event_id, redacts_event_id, validated FROM roomserver_redactions" +
" WHERE redaction_event_id = $1"
// selectRedactionEventSQL looks a row up by the ID of the event BEING REDACTED ($1), i.e. it answers
// "which redaction targets this event?".
const selectRedactionEventSQL = "" +
"SELECT redaction_event_id, redacts_event_id, validated FROM roomserver_redactions" +
" WHERE redacts_event_id = $1"
// markRedactionValidatedSQL sets the validated flag ($2) on the row for redaction event ID $1.
const markRedactionValidatedSQL = "" +
" UPDATE roomserver_redactions SET validated = $2 WHERE redaction_event_id = $1"
// redactionStatements holds the prepared statements for the roomserver_redactions table.
// NewSqliteRedactionsTable prepares each field from the SQL constant of the matching name.
type redactionStatements struct {
insertRedactionStmt *sql.Stmt
selectRedactedEventStmt *sql.Stmt
selectRedactionEventStmt *sql.Stmt
markRedactionValidatedStmt *sql.Stmt
}
// NewSqliteRedactionsTable runs the redactions schema against db and then prepares every
// statement used by the table. A schema failure yields a nil table; the outcome of statement
// preparation is returned alongside the table value.
func NewSqliteRedactionsTable(db *sql.DB) (tables.Redactions, error) {
	if _, schemaErr := db.Exec(redactionsSchema); schemaErr != nil {
		return nil, schemaErr
	}

	table := &redactionStatements{}
	toPrepare := shared.StatementList{
		{&table.insertRedactionStmt, insertRedactionSQL},
		{&table.selectRedactedEventStmt, selectRedactedEventSQL},
		{&table.selectRedactionEventStmt, selectRedactionEventSQL},
		{&table.markRedactionValidatedStmt, markRedactionValidatedSQL},
	}
	prepareErr := toPrepare.Prepare(db)
	return table, prepareErr
}
// InsertRedaction writes info as a new row of the redactions table, running inside txn
// when one is supplied (via sqlutil.TxStmt).
func (s *redactionStatements) InsertRedaction(
	ctx context.Context, txn *sql.Tx, info tables.RedactionInfo,
) error {
	_, err := sqlutil.TxStmt(txn, s.insertRedactionStmt).ExecContext(
		ctx,
		info.RedactionEventID,
		info.RedactsEventID,
		info.Validated,
	)
	return err
}
// SelectRedactedEvent fetches the redaction row whose redaction_event_id equals redactionEventID.
// It returns (nil, nil) when no such row exists.
func (s *redactionStatements) SelectRedactedEvent(
	ctx context.Context, txn *sql.Tx, redactionEventID string,
) (info *tables.RedactionInfo, err error) {
	var row tables.RedactionInfo
	err = sqlutil.TxStmt(txn, s.selectRedactedEventStmt).
		QueryRowContext(ctx, redactionEventID).
		Scan(&row.RedactionEventID, &row.RedactsEventID, &row.Validated)
	if err == sql.ErrNoRows {
		// "no match" is not an error for callers
		return nil, nil
	}
	return &row, err
}
// SelectRedactionEvent fetches the redaction row whose redacts_event_id equals redactedEventID.
// It returns (nil, nil) when no such row exists.
func (s *redactionStatements) SelectRedactionEvent(
	ctx context.Context, txn *sql.Tx, redactedEventID string,
) (info *tables.RedactionInfo, err error) {
	var row tables.RedactionInfo
	err = sqlutil.TxStmt(txn, s.selectRedactionEventStmt).
		QueryRowContext(ctx, redactedEventID).
		Scan(&row.RedactionEventID, &row.RedactsEventID, &row.Validated)
	if err == sql.ErrNoRows {
		// "no match" is not an error for callers
		return nil, nil
	}
	return &row, err
}
// MarkRedactionValidated updates the validated flag of the row identified by redactionEventID,
// running inside txn when one is supplied.
func (s *redactionStatements) MarkRedactionValidated(
	ctx context.Context, txn *sql.Tx, redactionEventID string, validated bool,
) error {
	update := sqlutil.TxStmt(txn, s.markRedactionValidatedStmt)
	if _, err := update.ExecContext(ctx, redactionEventID, validated); err != nil {
		return err
	}
	return nil
}

View file

@ -114,6 +114,10 @@ func Open(dataSourceName string) (*Database, error) {
if err != nil { if err != nil {
return nil, err return nil, err
} }
redactions, err := NewSqliteRedactionsTable(d.db)
if err != nil {
return nil, err
}
d.Database = shared.Database{ d.Database = shared.Database{
DB: d.db, DB: d.db,
EventsTable: d.events, EventsTable: d.events,
@ -129,6 +133,7 @@ func Open(dataSourceName string) (*Database, error) {
InvitesTable: d.invites, InvitesTable: d.invites,
MembershipTable: d.membership, MembershipTable: d.membership,
PublishedTable: published, PublishedTable: published,
RedactionsTable: redactions,
} }
return &d, nil return &d, nil
} }

View file

@ -14,6 +14,7 @@ type EventJSONPair struct {
} }
type EventJSON interface { type EventJSON interface {
// Insert the event JSON. On conflict, replace the event JSON with the new value (for redactions).
InsertEventJSON(ctx context.Context, tx *sql.Tx, eventNID types.EventNID, eventJSON []byte) error InsertEventJSON(ctx context.Context, tx *sql.Tx, eventNID types.EventNID, eventJSON []byte) error
BulkSelectEventJSON(ctx context.Context, eventNIDs []types.EventNID) ([]EventJSONPair, error) BulkSelectEventJSON(ctx context.Context, eventNIDs []types.EventNID) ([]EventJSONPair, error)
} }
@ -126,3 +127,23 @@ type Published interface {
SelectPublishedFromRoomID(ctx context.Context, roomID string) (published bool, err error) SelectPublishedFromRoomID(ctx context.Context, roomID string) (published bool, err error)
SelectAllPublishedRooms(ctx context.Context, published bool) ([]string, error) SelectAllPublishedRooms(ctx context.Context, published bool) ([]string, error)
} }
// RedactionInfo is one row of the redactions table: it links a redaction event to the event it
// redacts and records whether that link has been validated.
type RedactionInfo struct {
// whether this redaction is validated (we have both events)
Validated bool
// the ID of the event being redacted
RedactsEventID string
// the ID of the redaction event
RedactionEventID string
}
// Redactions is the storage interface for the redactions table. Note the naming of the two selects:
// SelectRedactedEvent is keyed by the redaction event's ID, SelectRedactionEvent by the redacted event's ID.
type Redactions interface {
// InsertRedaction stores a new redaction row described by info.
InsertRedaction(ctx context.Context, txn *sql.Tx, info RedactionInfo) error
// SelectRedactedEvent returns the redaction info for the given redaction event ID, or nil if there is no match.
SelectRedactedEvent(ctx context.Context, txn *sql.Tx, redactionEventID string) (*RedactionInfo, error)
// SelectRedactionEvent returns the redaction info for the given redacted event ID, or nil if there is no match.
SelectRedactionEvent(ctx context.Context, txn *sql.Tx, redactedEventID string) (*RedactionInfo, error)
// Mark this redaction event as having been validated. This means we have both sides of the redaction and have
// successfully redacted the event JSON.
MarkRedactionValidated(ctx context.Context, txn *sql.Tx, redactionEventID string, validated bool) error
}