From d60d7f9837daf3ec2a1390a21aac5cc5475846b6 Mon Sep 17 00:00:00 2001
From: Till Faelligen <2353100+S7evinK@users.noreply.github.com>
Date: Wed, 1 Mar 2023 16:07:08 +0100
Subject: [PATCH] Add basic tool to download a rooms state to file

---
 cmd/backfill/main.go | 238 +++++++++++++++++++++++++++++++++++++++++++
 go.mod               |   2 +-
 go.sum               |   2 +
 3 files changed, 241 insertions(+), 1 deletion(-)
 create mode 100644 cmd/backfill/main.go

diff --git a/cmd/backfill/main.go b/cmd/backfill/main.go
new file mode 100644
index 000000000..eb19f883f
--- /dev/null
+++ b/cmd/backfill/main.go
@@ -0,0 +1,238 @@
+package main
+
+import (
+	"bytes"
+	"context"
+	"crypto/ed25519"
+	"encoding/json"
+	"encoding/pem"
+	"flag"
+	"fmt"
+	"log"
+	"math/rand"
+	"os"
+	"regexp"
+	"time"
+
+	"github.com/matrix-org/gomatrixserverlib"
+	"github.com/tidwall/gjson"
+)
+
+var requestFrom = flag.String("from", "", "the server name that the request should originate from")
+var requestKey = flag.String("key", "matrix_key.pem", "the private key to use when signing the request")
+var requestTo = flag.String("to", "", "the server name to start backfilling from")
+var startEventID = flag.String("eventid", "", "the event ID to start backfilling from")
+var roomID = flag.String("room", "", "the room ID to backfill")
+
+// main backfills -room over federation starting at -eventid and writes every
+// newly seen event as one JSON document per line to "<room>_backfill.csv". It
+// stops once an m.room.create event was received or when a response yields no
+// new event ID to continue from.
+func main() {
+	flag.Parse()
+
+	if requestFrom == nil || *requestFrom == "" {
+		fmt.Println("expecting: backfill -from origin.com -to remote.com -room '!room:remote.com' -eventid '$eventid' [-key matrix_key.pem]")
+		fmt.Println("supported flags:")
+		flag.PrintDefaults()
+		os.Exit(1)
+	}
+
+	if requestTo == nil || *requestTo == "" {
+		fmt.Println("expecting a non empty -to value")
+		flag.PrintDefaults()
+		os.Exit(1)
+	}
+	if roomID == nil || *roomID == "" {
+		fmt.Println("expecting a non empty -room value")
+		flag.PrintDefaults()
+		os.Exit(1)
+	}
+	if startEventID == nil || *startEventID == "" {
+		fmt.Println("expecting a non empty -eventid value")
+		flag.PrintDefaults()
+		os.Exit(1)
+	}
+
+	data, err := os.ReadFile(*requestKey)
+	if err != nil {
+		panic(err)
+	}
+
+	var privateKey ed25519.PrivateKey
+	keyBlock, _ := pem.Decode(data)
+	if keyBlock == nil {
+		panic("keyBlock is nil")
+	}
+	if keyBlock.Type == "MATRIX PRIVATE KEY" {
+		// The key block bytes serve as the entropy source (seed) for the key pair.
+		_, privateKey, err = ed25519.GenerateKey(bytes.NewReader(keyBlock.Bytes))
+		if err != nil {
+			panic(err)
+		}
+	} else {
+		panic("unexpected key block")
+	}
+
+	serverName := gomatrixserverlib.ServerName(*requestFrom)
+	client := gomatrixserverlib.NewFederationClient(
+		[]*gomatrixserverlib.SigningIdentity{
+			{
+				ServerName: serverName,
+				KeyID:      gomatrixserverlib.KeyID(keyBlock.Headers["Key-ID"]),
+				PrivateKey: privateKey,
+			},
+		},
+		gomatrixserverlib.WithKeepAlives(true),
+	)
+
+	b := &backfiller{
+		FedClient: client,
+		servers: map[gomatrixserverlib.ServerName]struct{}{
+			gomatrixserverlib.ServerName(*requestTo): {},
+		},
+	}
+
+	ctx := context.Background()
+	eventID := *startEventID
+	start := time.Now()
+	defer func() {
+		log.Printf("Backfilling took: %s", time.Since(start))
+	}()
+	f, err := os.Create(tokenise(*roomID) + "_backfill.csv")
+	if err != nil {
+		log.Fatal(err)
+	}
+	defer f.Close()
+	seenEvents := make(map[string]struct{})
+
+	encoder := json.NewEncoder(f)
+
+	for {
+		log.Printf("[%d] going to request %s\n", len(seenEvents), eventID)
+		evs, err := gomatrixserverlib.RequestBackfill(ctx, serverName, b, &nopJSONVerifier{}, *roomID, "9", []string{eventID}, 100)
+		if err != nil && len(evs) == 0 {
+			log.Printf("failed to backfill, retrying: %s", err)
+			time.Sleep(time.Second) // back off instead of hammering the remotes in a tight loop
+			continue
+		}
+		var createSeen bool
+		for _, x := range evs {
+			if _, ok := seenEvents[x.EventID()]; ok {
+				continue
+			}
+
+			sender := gomatrixserverlib.ServerName(gjson.GetBytes(x.JSON(), "origin").Str)
+			if sender != "" && sender != serverName {
+				b.servers[sender] = struct{}{}
+			}
+
+			if x.Type() == "m.room.message" {
+				x.Redact()
+			}
+
+			// The following ensures we preserve the "_event_id" field
+			err = encoder.Encode(x)
+			if err != nil {
+				log.Fatal(err)
+			}
+			if x.Type() == gomatrixserverlib.MRoomCreate {
+				createSeen = true
+			}
+		}
+		// We've reached the beginning of the room
+		if createSeen {
+			log.Printf("[%d] Reached beginning of the room, exiting", len(seenEvents))
+			return
+		}
+
+		// Remember the event ID before trying to find a new one
+		beforeEvID := eventID
+		for _, x := range evs {
+			if _, ok := seenEvents[x.EventID()]; ok {
+				continue
+			}
+			eventID = x.EventID()
+			break
+		}
+		if beforeEvID == eventID {
+			log.Printf("no new eventID found in backfill response")
+			return
+		}
+		// Finally store which events we've already seen
+		for _, x := range evs {
+			seenEvents[x.EventID()] = struct{}{}
+		}
+		time.Sleep(time.Second) // don't hit remotes too hard
+	}
+}
+
+// backfiller is the value main passes to gomatrixserverlib.RequestBackfill. It
+// forwards backfill requests to the federation client and keeps the set of
+// known servers that ServersAtEvent hands out.
+type backfiller struct {
+	FedClient *gomatrixserverlib.FederationClient
+	servers   map[gomatrixserverlib.ServerName]struct{}
+}
+
+// StateIDsBeforeEvent is a stub that always returns an empty list.
+func (b backfiller) StateIDsBeforeEvent(ctx context.Context, event *gomatrixserverlib.HeaderedEvent) ([]string, error) {
+	return []string{}, nil
+}
+
+// StateBeforeEvent is a stub that always returns a nil map and a nil error.
+func (b backfiller) StateBeforeEvent(ctx context.Context, roomVer gomatrixserverlib.RoomVersion, event *gomatrixserverlib.HeaderedEvent, eventIDs []string) (map[string]*gomatrixserverlib.Event, error) {
+	return nil, nil
+}
+
+// Backfill forwards the request unchanged to the federation client.
+func (b backfiller) Backfill(ctx context.Context, origin, server gomatrixserverlib.ServerName, roomID string, limit int, fromEventIDs []string) (gomatrixserverlib.Transaction, error) {
+	return b.FedClient.Backfill(ctx, origin, server, roomID, limit, fromEventIDs)
+}
+
+// ServersAtEvent returns at most five servers: matrix.org first, followed by
+// the other known servers in random order.
+func (b backfiller) ServersAtEvent(ctx context.Context, roomID, eventID string) []gomatrixserverlib.ServerName {
+	servers := make([]gomatrixserverlib.ServerName, 0, len(b.servers)+1)
+	for v := range b.servers {
+		if v == "matrix.org" { // will be added to the front anyway
+			continue
+		}
+		servers = append(servers, v)
+	}
+	rand.Shuffle(len(servers), func(i, j int) {
+		servers[i], servers[j] = servers[j], servers[i]
+	})
+
+	// always prefer matrix.org
+	servers = append([]gomatrixserverlib.ServerName{"matrix.org"}, servers...)
+
+	if len(servers) > 5 {
+		servers = servers[:5]
+	}
+	return servers
+}
+
+// ProvideEvents is a stub that always returns an empty list.
+func (b backfiller) ProvideEvents(roomVer gomatrixserverlib.RoomVersion, eventIDs []string) ([]*gomatrixserverlib.Event, error) {
+	return []*gomatrixserverlib.Event{}, nil
+}
+
+// safeCharacters matches every run of characters outside A-Z, a-z, 0-9 and "$".
+var safeCharacters = regexp.MustCompile("[^A-Za-z0-9$]+")
+
+// tokenise replaces each run of characters matched by safeCharacters with a single "_".
+func tokenise(str string) string {
+	return safeCharacters.ReplaceAllString(str, "_")
+}
+
+// nopJSONVerifier is a JSONVerifier that verifies nothing and returns no errors.
+type nopJSONVerifier struct {
+	// this verifier verifies nothing
+}
+
+// VerifyJSONs returns one zero-value result per request without inspecting the requests.
+func (t *nopJSONVerifier) VerifyJSONs(ctx context.Context, requests []gomatrixserverlib.VerifyJSONRequest) ([]gomatrixserverlib.VerifyJSONResult, error) {
+	result := make([]gomatrixserverlib.VerifyJSONResult, len(requests))
+	return result, nil
+}
diff --git a/go.mod b/go.mod
index 6857290f3..44de606fb 100644
--- a/go.mod
+++ b/go.mod
@@ -22,7 +22,7 @@ require (
 	github.com/matrix-org/dugong v0.0.0-20210921133753-66e6b1c67e2e
 	github.com/matrix-org/go-sqlite3-js v0.0.0-20220419092513-28aa791a1c91
 	github.com/matrix-org/gomatrix v0.0.0-20220926102614-ceba4d9f7530
-	github.com/matrix-org/gomatrixserverlib v0.0.0-20230131183213-122f1e0e3fa1
+	github.com/matrix-org/gomatrixserverlib v0.0.0-20230301150509-5f7577968b8f
 	github.com/matrix-org/pinecone v0.11.1-0.20230210171230-8c3b24f2649a
 	github.com/matrix-org/util v0.0.0-20221111132719-399730281e66
 	github.com/mattn/go-sqlite3 v1.14.15
diff --git a/go.sum b/go.sum
index 2272e5404..040403a36 100644
--- a/go.sum
+++ b/go.sum
@@ -323,6 +323,8 @@ github.com/matrix-org/gomatrix v0.0.0-20220926102614-ceba4d9f7530 h1:kHKxCOLcHH8
 github.com/matrix-org/gomatrix v0.0.0-20220926102614-ceba4d9f7530/go.mod h1:/gBX06Kw0exX1HrwmoBibFA98yBk/jxKpGVeyQbff+s=
 github.com/matrix-org/gomatrixserverlib v0.0.0-20230131183213-122f1e0e3fa1 h1:JSw0nmjMrgBmoM2aQsa78LTpI5BnuD9+vOiEQ4Qo0qw=
 github.com/matrix-org/gomatrixserverlib v0.0.0-20230131183213-122f1e0e3fa1/go.mod h1:Mtifyr8q8htcBeugvlDnkBcNUy5LO8OzUoplAf1+mb4=
+github.com/matrix-org/gomatrixserverlib v0.0.0-20230301150509-5f7577968b8f h1:gJQy+K/OYxAycj5rs0/7U2R4/Cx+XfkdERRLjIOQmZ4=
+github.com/matrix-org/gomatrixserverlib v0.0.0-20230301150509-5f7577968b8f/go.mod h1:Mtifyr8q8htcBeugvlDnkBcNUy5LO8OzUoplAf1+mb4=
 github.com/matrix-org/pinecone v0.11.1-0.20230210171230-8c3b24f2649a h1:awrPDf9LEFySxTLKYBMCiObelNx/cBuv/wzllvCCH3A=
 github.com/matrix-org/pinecone v0.11.1-0.20230210171230-8c3b24f2649a/go.mod h1:HchJX9oKMXaT2xYFs0Ha/6Zs06mxLU8k6F1ODnrGkeQ=
 github.com/matrix-org/util v0.0.0-20221111132719-399730281e66 h1:6z4KxomXSIGWqhHcfzExgkH3Z3UkIXry4ibJS4Aqz2Y=