dendrite/cmd/backfill/main.go

221 lines
6 KiB
Go
Raw Normal View History

package main
import (
"bytes"
"context"
"crypto/ed25519"
"encoding/json"
"encoding/pem"
"flag"
"fmt"
"log"
"math/rand"
"os"
"regexp"
"time"
"github.com/matrix-org/gomatrixserverlib"
"github.com/tidwall/gjson"
)
// Command-line flags. Apart from -key, which has a default, every flag must be
// given a non-empty value or main prints an error and exits.
var (
	// requestFrom is the server name used as the origin of the requests.
	requestFrom = flag.String("from", "", "the server name that the request should originate from")
	// requestKey is the path to the PEM file holding the signing key.
	requestKey = flag.String("key", "matrix_key.pem", "the private key to use when signing the request")
	// requestTo is the first remote server to ask for events.
	requestTo = flag.String("to", "", "the server name to start backfilling from")
	// startEventID is the event ID the first backfill request starts from.
	startEventID = flag.String("eventid", "", "the event ID to start backfilling from")
	// roomID is the room whose history is fetched.
	roomID = flag.String("room", "", "the room ID to backfill")
)
// main backfills the history of a single room over federation and writes the
// events it receives to "<tokenised room ID>_backfill.csv" in the working
// directory.
//
// The flags -from, -to, -room and -eventid are required; -key defaults to
// matrix_key.pem. Starting at -eventid, main repeatedly calls
// gomatrixserverlib.RequestBackfill, JSON-encodes every event that was not part
// of an earlier response (one JSON document per line, despite the .csv suffix),
// and stops once an m.room.create event has been written or a response yields
// no new event ID to continue from.
func main() {
	// Pause between requests, and before retrying a failed one, so remote
	// servers are not hammered.
	const requestInterval = time.Second

	flag.Parse()

	// flag.String never returns a nil pointer, so only emptiness is checked.
	if *requestFrom == "" {
		fmt.Println("expecting: backfill -from origin.com -to remote.com -room '!room:remote.com' -eventid '$eventid' [-key matrix_key.pem]")
		fmt.Println("supported flags:")
		flag.PrintDefaults()
		os.Exit(1)
	}
	if *requestTo == "" {
		fmt.Println("expecting a non empty -to value")
		flag.PrintDefaults()
		os.Exit(1)
	}
	if *roomID == "" {
		fmt.Println("expecting a non empty -room value")
		flag.PrintDefaults()
		os.Exit(1)
	}
	if *startEventID == "" {
		fmt.Println("expecting a non empty -eventid value")
		flag.PrintDefaults()
		os.Exit(1)
	}

	// Load the signing key: a PEM block of type "MATRIX PRIVATE KEY" whose
	// bytes are fed to ed25519.GenerateKey as the source of key material.
	data, err := os.ReadFile(*requestKey)
	if err != nil {
		panic(err)
	}
	keyBlock, _ := pem.Decode(data)
	if keyBlock == nil {
		panic("keyBlock is nil")
	}
	if keyBlock.Type != "MATRIX PRIVATE KEY" {
		panic("unexpected key block")
	}
	_, privateKey, err := ed25519.GenerateKey(bytes.NewReader(keyBlock.Bytes))
	if err != nil {
		panic(err)
	}

	serverName := gomatrixserverlib.ServerName(*requestFrom)
	client := gomatrixserverlib.NewFederationClient(
		[]*gomatrixserverlib.SigningIdentity{
			{
				ServerName: serverName,
				// The key ID is taken from the PEM block's "Key-ID" header.
				KeyID:      gomatrixserverlib.KeyID(keyBlock.Headers["Key-ID"]),
				PrivateKey: privateKey,
			},
		},
		gomatrixserverlib.WithKeepAlives(true),
	)

	// Seed the server set with the -to server; more are added below as event
	// origins are discovered.
	b := &backfiller{
		FedClient: client,
		servers: map[gomatrixserverlib.ServerName]struct{}{
			gomatrixserverlib.ServerName(*requestTo): {},
		},
	}

	ctx := context.Background()
	eventID := *startEventID

	start := time.Now()
	defer func() {
		log.Printf("Backfilling took: %s", time.Since(start))
	}()

	f, err := os.Create(tokenise(*roomID) + "_backfill.csv")
	if err != nil {
		log.Fatal(err)
	}
	defer f.Close()

	// seenEvents holds the IDs of all events from previous responses.
	seenEvents := make(map[string]struct{})
	encoder := json.NewEncoder(f)

	for {
		log.Printf("[%d] going to request %s\n", len(seenEvents), eventID)
		// NOTE(review): "9" is presumably the room version and 100 the event
		// limit per request — confirm against RequestBackfill's signature.
		evs, err := gomatrixserverlib.RequestBackfill(ctx, serverName, b, &nopJSONVerifier{}, *roomID, "9", []string{eventID}, 100)
		if err != nil && len(evs) == 0 {
			log.Printf("failed to backfill, retrying: %s", err)
			// Back off before retrying instead of spinning in a tight loop
			// against the remote servers.
			time.Sleep(requestInterval)
			continue
		}

		var createSeen bool
		for _, x := range evs {
			if _, ok := seenEvents[x.EventID()]; ok {
				continue
			}
			// Remember other servers that produced events in this room so
			// ServersAtEvent can offer them for later requests.
			sender := gomatrixserverlib.ServerName(gjson.GetBytes(x.JSON(), "origin").Str)
			if sender != "" && sender != serverName {
				b.servers[sender] = struct{}{}
			}
			// NOTE(review): presumably this strips message content before it
			// is written to disk — confirm Redact modifies the event in place.
			if x.Type() == "m.room.message" {
				x.Redact()
			}
			// The following ensures we preserve the "_event_id" field
			err = encoder.Encode(x)
			if err != nil {
				log.Fatal(err)
			}
			if x.Type() == gomatrixserverlib.MRoomCreate {
				createSeen = true
			}
		}

		// We've reached the beginning of the room
		if createSeen {
			log.Printf("[%d] Reached beginning of the room, exiting", len(seenEvents))
			return
		}

		// Remember the event ID before trying to find a new one
		beforeEvID := eventID
		// Continue from the first event of this response that was not part of
		// an earlier response.
		for _, x := range evs {
			if _, ok := seenEvents[x.EventID()]; ok {
				continue
			}
			eventID = x.EventID()
			break
		}
		if beforeEvID == eventID {
			log.Printf("no new eventID found in backfill response")
			return
		}

		// Finally store which events we've already seen
		for _, x := range evs {
			seenEvents[x.EventID()] = struct{}{}
		}

		time.Sleep(requestInterval) // don't hit remotes too hard
	}
}
// backfiller bundles the federation client with the set of servers that can be
// asked for events. main passes it to gomatrixserverlib.RequestBackfill.
type backfiller struct {
// FedClient is the federation client that Backfill delegates to.
FedClient *gomatrixserverlib.FederationClient
// servers is the set of known server names. main seeds it with the -to
// server and adds the origin of each newly received event.
servers map[gomatrixserverlib.ServerName]struct{}
}
// StateIDsBeforeEvent is a stub: it ignores its arguments and always returns an
// empty, non-nil slice of state event IDs together with a nil error.
func (backfiller) StateIDsBeforeEvent(ctx context.Context, event *gomatrixserverlib.HeaderedEvent) ([]string, error) {
	stateIDs := make([]string, 0)
	return stateIDs, nil
}
// StateBeforeEvent is a stub: it ignores its arguments and always returns a nil
// map together with a nil error.
func (backfiller) StateBeforeEvent(ctx context.Context, roomVer gomatrixserverlib.RoomVersion, event *gomatrixserverlib.HeaderedEvent, eventIDs []string) (map[string]*gomatrixserverlib.Event, error) {
	var state map[string]*gomatrixserverlib.Event
	return state, nil
}
// Backfill forwards the request unchanged to the federation client and returns
// the transaction and error it produces.
func (b backfiller) Backfill(ctx context.Context, origin, server gomatrixserverlib.ServerName, roomID string, limit int, fromEventIDs []string) (gomatrixserverlib.Transaction, error) {
	txn, err := b.FedClient.Backfill(ctx, origin, server, roomID, limit, fromEventIDs)
	return txn, err
}
// ServersAtEvent returns at most five server names. "matrix.org" is always the
// first entry; the remaining slots are filled with the other known servers in
// random order. The ctx, roomID and eventID arguments are not used.
func (b backfiller) ServersAtEvent(ctx context.Context, roomID, eventID string) []gomatrixserverlib.ServerName {
	const (
		preferred  = "matrix.org"
		maxServers = 5
	)

	// Slot 0 is reserved for the preferred server; all other known servers
	// are appended behind it.
	candidates := make([]gomatrixserverlib.ServerName, 1, len(b.servers)+1)
	candidates[0] = preferred
	for name := range b.servers {
		if name != preferred {
			candidates = append(candidates, name)
		}
	}

	// Shuffle only the tail so the preferred server keeps the first position.
	others := candidates[1:]
	rand.Shuffle(len(others), func(i, j int) {
		others[i], others[j] = others[j], others[i]
	})

	if len(candidates) <= maxServers {
		return candidates
	}
	return candidates[:maxServers]
}
// ProvideEvents is a stub: it ignores its arguments and always returns an
// empty, non-nil slice of events together with a nil error.
func (backfiller) ProvideEvents(roomVer gomatrixserverlib.RoomVersion, eventIDs []string) ([]*gomatrixserverlib.Event, error) {
	events := make([]*gomatrixserverlib.Event, 0)
	return events, nil
}
// safeCharacters matches every run of characters that is NOT an ASCII letter,
// a digit or '$', i.e. the runs that tokenise replaces.
var safeCharacters = regexp.MustCompile("[^A-Za-z0-9$]+")

// tokenise collapses each run of characters other than ASCII letters, digits
// and '$' in str into a single underscore. main uses it to derive a file name
// from a room ID.
func tokenise(str string) string {
	// The replacement contains no '$', so the literal variant is equivalent
	// to template expansion here.
	const placeholder = "_"
	return safeCharacters.ReplaceAllLiteralString(str, placeholder)
}
// nopJSONVerifier is a JSONVerifier that verifies nothing and returns no errors.
// main passes it to gomatrixserverlib.RequestBackfill as the verifier.
type nopJSONVerifier struct {
// this verifier verifies nothing
}
// VerifyJSONs performs no verification. It returns one zero-valued
// VerifyJSONResult per request and a nil error; ctx is not used.
func (*nopJSONVerifier) VerifyJSONs(ctx context.Context, requests []gomatrixserverlib.VerifyJSONRequest) ([]gomatrixserverlib.VerifyJSONResult, error) {
	return make([]gomatrixserverlib.VerifyJSONResult, len(requests)), nil
}