Some updates, sorta works now

This commit is contained in:
Neil Alexander 2020-02-10 14:05:23 +00:00
parent be819a022e
commit b60cc7283d
3 changed files with 57 additions and 25 deletions

View file

@ -79,7 +79,6 @@ func (oq *destinationQueue) backgroundSend() {
return return
} }
// TODO: handle retries.
// TODO: blacklist uncooperative servers. // TODO: blacklist uncooperative servers.
_, err := oq.client.SendTransaction(context.TODO(), *t) _, err := oq.client.SendTransaction(context.TODO(), *t)
@ -88,6 +87,15 @@ func (oq *destinationQueue) backgroundSend() {
"destination": oq.destination, "destination": oq.destination,
log.ErrorKey: err, log.ErrorKey: err,
}).Info("problem sending transaction") }).Info("problem sending transaction")
for _, pdu := range (*t).PDUs {
if err := oq.parent.QueueEvent((*t).Destination, pdu); err != nil {
log.WithFields(log.Fields{
"destination": (*t).Destination,
log.ErrorKey: err,
}).Warn("Error queuing PDU")
}
}
} }
} }
} }

View file

@ -16,7 +16,6 @@ package queue
import ( import (
"context" "context"
"errors"
"fmt" "fmt"
"sync" "sync"
"time" "time"
@ -27,6 +26,8 @@ import (
log "github.com/sirupsen/logrus" log "github.com/sirupsen/logrus"
) )
const retryInterval = time.Second * 5
// OutgoingQueues is a collection of queues for sending transactions to other // OutgoingQueues is a collection of queues for sending transactions to other
// matrix servers // matrix servers
type OutgoingQueues struct { type OutgoingQueues struct {
@ -57,19 +58,14 @@ func NewOutgoingQueues(
func (oqs *OutgoingQueues) QueueEvent( func (oqs *OutgoingQueues) QueueEvent(
destination gomatrixserverlib.ServerName, destination gomatrixserverlib.ServerName,
event gomatrixserverlib.Event, event gomatrixserverlib.Event,
retryAt time.Time,
) error { ) error {
if time.Until(retryAt) < time.Second*5 {
return errors.New("can't queue for less than 5 seconds")
}
return oqs.db.QueueEventForRetry( return oqs.db.QueueEventForRetry(
context.Background(), // context context.Background(), // context
string(oqs.origin), // origin servername string(oqs.origin), // origin servername
string(destination), // destination servername string(destination), // destination servername
event, // event event, // event
0, // attempts 0, // attempts
retryAt, // retry at time time.Now().Add(retryInterval*2), // retry at time
) )
} }
@ -149,6 +145,7 @@ func (oqs *OutgoingQueues) SendEDU(
oq := oqs.queues[destination] oq := oqs.queues[destination]
if oq == nil { if oq == nil {
oq = &destinationQueue{ oq = &destinationQueue{
parent: oqs,
origin: oqs.origin, origin: oqs.origin,
destination: destination, destination: destination,
client: oqs.client, client: oqs.client,
@ -167,22 +164,41 @@ func (oqs *OutgoingQueues) SendEDU(
func (oqs *OutgoingQueues) processRetries() { func (oqs *OutgoingQueues) processRetries() {
ctx := context.Background() ctx := context.Background()
for { for {
time.Sleep(time.Second * 5) time.Sleep(retryInterval)
fmt.Println("trying to process retries") if err := oqs.db.DeleteRetryExpiredEvents(ctx); err != nil {
log.WithFields(log.Fields{
log.ErrorKey: err,
}).Warn("Error cleaning expired retry events")
}
retries, err := oqs.db.SelectRetryEventsPending(ctx) retries, err := oqs.db.SelectRetryEventsPending(ctx)
if err != nil { if err != nil {
fmt.Println("failed:", err) log.WithFields(log.Fields{
log.ErrorKey: err,
}).Warn("Error selecting pending retry events")
continue continue
} }
fmt.Println("there are", len(retries), "PDUs to retry sending") if len(retries) == 0 {
continue
for _, retry := range retries {
fmt.Println("retrying:", retry)
} }
oqs.db.DeleteRetryExpiredEvents(ctx) log.WithFields(log.Fields{
"pending": len(retries),
}).Info("Retrying failed PDU sends")
for _, retry := range retries {
if err := oqs.SendEvent(
retry.PDU,
retry.Origin,
[]gomatrixserverlib.ServerName{retry.Destination},
); err != nil {
log.WithFields(log.Fields{
"destination": retry.Destination,
log.ErrorKey: err,
}).Warn("Error resending retry event")
}
}
} }
} }

View file

@ -18,6 +18,7 @@ package postgres
import ( import (
"context" "context"
"database/sql" "database/sql"
"encoding/json"
"time" "time"
"github.com/matrix-org/dendrite/common" "github.com/matrix-org/dendrite/common"
@ -40,7 +41,9 @@ INSERT INTO federationsender_retry
(origin_server_name, destination_server_name, event_json, attempts, retry_at) (origin_server_name, destination_server_name, event_json, attempts, retry_at)
VALUES ($1, $2, $3, $4, $5) VALUES ($1, $2, $3, $4, $5)
ON CONFLICT ON CONSTRAINT federationsender_retry_unique ON CONFLICT ON CONSTRAINT federationsender_retry_unique
DO UPDATE SET attempts = $4, retry_at = $5 DO UPDATE SET
attempts = federationsender_retry.attempts+1,
retry_at = $5
` `
const deleteEventSQL = ` const deleteEventSQL = `
@ -49,10 +52,11 @@ const deleteEventSQL = `
const selectEventsForRetry = ` const selectEventsForRetry = `
SELECT * FROM federationsender_retry WHERE retry_at >= $1 AND attempts < 5 SELECT * FROM federationsender_retry WHERE retry_at >= $1 AND attempts < 5
ORDER BY retry_at
` `
const deleteExpiredEvents = ` const deleteExpiredEvents = `
DELETE FROM federationsender_retry WHERE attempts > 5 DELETE FROM federationsender_retry WHERE attempts >= 5 OR retry_at < $1
` `
type retryStatements struct { type retryStatements struct {
@ -116,12 +120,16 @@ func (s *retryStatements) selectRetryEventsPending(
defer rows.Close() defer rows.Close()
for rows.Next() { for rows.Next() {
var entry types.PendingPDU var entry types.PendingPDU
var rawEvent []byte
if err = rows.Scan( if err = rows.Scan(
&entry.RetryNID, &entry.Origin, &entry.Destination, &entry.RetryNID, &entry.Origin, &entry.Destination,
&entry.PDU, &entry.Attempts, &entry.Attempts, &rawEvent, &entry.Attempts, &entry.Attempts,
); err != nil { ); err != nil {
return nil, err return nil, err
} }
if err := json.Unmarshal(rawEvent, &entry.PDU); err != nil {
return nil, err
}
pending = append(pending, &entry) pending = append(pending, &entry)
} }
return pending, err return pending, err
@ -131,6 +139,6 @@ func (s *retryStatements) deleteRetryExpiredEvents(
ctx context.Context, txn *sql.Tx, ctx context.Context, txn *sql.Tx,
) error { ) error {
stmt := common.TxStmt(txn, s.deleteExpiredEventsStmt) stmt := common.TxStmt(txn, s.deleteExpiredEventsStmt)
_, err := stmt.ExecContext(ctx) _, err := stmt.ExecContext(ctx, time.Now().UTC().Unix())
return err return err
} }