Stream tweaks, use same codepath for sync vs async input room events, wait for error response via NATS messages (#2283)
This commit is contained in:
parent
485367fcfa
commit
e30aa38fb0
|
@ -59,7 +59,7 @@ func NewInternalAPI(
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
}
|
}
|
||||||
js := jetstream.Prepare(&base.Cfg.Global.JetStream)
|
js, _ := jetstream.Prepare(&base.Cfg.Global.JetStream)
|
||||||
|
|
||||||
// Create a connection to the appservice postgres DB
|
// Create a connection to the appservice postgres DB
|
||||||
appserviceDB, err := storage.NewDatabase(&base.Cfg.AppServiceAPI.Database)
|
appserviceDB, err := storage.NewDatabase(&base.Cfg.AppServiceAPI.Database)
|
||||||
|
|
|
@ -49,7 +49,7 @@ func AddPublicRoutes(
|
||||||
extRoomsProvider api.ExtraPublicRoomsProvider,
|
extRoomsProvider api.ExtraPublicRoomsProvider,
|
||||||
mscCfg *config.MSCs,
|
mscCfg *config.MSCs,
|
||||||
) {
|
) {
|
||||||
js := jetstream.Prepare(&cfg.Matrix.JetStream)
|
js, _ := jetstream.Prepare(&cfg.Matrix.JetStream)
|
||||||
|
|
||||||
syncProducer := &producers.SyncAPIProducer{
|
syncProducer := &producers.SyncAPIProducer{
|
||||||
JetStream: js,
|
JetStream: js,
|
||||||
|
|
|
@ -42,7 +42,7 @@ func NewInternalAPI(
|
||||||
) api.EDUServerInputAPI {
|
) api.EDUServerInputAPI {
|
||||||
cfg := &base.Cfg.EDUServer
|
cfg := &base.Cfg.EDUServer
|
||||||
|
|
||||||
js := jetstream.Prepare(&cfg.Matrix.JetStream)
|
js, _ := jetstream.Prepare(&cfg.Matrix.JetStream)
|
||||||
|
|
||||||
return &input.EDUServerInputAPI{
|
return &input.EDUServerInputAPI{
|
||||||
Cache: eduCache,
|
Cache: eduCache,
|
||||||
|
|
|
@ -92,7 +92,7 @@ func NewInternalAPI(
|
||||||
FailuresUntilBlacklist: cfg.FederationMaxRetries,
|
FailuresUntilBlacklist: cfg.FederationMaxRetries,
|
||||||
}
|
}
|
||||||
|
|
||||||
js := jetstream.Prepare(&cfg.Matrix.JetStream)
|
js, _ := jetstream.Prepare(&cfg.Matrix.JetStream)
|
||||||
|
|
||||||
queues := queue.NewOutgoingQueues(
|
queues := queue.NewOutgoingQueues(
|
||||||
federationDB, base.ProcessContext,
|
federationDB, base.ProcessContext,
|
||||||
|
|
|
@ -39,7 +39,7 @@ func AddInternalRoutes(router *mux.Router, intAPI api.KeyInternalAPI) {
|
||||||
func NewInternalAPI(
|
func NewInternalAPI(
|
||||||
base *base.BaseDendrite, cfg *config.KeyServer, fedClient fedsenderapi.FederationClient,
|
base *base.BaseDendrite, cfg *config.KeyServer, fedClient fedsenderapi.FederationClient,
|
||||||
) api.KeyInternalAPI {
|
) api.KeyInternalAPI {
|
||||||
js := jetstream.Prepare(&cfg.Matrix.JetStream)
|
js, _ := jetstream.Prepare(&cfg.Matrix.JetStream)
|
||||||
|
|
||||||
db, err := storage.NewDatabase(&cfg.Database)
|
db, err := storage.NewDatabase(&cfg.Database)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
|
|
|
@ -43,6 +43,7 @@ type RoomserverInternalAPI struct {
|
||||||
ServerACLs *acls.ServerACLs
|
ServerACLs *acls.ServerACLs
|
||||||
fsAPI fsAPI.FederationInternalAPI
|
fsAPI fsAPI.FederationInternalAPI
|
||||||
asAPI asAPI.AppServiceQueryAPI
|
asAPI asAPI.AppServiceQueryAPI
|
||||||
|
NATSClient *nats.Conn
|
||||||
JetStream nats.JetStreamContext
|
JetStream nats.JetStreamContext
|
||||||
Durable string
|
Durable string
|
||||||
InputRoomEventTopic string // JetStream topic for new input room events
|
InputRoomEventTopic string // JetStream topic for new input room events
|
||||||
|
@ -52,7 +53,8 @@ type RoomserverInternalAPI struct {
|
||||||
|
|
||||||
func NewRoomserverAPI(
|
func NewRoomserverAPI(
|
||||||
processCtx *process.ProcessContext, cfg *config.RoomServer, roomserverDB storage.Database,
|
processCtx *process.ProcessContext, cfg *config.RoomServer, roomserverDB storage.Database,
|
||||||
consumer nats.JetStreamContext, inputRoomEventTopic, outputRoomEventTopic string,
|
consumer nats.JetStreamContext, nc *nats.Conn,
|
||||||
|
inputRoomEventTopic, outputRoomEventTopic string,
|
||||||
caches caching.RoomServerCaches, perspectiveServerNames []gomatrixserverlib.ServerName,
|
caches caching.RoomServerCaches, perspectiveServerNames []gomatrixserverlib.ServerName,
|
||||||
) *RoomserverInternalAPI {
|
) *RoomserverInternalAPI {
|
||||||
serverACLs := acls.NewServerACLs(roomserverDB)
|
serverACLs := acls.NewServerACLs(roomserverDB)
|
||||||
|
@ -66,6 +68,7 @@ func NewRoomserverAPI(
|
||||||
InputRoomEventTopic: inputRoomEventTopic,
|
InputRoomEventTopic: inputRoomEventTopic,
|
||||||
OutputRoomEventTopic: outputRoomEventTopic,
|
OutputRoomEventTopic: outputRoomEventTopic,
|
||||||
JetStream: consumer,
|
JetStream: consumer,
|
||||||
|
NATSClient: nc,
|
||||||
Durable: cfg.Matrix.JetStream.Durable("RoomserverInputConsumer"),
|
Durable: cfg.Matrix.JetStream.Durable("RoomserverInputConsumer"),
|
||||||
ServerACLs: serverACLs,
|
ServerACLs: serverACLs,
|
||||||
Queryer: &query.Queryer{
|
Queryer: &query.Queryer{
|
||||||
|
@ -92,6 +95,7 @@ func (r *RoomserverInternalAPI) SetFederationAPI(fsAPI fsAPI.FederationInternalA
|
||||||
InputRoomEventTopic: r.InputRoomEventTopic,
|
InputRoomEventTopic: r.InputRoomEventTopic,
|
||||||
OutputRoomEventTopic: r.OutputRoomEventTopic,
|
OutputRoomEventTopic: r.OutputRoomEventTopic,
|
||||||
JetStream: r.JetStream,
|
JetStream: r.JetStream,
|
||||||
|
NATSClient: r.NATSClient,
|
||||||
Durable: nats.Durable(r.Durable),
|
Durable: nats.Durable(r.Durable),
|
||||||
ServerName: r.Cfg.Matrix.ServerName,
|
ServerName: r.Cfg.Matrix.ServerName,
|
||||||
FSAPI: fsAPI,
|
FSAPI: fsAPI,
|
||||||
|
|
|
@ -48,6 +48,7 @@ var keyContentFields = map[string]string{
|
||||||
type Inputer struct {
|
type Inputer struct {
|
||||||
ProcessContext *process.ProcessContext
|
ProcessContext *process.ProcessContext
|
||||||
DB storage.Database
|
DB storage.Database
|
||||||
|
NATSClient *nats.Conn
|
||||||
JetStream nats.JetStreamContext
|
JetStream nats.JetStreamContext
|
||||||
Durable nats.SubOpt
|
Durable nats.SubOpt
|
||||||
ServerName gomatrixserverlib.ServerName
|
ServerName gomatrixserverlib.ServerName
|
||||||
|
@ -103,6 +104,7 @@ func (r *Inputer) Start() error {
|
||||||
_ = msg.InProgress() // resets the acknowledgement wait timer
|
_ = msg.InProgress() // resets the acknowledgement wait timer
|
||||||
defer eventsInProgress.Delete(index)
|
defer eventsInProgress.Delete(index)
|
||||||
defer roomserverInputBackpressure.With(prometheus.Labels{"room_id": roomID}).Dec()
|
defer roomserverInputBackpressure.With(prometheus.Labels{"room_id": roomID}).Dec()
|
||||||
|
var errString string
|
||||||
if err := r.processRoomEvent(r.ProcessContext.Context(), &inputRoomEvent); err != nil {
|
if err := r.processRoomEvent(r.ProcessContext.Context(), &inputRoomEvent); err != nil {
|
||||||
if !errors.Is(err, context.DeadlineExceeded) && !errors.Is(err, context.Canceled) {
|
if !errors.Is(err, context.DeadlineExceeded) && !errors.Is(err, context.Canceled) {
|
||||||
sentry.CaptureException(err)
|
sentry.CaptureException(err)
|
||||||
|
@ -113,9 +115,19 @@ func (r *Inputer) Start() error {
|
||||||
"type": inputRoomEvent.Event.Type(),
|
"type": inputRoomEvent.Event.Type(),
|
||||||
}).Warn("Roomserver failed to process async event")
|
}).Warn("Roomserver failed to process async event")
|
||||||
_ = msg.Term()
|
_ = msg.Term()
|
||||||
|
errString = err.Error()
|
||||||
} else {
|
} else {
|
||||||
_ = msg.Ack()
|
_ = msg.Ack()
|
||||||
}
|
}
|
||||||
|
if replyTo := msg.Header.Get("sync"); replyTo != "" {
|
||||||
|
if err := r.NATSClient.Publish(replyTo, []byte(errString)); err != nil {
|
||||||
|
logrus.WithError(err).WithFields(logrus.Fields{
|
||||||
|
"room_id": roomID,
|
||||||
|
"event_id": inputRoomEvent.Event.EventID(),
|
||||||
|
"type": inputRoomEvent.Event.Type(),
|
||||||
|
}).Warn("Roomserver failed to respond for sync event")
|
||||||
|
}
|
||||||
|
}
|
||||||
})
|
})
|
||||||
},
|
},
|
||||||
// NATS wants to acknowledge automatically by default when the message is
|
// NATS wants to acknowledge automatically by default when the message is
|
||||||
|
@ -131,6 +143,9 @@ func (r *Inputer) Start() error {
|
||||||
// Ensure that NATS doesn't try to resend us something that wasn't done
|
// Ensure that NATS doesn't try to resend us something that wasn't done
|
||||||
// within the period of time that we might still be processing it.
|
// within the period of time that we might still be processing it.
|
||||||
nats.AckWait(MaximumMissingProcessingTime+(time.Second*10)),
|
nats.AckWait(MaximumMissingProcessingTime+(time.Second*10)),
|
||||||
|
// It is recommended to disable this for pull consumers as per the docs:
|
||||||
|
// https://docs.nats.io/nats-concepts/jetstream/consumers#note-about-push-and-pull-consumers
|
||||||
|
nats.MaxAckPending(-1),
|
||||||
)
|
)
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
|
@ -141,15 +156,30 @@ func (r *Inputer) InputRoomEvents(
|
||||||
request *api.InputRoomEventsRequest,
|
request *api.InputRoomEventsRequest,
|
||||||
response *api.InputRoomEventsResponse,
|
response *api.InputRoomEventsResponse,
|
||||||
) {
|
) {
|
||||||
if request.Asynchronous {
|
var replyTo string
|
||||||
|
var replySub *nats.Subscription
|
||||||
|
if !request.Asynchronous {
|
||||||
|
var err error
|
||||||
|
replyTo = nats.NewInbox()
|
||||||
|
replySub, err = r.NATSClient.SubscribeSync(replyTo)
|
||||||
|
if err != nil {
|
||||||
|
response.ErrMsg = err.Error()
|
||||||
|
return
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
var err error
|
var err error
|
||||||
for _, e := range request.InputRoomEvents {
|
for _, e := range request.InputRoomEvents {
|
||||||
msg := &nats.Msg{
|
msg := &nats.Msg{
|
||||||
Subject: r.InputRoomEventTopic,
|
Subject: r.InputRoomEventTopic,
|
||||||
Header: nats.Header{},
|
Header: nats.Header{},
|
||||||
|
Reply: replyTo,
|
||||||
}
|
}
|
||||||
roomID := e.Event.RoomID()
|
roomID := e.Event.RoomID()
|
||||||
msg.Header.Set("room_id", roomID)
|
msg.Header.Set("room_id", roomID)
|
||||||
|
if replyTo != "" {
|
||||||
|
msg.Header.Set("sync", replyTo)
|
||||||
|
}
|
||||||
msg.Data, err = json.Marshal(e)
|
msg.Data, err = json.Marshal(e)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
response.ErrMsg = err.Error()
|
response.ErrMsg = err.Error()
|
||||||
|
@ -163,52 +193,21 @@ func (r *Inputer) InputRoomEvents(
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
} else {
|
|
||||||
responses := make(chan error, len(request.InputRoomEvents))
|
if request.Asynchronous || replySub == nil {
|
||||||
for _, e := range request.InputRoomEvents {
|
|
||||||
inputRoomEvent := e
|
|
||||||
roomID := inputRoomEvent.Event.RoomID()
|
|
||||||
index := roomID + "\000" + inputRoomEvent.Event.EventID()
|
|
||||||
if _, ok := eventsInProgress.LoadOrStore(index, struct{}{}); ok {
|
|
||||||
// We're already waiting to deal with this event, so there's no
|
|
||||||
// point in queuing it up again. We've notified NATS that we're
|
|
||||||
// working on the message still, so that will have deferred the
|
|
||||||
// redelivery by a bit.
|
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
roomserverInputBackpressure.With(prometheus.Labels{"room_id": roomID}).Inc()
|
|
||||||
worker := r.workerForRoom(roomID)
|
defer replySub.Drain() // nolint:errcheck
|
||||||
worker.Act(nil, func() {
|
|
||||||
defer eventsInProgress.Delete(index)
|
|
||||||
defer roomserverInputBackpressure.With(prometheus.Labels{"room_id": roomID}).Dec()
|
|
||||||
err := r.processRoomEvent(ctx, &inputRoomEvent)
|
|
||||||
if err != nil {
|
|
||||||
if !errors.Is(err, context.DeadlineExceeded) && !errors.Is(err, context.Canceled) {
|
|
||||||
sentry.CaptureException(err)
|
|
||||||
}
|
|
||||||
logrus.WithError(err).WithFields(logrus.Fields{
|
|
||||||
"room_id": roomID,
|
|
||||||
"event_id": inputRoomEvent.Event.EventID(),
|
|
||||||
}).Warn("Roomserver failed to process sync event")
|
|
||||||
}
|
|
||||||
select {
|
|
||||||
case <-ctx.Done():
|
|
||||||
default:
|
|
||||||
responses <- err
|
|
||||||
}
|
|
||||||
})
|
|
||||||
}
|
|
||||||
for i := 0; i < len(request.InputRoomEvents); i++ {
|
for i := 0; i < len(request.InputRoomEvents); i++ {
|
||||||
select {
|
msg, err := replySub.NextMsgWithContext(ctx)
|
||||||
case <-ctx.Done():
|
|
||||||
response.ErrMsg = context.DeadlineExceeded.Error()
|
|
||||||
return
|
|
||||||
case err := <-responses:
|
|
||||||
if err != nil {
|
if err != nil {
|
||||||
response.ErrMsg = err.Error()
|
response.ErrMsg = err.Error()
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
}
|
if len(msg.Data) > 0 {
|
||||||
|
response.ErrMsg = string(msg.Data)
|
||||||
|
return
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -50,10 +50,10 @@ func NewInternalAPI(
|
||||||
logrus.WithError(err).Panicf("failed to connect to room server db")
|
logrus.WithError(err).Panicf("failed to connect to room server db")
|
||||||
}
|
}
|
||||||
|
|
||||||
js := jetstream.Prepare(&cfg.Matrix.JetStream)
|
js, nc := jetstream.Prepare(&cfg.Matrix.JetStream)
|
||||||
|
|
||||||
return internal.NewRoomserverAPI(
|
return internal.NewRoomserverAPI(
|
||||||
base.ProcessContext, cfg, roomserverDB, js,
|
base.ProcessContext, cfg, roomserverDB, js, nc,
|
||||||
cfg.Matrix.JetStream.TopicFor(jetstream.InputRoomEvent),
|
cfg.Matrix.JetStream.TopicFor(jetstream.InputRoomEvent),
|
||||||
cfg.Matrix.JetStream.TopicFor(jetstream.OutputRoomEvent),
|
cfg.Matrix.JetStream.TopicFor(jetstream.OutputRoomEvent),
|
||||||
base.Caches, perspectiveServerNames,
|
base.Caches, perspectiveServerNames,
|
||||||
|
|
|
@ -71,8 +71,8 @@ func JetStreamConsumer(
|
||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
if f(ctx, msg) {
|
if f(ctx, msg) {
|
||||||
if err = msg.Ack(); err != nil {
|
if err = msg.AckSync(); err != nil {
|
||||||
logrus.WithContext(ctx).WithField("subject", subj).Warn(fmt.Errorf("msg.Ack: %w", err))
|
logrus.WithContext(ctx).WithField("subject", subj).Warn(fmt.Errorf("msg.AckSync: %w", err))
|
||||||
sentry.CaptureException(err)
|
sentry.CaptureException(err)
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
|
|
|
@ -15,7 +15,7 @@ import (
|
||||||
var natsServer *natsserver.Server
|
var natsServer *natsserver.Server
|
||||||
var natsServerMutex sync.Mutex
|
var natsServerMutex sync.Mutex
|
||||||
|
|
||||||
func Prepare(cfg *config.JetStream) natsclient.JetStreamContext {
|
func Prepare(cfg *config.JetStream) (natsclient.JetStreamContext, *natsclient.Conn) {
|
||||||
// check if we need an in-process NATS Server
|
// check if we need an in-process NATS Server
|
||||||
if len(cfg.Addresses) != 0 {
|
if len(cfg.Addresses) != 0 {
|
||||||
return setupNATS(cfg, nil)
|
return setupNATS(cfg, nil)
|
||||||
|
@ -48,20 +48,20 @@ func Prepare(cfg *config.JetStream) natsclient.JetStreamContext {
|
||||||
return setupNATS(cfg, nc)
|
return setupNATS(cfg, nc)
|
||||||
}
|
}
|
||||||
|
|
||||||
func setupNATS(cfg *config.JetStream, nc *natsclient.Conn) natsclient.JetStreamContext {
|
func setupNATS(cfg *config.JetStream, nc *natsclient.Conn) (natsclient.JetStreamContext, *natsclient.Conn) {
|
||||||
if nc == nil {
|
if nc == nil {
|
||||||
var err error
|
var err error
|
||||||
nc, err = natsclient.Connect(strings.Join(cfg.Addresses, ","))
|
nc, err = natsclient.Connect(strings.Join(cfg.Addresses, ","))
|
||||||
if err != nil {
|
if err != nil {
|
||||||
logrus.WithError(err).Panic("Unable to connect to NATS")
|
logrus.WithError(err).Panic("Unable to connect to NATS")
|
||||||
return nil
|
return nil, nil
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
s, err := nc.JetStream()
|
s, err := nc.JetStream()
|
||||||
if err != nil {
|
if err != nil {
|
||||||
logrus.WithError(err).Panic("Unable to get JetStream context")
|
logrus.WithError(err).Panic("Unable to get JetStream context")
|
||||||
return nil
|
return nil, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
for _, stream := range streams { // streams are defined in streams.go
|
for _, stream := range streams { // streams are defined in streams.go
|
||||||
|
@ -89,5 +89,5 @@ func setupNATS(cfg *config.JetStream, nc *natsclient.Conn) natsclient.JetStreamC
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
return s
|
return s, nc
|
||||||
}
|
}
|
||||||
|
|
|
@ -42,7 +42,7 @@ var streams = []*nats.StreamConfig{
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
Name: OutputKeyChangeEvent,
|
Name: OutputKeyChangeEvent,
|
||||||
Retention: nats.LimitsPolicy,
|
Retention: nats.InterestPolicy,
|
||||||
Storage: nats.FileStorage,
|
Storage: nats.FileStorage,
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
|
|
|
@ -49,7 +49,7 @@ func AddPublicRoutes(
|
||||||
federation *gomatrixserverlib.FederationClient,
|
federation *gomatrixserverlib.FederationClient,
|
||||||
cfg *config.SyncAPI,
|
cfg *config.SyncAPI,
|
||||||
) {
|
) {
|
||||||
js := jetstream.Prepare(&cfg.Matrix.JetStream)
|
js, _ := jetstream.Prepare(&cfg.Matrix.JetStream)
|
||||||
|
|
||||||
syncDB, err := storage.NewSyncServerDatasource(&cfg.Database)
|
syncDB, err := storage.NewSyncServerDatasource(&cfg.Database)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
|
|
|
@ -46,7 +46,7 @@ func NewInternalAPI(
|
||||||
appServices []config.ApplicationService, keyAPI keyapi.KeyInternalAPI,
|
appServices []config.ApplicationService, keyAPI keyapi.KeyInternalAPI,
|
||||||
rsAPI rsapi.RoomserverInternalAPI, pgClient pushgateway.Client,
|
rsAPI rsapi.RoomserverInternalAPI, pgClient pushgateway.Client,
|
||||||
) api.UserInternalAPI {
|
) api.UserInternalAPI {
|
||||||
js := jetstream.Prepare(&cfg.Matrix.JetStream)
|
js, _ := jetstream.Prepare(&cfg.Matrix.JetStream)
|
||||||
|
|
||||||
syncProducer := producers.NewSyncAPI(
|
syncProducer := producers.NewSyncAPI(
|
||||||
db, js,
|
db, js,
|
||||||
|
|
Loading…
Reference in a new issue