2022-01-05 11:44:49 -06:00
|
|
|
package jetstream
|
|
|
|
|
|
|
|
import (
|
2022-08-02 06:58:08 -05:00
|
|
|
"crypto/tls"
|
2022-03-25 07:24:21 -05:00
|
|
|
"fmt"
|
2022-03-23 05:20:18 -05:00
|
|
|
"reflect"
|
2022-01-05 11:44:49 -06:00
|
|
|
"strings"
|
|
|
|
"sync"
|
|
|
|
"time"
|
|
|
|
|
2022-03-25 07:24:21 -05:00
|
|
|
"github.com/getsentry/sentry-go"
|
2022-09-27 08:01:34 -05:00
|
|
|
"github.com/sirupsen/logrus"
|
|
|
|
|
2022-01-05 11:44:49 -06:00
|
|
|
"github.com/matrix-org/dendrite/setup/config"
|
2022-03-21 05:32:34 -05:00
|
|
|
"github.com/matrix-org/dendrite/setup/process"
|
2022-01-05 11:44:49 -06:00
|
|
|
|
|
|
|
natsserver "github.com/nats-io/nats-server/v2/server"
|
|
|
|
natsclient "github.com/nats-io/nats.go"
|
|
|
|
)
|
|
|
|
|
2022-05-09 08:15:24 -05:00
|
|
|
type NATSInstance struct {
|
|
|
|
*natsserver.Server
|
2023-03-17 06:09:45 -05:00
|
|
|
nc *natsclient.Conn
|
|
|
|
js natsclient.JetStreamContext
|
2022-04-08 04:12:30 -05:00
|
|
|
}
|
|
|
|
|
2022-08-05 03:19:33 -05:00
|
|
|
// natsLock is held for the full duration of (*NATSInstance).Prepare so that
// only one call at a time creates the server or its client connection.
var natsLock sync.Mutex
|
|
|
|
|
|
|
|
func DeleteAllStreams(js natsclient.JetStreamContext, cfg *config.JetStream) {
|
2022-05-09 11:23:02 -05:00
|
|
|
for _, stream := range streams { // streams are defined in streams.go
|
|
|
|
name := cfg.Prefixed(stream.Name)
|
|
|
|
_ = js.DeleteStream(name)
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2022-05-09 08:15:24 -05:00
|
|
|
func (s *NATSInstance) Prepare(process *process.ProcessContext, cfg *config.JetStream) (natsclient.JetStreamContext, *natsclient.Conn) {
|
2022-08-05 03:19:33 -05:00
|
|
|
natsLock.Lock()
|
|
|
|
defer natsLock.Unlock()
|
2022-01-05 11:44:49 -06:00
|
|
|
// check if we need an in-process NATS Server
|
|
|
|
if len(cfg.Addresses) != 0 {
|
2022-03-25 07:24:21 -05:00
|
|
|
return setupNATS(process, cfg, nil)
|
2022-01-05 11:44:49 -06:00
|
|
|
}
|
2022-05-09 08:15:24 -05:00
|
|
|
if s.Server == nil {
|
2022-01-05 11:44:49 -06:00
|
|
|
var err error
|
2023-02-24 01:56:53 -06:00
|
|
|
opts := &natsserver.Options{
|
2022-02-17 07:15:35 -06:00
|
|
|
ServerName: "monolith",
|
|
|
|
DontListen: true,
|
|
|
|
JetStream: true,
|
|
|
|
StoreDir: string(cfg.StoragePath),
|
|
|
|
NoSystemAccount: true,
|
|
|
|
MaxPayload: 16 * 1024 * 1024,
|
2022-04-27 07:36:40 -05:00
|
|
|
NoSigs: true,
|
2022-08-02 06:58:08 -05:00
|
|
|
NoLog: cfg.NoLog,
|
2023-02-24 01:56:53 -06:00
|
|
|
}
|
|
|
|
s.Server, err = natsserver.NewServer(opts)
|
2022-01-05 11:44:49 -06:00
|
|
|
if err != nil {
|
|
|
|
panic(err)
|
|
|
|
}
|
2023-03-22 03:21:32 -05:00
|
|
|
if !cfg.NoLog {
|
|
|
|
s.SetLogger(NewLogAdapter(), opts.Debug, opts.Trace)
|
|
|
|
}
|
2022-03-21 05:32:34 -05:00
|
|
|
go func() {
|
|
|
|
process.ComponentStarted()
|
2022-05-09 08:15:24 -05:00
|
|
|
s.Start()
|
2022-03-21 05:32:34 -05:00
|
|
|
}()
|
|
|
|
go func() {
|
|
|
|
<-process.WaitForShutdown()
|
2022-05-09 08:15:24 -05:00
|
|
|
s.Shutdown()
|
|
|
|
s.WaitForShutdown()
|
2022-03-21 05:32:34 -05:00
|
|
|
process.ComponentFinished()
|
|
|
|
}()
|
2022-01-05 11:44:49 -06:00
|
|
|
}
|
2023-07-06 03:04:46 -05:00
|
|
|
if !s.ReadyForConnections(time.Second * 60) {
|
2022-01-05 11:44:49 -06:00
|
|
|
logrus.Fatalln("NATS did not start in time")
|
|
|
|
}
|
2023-03-17 06:09:45 -05:00
|
|
|
// reuse existing connections
|
|
|
|
if s.nc != nil {
|
|
|
|
return s.js, s.nc
|
|
|
|
}
|
2022-05-09 08:15:24 -05:00
|
|
|
nc, err := natsclient.Connect("", natsclient.InProcessServer(s))
|
2022-01-05 11:44:49 -06:00
|
|
|
if err != nil {
|
|
|
|
logrus.Fatalln("Failed to create NATS client")
|
|
|
|
}
|
2023-03-17 06:09:45 -05:00
|
|
|
js, _ := setupNATS(process, cfg, nc)
|
|
|
|
s.js = js
|
|
|
|
s.nc = nc
|
|
|
|
return js, nc
|
2022-01-05 11:44:49 -06:00
|
|
|
}
|
|
|
|
|
2023-07-07 12:59:34 -05:00
|
|
|
// nolint:gocyclo
|
2022-03-25 07:24:21 -05:00
|
|
|
func setupNATS(process *process.ProcessContext, cfg *config.JetStream, nc *natsclient.Conn) (natsclient.JetStreamContext, *natsclient.Conn) {
|
2022-01-05 11:44:49 -06:00
|
|
|
if nc == nil {
|
|
|
|
var err error
|
2022-08-05 03:19:33 -05:00
|
|
|
opts := []natsclient.Option{}
|
2022-08-02 06:58:08 -05:00
|
|
|
if cfg.DisableTLSValidation {
|
2022-08-05 03:19:33 -05:00
|
|
|
opts = append(opts, natsclient.Secure(&tls.Config{
|
2022-08-02 06:58:08 -05:00
|
|
|
InsecureSkipVerify: true,
|
|
|
|
}))
|
|
|
|
}
|
|
|
|
nc, err = natsclient.Connect(strings.Join(cfg.Addresses, ","), opts...)
|
2022-01-05 11:44:49 -06:00
|
|
|
if err != nil {
|
|
|
|
logrus.WithError(err).Panic("Unable to connect to NATS")
|
2022-03-16 09:21:11 -05:00
|
|
|
return nil, nil
|
2022-01-05 11:44:49 -06:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
s, err := nc.JetStream()
|
|
|
|
if err != nil {
|
|
|
|
logrus.WithError(err).Panic("Unable to get JetStream context")
|
2022-03-16 09:21:11 -05:00
|
|
|
return nil, nil
|
2022-01-05 11:44:49 -06:00
|
|
|
}
|
|
|
|
|
|
|
|
for _, stream := range streams { // streams are defined in streams.go
|
2022-03-23 05:20:18 -05:00
|
|
|
name := cfg.Prefixed(stream.Name)
|
2022-01-05 11:44:49 -06:00
|
|
|
info, err := s.StreamInfo(name)
|
|
|
|
if err != nil && err != natsclient.ErrStreamNotFound {
|
|
|
|
logrus.WithError(err).Fatal("Unable to get stream info")
|
|
|
|
}
|
2022-03-23 05:20:18 -05:00
|
|
|
subjects := stream.Subjects
|
|
|
|
if len(subjects) == 0 {
|
|
|
|
// By default we want each stream to listen for the subjects
|
|
|
|
// that are either an exact match for the stream name, or where
|
|
|
|
// the first part of the subject is the stream name. ">" is a
|
|
|
|
// wildcard in NATS for one or more subject tokens. In the case
|
|
|
|
// that the stream is called "Foo", this will match any message
|
|
|
|
// with the subject "Foo", "Foo.Bar" or "Foo.Bar.Baz" etc.
|
|
|
|
subjects = []string{name, name + ".>"}
|
|
|
|
}
|
|
|
|
if info != nil {
|
2023-07-07 12:59:34 -05:00
|
|
|
// If the stream config doesn't match what we expect, try to update
|
|
|
|
// it. If that doesn't work then try to blow it away and we'll then
|
|
|
|
// recreate it in the next section.
|
|
|
|
// Each specific option that we set must be checked by hand, as if
|
|
|
|
// you DeepEqual the whole config struct, it will always show that
|
|
|
|
// there's a difference because the NATS Server will return defaults
|
|
|
|
// in the stream info.
|
2022-03-23 05:20:18 -05:00
|
|
|
switch {
|
|
|
|
case !reflect.DeepEqual(info.Config.Subjects, subjects):
|
|
|
|
fallthrough
|
|
|
|
case info.Config.Retention != stream.Retention:
|
|
|
|
fallthrough
|
|
|
|
case info.Config.Storage != stream.Storage:
|
2023-07-07 12:59:34 -05:00
|
|
|
fallthrough
|
|
|
|
case info.Config.MaxAge != stream.MaxAge:
|
|
|
|
// Try updating the stream first, as many things can be updated
|
|
|
|
// non-destructively.
|
|
|
|
if info, err = s.UpdateStream(stream); err != nil {
|
|
|
|
logrus.WithError(err).Warnf("Unable to update stream %q, recreating...", name)
|
|
|
|
// We failed to update the stream, this is a last attempt to get
|
|
|
|
// things working but may result in data loss.
|
|
|
|
if err = s.DeleteStream(name); err != nil {
|
|
|
|
logrus.WithError(err).Fatalf("Unable to delete stream %q", name)
|
|
|
|
}
|
|
|
|
info = nil
|
2022-03-23 05:20:18 -05:00
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
2022-01-05 11:44:49 -06:00
|
|
|
if info == nil {
|
|
|
|
// If we're trying to keep everything in memory (e.g. unit tests)
|
|
|
|
// then overwrite the storage policy.
|
|
|
|
if cfg.InMemory {
|
2022-02-04 07:08:13 -06:00
|
|
|
stream.Storage = natsclient.MemoryStorage
|
2022-01-05 11:44:49 -06:00
|
|
|
}
|
|
|
|
|
2022-01-07 11:31:57 -06:00
|
|
|
// Namespace the streams without modifying the original streams
|
|
|
|
// array, otherwise we end up with namespaces on namespaces.
|
|
|
|
namespaced := *stream
|
|
|
|
namespaced.Name = name
|
2022-03-23 05:20:18 -05:00
|
|
|
namespaced.Subjects = subjects
|
2022-01-07 11:31:57 -06:00
|
|
|
if _, err = s.AddStream(&namespaced); err != nil {
|
2022-03-25 07:24:21 -05:00
|
|
|
logger := logrus.WithError(err).WithFields(logrus.Fields{
|
|
|
|
"stream": namespaced.Name,
|
|
|
|
"subjects": namespaced.Subjects,
|
|
|
|
})
|
|
|
|
|
|
|
|
// If the stream was supposed to be in-memory to begin with
|
|
|
|
// then an error here is fatal so we'll give up.
|
|
|
|
if namespaced.Storage == natsclient.MemoryStorage {
|
|
|
|
logger.WithError(err).Fatal("Unable to add in-memory stream")
|
|
|
|
}
|
|
|
|
|
|
|
|
// The stream was supposed to be on disk. Let's try starting
|
|
|
|
// Dendrite with the stream in-memory instead. That'll mean that
|
|
|
|
// we can't recover anything that was queued on the disk but we
|
|
|
|
// will still be able to start and run hopefully in the meantime.
|
|
|
|
logger.WithError(err).Error("Unable to add stream")
|
|
|
|
sentry.CaptureException(fmt.Errorf("Unable to add stream %q: %w", namespaced.Name, err))
|
|
|
|
|
|
|
|
namespaced.Storage = natsclient.MemoryStorage
|
|
|
|
if _, err = s.AddStream(&namespaced); err != nil {
|
|
|
|
// We tried to add the stream in-memory instead but something
|
|
|
|
// went wrong. That's an unrecoverable situation so we will
|
|
|
|
// give up at this point.
|
|
|
|
logger.WithError(err).Fatal("Unable to add in-memory stream")
|
|
|
|
}
|
|
|
|
|
|
|
|
if stream.Storage != namespaced.Storage {
|
|
|
|
// We've managed to add the stream in memory. What's on the
|
|
|
|
// disk will be left alone, but our ability to recover from a
|
|
|
|
// future crash will be limited. Yell about it.
|
2022-10-04 07:02:41 -05:00
|
|
|
err := fmt.Errorf("Stream %q is running in-memory; this may be due to data corruption in the JetStream storage directory", namespaced.Name)
|
|
|
|
sentry.CaptureException(err)
|
|
|
|
process.Degraded(err)
|
2022-03-25 07:24:21 -05:00
|
|
|
}
|
2022-01-05 11:44:49 -06:00
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2022-03-29 07:14:35 -05:00
|
|
|
// Clean up old consumers so that interest-based consumers do the
|
|
|
|
// right thing.
|
|
|
|
for stream, consumers := range map[string][]string{
|
|
|
|
OutputClientData: {"SyncAPIClientAPIConsumer"},
|
|
|
|
OutputReceiptEvent: {"SyncAPIEDUServerReceiptConsumer", "FederationAPIEDUServerConsumer"},
|
|
|
|
OutputSendToDeviceEvent: {"SyncAPIEDUServerSendToDeviceConsumer", "FederationAPIEDUServerConsumer"},
|
|
|
|
OutputTypingEvent: {"SyncAPIEDUServerTypingConsumer", "FederationAPIEDUServerConsumer"},
|
2022-09-01 03:20:40 -05:00
|
|
|
OutputRoomEvent: {"AppserviceRoomserverConsumer"},
|
2022-09-27 08:01:34 -05:00
|
|
|
OutputStreamEvent: {"UserAPISyncAPIStreamEventConsumer"},
|
|
|
|
OutputReadUpdate: {"UserAPISyncAPIReadUpdateConsumer"},
|
2022-03-29 07:14:35 -05:00
|
|
|
} {
|
|
|
|
streamName := cfg.Matrix.JetStream.Prefixed(stream)
|
|
|
|
for _, consumer := range consumers {
|
|
|
|
consumerName := cfg.Matrix.JetStream.Prefixed(consumer) + "Pull"
|
|
|
|
consumerInfo, err := s.ConsumerInfo(streamName, consumerName)
|
|
|
|
if err != nil || consumerInfo == nil {
|
|
|
|
continue
|
|
|
|
}
|
|
|
|
if err = s.DeleteConsumer(streamName, consumerName); err != nil {
|
|
|
|
logrus.WithError(err).Errorf("Unable to clean up old consumer %q for stream %q", consumer, stream)
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2022-03-16 09:21:11 -05:00
|
|
|
return s, nc
|
2022-01-05 11:44:49 -06:00
|
|
|
}
|