Improve logging for processEventWithMissingState

This commit is contained in:
Neil Alexander 2022-11-02 11:30:49 +00:00
parent 86b25a6337
commit 16c2a95900
No known key found for this signature in database
GPG key ID: A02A2019A2BB0944

View file

@ -40,6 +40,7 @@ func (p *parsedRespState) Events() []*gomatrixserverlib.Event {
} }
type missingStateReq struct { type missingStateReq struct {
log *logrus.Entry
origin gomatrixserverlib.ServerName origin gomatrixserverlib.ServerName
db storage.Database db storage.Database
roomInfo *types.RoomInfo roomInfo *types.RoomInfo
@ -77,7 +78,7 @@ func (t *missingStateReq) processEventWithMissingState(
// event ids and then use /event to fetch the individual events. // event ids and then use /event to fetch the individual events.
// However not all version of synapse support /state_ids so you may // However not all version of synapse support /state_ids so you may
// need to fallback to /state. // need to fallback to /state.
logger := util.GetLogger(ctx).WithFields(map[string]interface{}{ t.log = util.GetLogger(ctx).WithFields(map[string]interface{}{
"txn_event": e.EventID(), "txn_event": e.EventID(),
"room_id": e.RoomID(), "room_id": e.RoomID(),
"txn_prev_events": e.PrevEventIDs(), "txn_prev_events": e.PrevEventIDs(),
@ -96,7 +97,7 @@ func (t *missingStateReq) processEventWithMissingState(
return nil, fmt.Errorf("expected to find missing events but didn't") return nil, fmt.Errorf("expected to find missing events but didn't")
} }
if isGapFilled { if isGapFilled {
logger.Infof("Gap filled by /get_missing_events, injecting %d new events", len(newEvents)) t.log.Infof("Gap filled by /get_missing_events, injecting %d new events", len(newEvents))
// we can just inject all the newEvents as new as we may have only missed 1 or 2 events and have filled // we can just inject all the newEvents as new as we may have only missed 1 or 2 events and have filled
// in the gap in the DAG // in the gap in the DAG
for _, newEvent := range newEvents { for _, newEvent := range newEvents {
@ -118,7 +119,7 @@ func (t *missingStateReq) processEventWithMissingState(
// then there's nothing else to do, we have everything we need to deal // then there's nothing else to do, we have everything we need to deal
// with the new event. // with the new event.
if isGapFilled && prevStatesKnown { if isGapFilled && prevStatesKnown {
logger.Infof("Gap filled and state found for all prev events") t.log.Infof("Gap filled and state found for all prev events")
return nil, nil return nil, nil
} }
@ -321,7 +322,7 @@ func (t *missingStateReq) lookupStateAfterEvent(ctx context.Context, roomVersion
defer span.Finish() defer span.Finish()
// try doing all this locally before we resort to querying federation // try doing all this locally before we resort to querying federation
respState := t.lookupStateAfterEventLocally(ctx, roomID, eventID) respState := t.lookupStateAfterEventLocally(ctx, eventID)
if respState != nil { if respState != nil {
return respState, true, nil return respState, true, nil
} }
@ -373,7 +374,7 @@ func (t *missingStateReq) cacheAndReturn(ev *gomatrixserverlib.Event) *gomatrixs
return ev return ev
} }
func (t *missingStateReq) lookupStateAfterEventLocally(ctx context.Context, roomID, eventID string) *parsedRespState { func (t *missingStateReq) lookupStateAfterEventLocally(ctx context.Context, eventID string) *parsedRespState {
span, ctx := opentracing.StartSpanFromContext(ctx, "lookupStateAfterEventLocally") span, ctx := opentracing.StartSpanFromContext(ctx, "lookupStateAfterEventLocally")
defer span.Finish() defer span.Finish()
@ -381,12 +382,12 @@ func (t *missingStateReq) lookupStateAfterEventLocally(ctx context.Context, room
roomState := state.NewStateResolution(t.db, t.roomInfo) roomState := state.NewStateResolution(t.db, t.roomInfo)
stateAtEvents, err := t.db.StateAtEventIDs(ctx, []string{eventID}) stateAtEvents, err := t.db.StateAtEventIDs(ctx, []string{eventID})
if err != nil { if err != nil {
util.GetLogger(ctx).WithField("room_id", roomID).WithError(err).Warnf("failed to get state after %s locally", eventID) t.log.WithError(err).Warnf("failed to get state after %s locally", eventID)
return nil return nil
} }
stateEntries, err := roomState.LoadCombinedStateAfterEvents(ctx, stateAtEvents) stateEntries, err := roomState.LoadCombinedStateAfterEvents(ctx, stateAtEvents)
if err != nil { if err != nil {
util.GetLogger(ctx).WithField("room_id", roomID).WithError(err).Warnf("failed to load combined state after %s locally", eventID) t.log.WithError(err).Warnf("failed to load combined state after %s locally", eventID)
return nil return nil
} }
stateEventNIDs := make([]types.EventNID, 0, len(stateEntries)) stateEventNIDs := make([]types.EventNID, 0, len(stateEntries))
@ -395,7 +396,7 @@ func (t *missingStateReq) lookupStateAfterEventLocally(ctx context.Context, room
} }
stateEvents, err := t.db.Events(ctx, stateEventNIDs) stateEvents, err := t.db.Events(ctx, stateEventNIDs)
if err != nil { if err != nil {
util.GetLogger(ctx).WithField("room_id", roomID).WithError(err).Warnf("failed to load state events locally") t.log.WithError(err).Warnf("failed to load state events locally")
return nil return nil
} }
res.StateEvents = make([]*gomatrixserverlib.Event, 0, len(stateEvents)) res.StateEvents = make([]*gomatrixserverlib.Event, 0, len(stateEvents))
@ -429,7 +430,7 @@ func (t *missingStateReq) lookupStateAfterEventLocally(ctx context.Context, room
for evID := range missingAuthEvents { for evID := range missingAuthEvents {
missingEventList = append(missingEventList, evID) missingEventList = append(missingEventList, evID)
} }
util.GetLogger(ctx).WithField("count", len(missingEventList)).Debugf("Fetching missing auth events") t.log.WithField("count", len(missingEventList)).Debugf("Fetching missing auth events")
events, err := t.db.EventsFromIDs(ctx, missingEventList) events, err := t.db.EventsFromIDs(ctx, missingEventList)
if err != nil { if err != nil {
return nil return nil
@ -485,7 +486,7 @@ retryAllowedState:
default: default:
return nil, fmt.Errorf("missing auth event %s and failed to look it up: %w", missing.AuthEventID, err2) return nil, fmt.Errorf("missing auth event %s and failed to look it up: %w", missing.AuthEventID, err2)
} }
util.GetLogger(ctx).Tracef("fetched event %s", missing.AuthEventID) t.log.Tracef("fetched event %s", missing.AuthEventID)
resolvedStateEvents = append(resolvedStateEvents, h) resolvedStateEvents = append(resolvedStateEvents, h)
goto retryAllowedState goto retryAllowedState
default: default:
@ -504,7 +505,7 @@ func (t *missingStateReq) getMissingEvents(ctx context.Context, e *gomatrixserve
span, ctx := opentracing.StartSpanFromContext(ctx, "getMissingEvents") span, ctx := opentracing.StartSpanFromContext(ctx, "getMissingEvents")
defer span.Finish() defer span.Finish()
logger := util.GetLogger(ctx).WithField("event_id", e.EventID()).WithField("room_id", e.RoomID()) logger := t.log.WithField("event_id", e.EventID()).WithField("room_id", e.RoomID())
latest, _, _, err := t.db.LatestEventIDs(ctx, t.roomInfo.RoomNID) latest, _, _, err := t.db.LatestEventIDs(ctx, t.roomInfo.RoomNID)
if err != nil { if err != nil {
return nil, false, false, fmt.Errorf("t.DB.LatestEventIDs: %w", err) return nil, false, false, fmt.Errorf("t.DB.LatestEventIDs: %w", err)
@ -666,7 +667,7 @@ func (t *missingStateReq) lookupMissingStateViaStateIDs(ctx context.Context, roo
span, ctx := opentracing.StartSpanFromContext(ctx, "lookupMissingStateViaStateIDs") span, ctx := opentracing.StartSpanFromContext(ctx, "lookupMissingStateViaStateIDs")
defer span.Finish() defer span.Finish()
util.GetLogger(ctx).WithField("room_id", roomID).Infof("lookupMissingStateViaStateIDs %s", eventID) t.log.Infof("lookupMissingStateViaStateIDs %s", eventID)
// fetch the state event IDs at the time of the event // fetch the state event IDs at the time of the event
var stateIDs gomatrixserverlib.RespStateIDs var stateIDs gomatrixserverlib.RespStateIDs
var err error var err error
@ -719,16 +720,15 @@ func (t *missingStateReq) lookupMissingStateViaStateIDs(ctx context.Context, roo
concurrentRequests := 8 concurrentRequests := 8
missingCount := len(missing) missingCount := len(missing)
util.GetLogger(ctx).WithField("room_id", roomID).WithField("event_id", eventID).Debugf("lookupMissingStateViaStateIDs missing %d/%d events", missingCount, len(wantIDs)) t.log.WithField("event_id", eventID).Debugf("lookupMissingStateViaStateIDs missing %d/%d events", missingCount, len(wantIDs))
// If over 50% of the auth/state events from /state_ids are missing // If over 50% of the auth/state events from /state_ids are missing
// then we'll just call /state instead, otherwise we'll just end up // then we'll just call /state instead, otherwise we'll just end up
// hammering the remote side with /event requests unnecessarily. // hammering the remote side with /event requests unnecessarily.
if missingCount > concurrentRequests && missingCount > len(wantIDs)/2 { if missingCount > concurrentRequests && missingCount > len(wantIDs)/2 {
util.GetLogger(ctx).WithFields(logrus.Fields{ t.log.WithFields(logrus.Fields{
"missing": missingCount, "missing": missingCount,
"event_id": eventID, "event_id": eventID,
"room_id": roomID,
"total_state": len(stateIDs.StateEventIDs), "total_state": len(stateIDs.StateEventIDs),
"total_auth_events": len(stateIDs.AuthEventIDs), "total_auth_events": len(stateIDs.AuthEventIDs),
}).Debug("Fetching all state at event") }).Debug("Fetching all state at event")
@ -736,10 +736,9 @@ func (t *missingStateReq) lookupMissingStateViaStateIDs(ctx context.Context, roo
} }
if missingCount > 0 { if missingCount > 0 {
util.GetLogger(ctx).WithFields(logrus.Fields{ t.log.WithFields(logrus.Fields{
"missing": missingCount, "missing": missingCount,
"event_id": eventID, "event_id": eventID,
"room_id": roomID,
"total_state": len(stateIDs.StateEventIDs), "total_state": len(stateIDs.StateEventIDs),
"total_auth_events": len(stateIDs.AuthEventIDs), "total_auth_events": len(stateIDs.AuthEventIDs),
"concurrent_requests": concurrentRequests, "concurrent_requests": concurrentRequests,
@ -776,9 +775,8 @@ func (t *missingStateReq) lookupMissingStateViaStateIDs(ctx context.Context, roo
case nil: case nil:
break break
default: default:
util.GetLogger(ctx).WithFields(logrus.Fields{ t.log.WithFields(logrus.Fields{
"event_id": missingEventID, "missing_event_id": missingEventID,
"room_id": roomID,
}).WithError(herr).Warn("Failed to fetch missing event") }).WithError(herr).Warn("Failed to fetch missing event")
return return
} }
@ -847,7 +845,7 @@ func (t *missingStateReq) lookupEvent(ctx context.Context, roomVersion gomatrixs
// fetch from the roomserver // fetch from the roomserver
events, err := t.db.EventsFromIDs(ctx, []string{missingEventID}) events, err := t.db.EventsFromIDs(ctx, []string{missingEventID})
if err != nil { if err != nil {
util.GetLogger(ctx).Warnf("Failed to query roomserver for missing event %s: %s - falling back to remote", missingEventID, err) t.log.Warnf("Failed to query roomserver for missing event %s: %s - falling back to remote", missingEventID, err)
} else if len(events) == 1 { } else if len(events) == 1 {
return events[0].Event, nil return events[0].Event, nil
} }
@ -859,7 +857,7 @@ func (t *missingStateReq) lookupEvent(ctx context.Context, roomVersion gomatrixs
defer cancel() defer cancel()
txn, err := t.federation.GetEvent(reqctx, serverName, missingEventID) txn, err := t.federation.GetEvent(reqctx, serverName, missingEventID)
if err != nil || len(txn.PDUs) == 0 { if err != nil || len(txn.PDUs) == 0 {
util.GetLogger(ctx).WithError(err).WithField("event_id", missingEventID).Warn("Failed to get missing /event for event ID") t.log.WithError(err).WithField("missing_event_id", missingEventID).Warn("Failed to get missing /event for event ID")
if errors.Is(err, context.DeadlineExceeded) { if errors.Is(err, context.DeadlineExceeded) {
select { select {
case <-reqctx.Done(): // this server took too long case <-reqctx.Done(): // this server took too long
@ -872,18 +870,18 @@ func (t *missingStateReq) lookupEvent(ctx context.Context, roomVersion gomatrixs
} }
event, err = gomatrixserverlib.NewEventFromUntrustedJSON(txn.PDUs[0], roomVersion) event, err = gomatrixserverlib.NewEventFromUntrustedJSON(txn.PDUs[0], roomVersion)
if err != nil { if err != nil {
util.GetLogger(ctx).WithError(err).WithField("event_id", missingEventID).Warnf("Failed to parse event JSON of event returned from /event") t.log.WithError(err).WithField("missing_event_id", missingEventID).Warnf("Failed to parse event JSON of event returned from /event")
continue continue
} }
found = true found = true
break break
} }
if !found { if !found {
util.GetLogger(ctx).WithField("event_id", missingEventID).Warnf("Failed to get missing /event for event ID from %d server(s)", len(t.servers)) t.log.WithField("missing_event_id", missingEventID).Warnf("Failed to get missing /event for event ID from %d server(s)", len(t.servers))
return nil, fmt.Errorf("wasn't able to find event via %d server(s)", len(t.servers)) return nil, fmt.Errorf("wasn't able to find event via %d server(s)", len(t.servers))
} }
if err := event.VerifyEventSignatures(ctx, t.keys); err != nil { if err := event.VerifyEventSignatures(ctx, t.keys); err != nil {
util.GetLogger(ctx).WithError(err).Warnf("Couldn't validate signature of event %q from /event", event.EventID()) t.log.WithError(err).Warnf("Couldn't validate signature of event %q from /event", event.EventID())
return nil, verifySigError{event.EventID(), err} return nil, verifySigError{event.EventID(), err}
} }
return t.cacheAndReturn(event), nil return t.cacheAndReturn(event), nil