Add /search tests (#3025)

This commit is contained in:
Till 2023-03-27 11:26:52 +02:00 committed by GitHub
parent aa1bda4c58
commit e8b2162a01
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
5 changed files with 389 additions and 50 deletions

View file

@ -18,6 +18,7 @@
package fulltext package fulltext
import ( import (
"regexp"
"strings" "strings"
"github.com/blevesearch/bleve/v2" "github.com/blevesearch/bleve/v2"
@ -60,6 +61,7 @@ type Indexer interface {
Index(elements ...IndexElement) error Index(elements ...IndexElement) error
Delete(eventID string) error Delete(eventID string) error
Search(term string, roomIDs, keys []string, limit, from int, orderByStreamPos bool) (*bleve.SearchResult, error) Search(term string, roomIDs, keys []string, limit, from int, orderByStreamPos bool) (*bleve.SearchResult, error)
GetHighlights(result *bleve.SearchResult) []string
Close() error Close() error
} }
@ -124,6 +126,47 @@ func (f *Search) Delete(eventID string) error {
return f.FulltextIndex.Delete(eventID) return f.FulltextIndex.Delete(eventID)
} }
// highlightMatcher captures the text enclosed in <mark>…</mark> tags inside a
// highlighted fragment. The match is non-greedy, so several marked terms on
// one line are captured individually.
var highlightMatcher = regexp.MustCompile("<mark>(.*?)</mark>")

// GetHighlights extracts the highlights from a SearchResult.
//
// It scans the "Content" fragments of every hit for <mark>…</mark> spans and
// returns each distinct marked term exactly once, in order of first
// appearance (hit order, then fragment order, then position in the fragment).
// The returned slice is never nil; a nil result or a result without any
// highlighted fragments yields an empty slice.
func (f *Search) GetHighlights(result *bleve.SearchResult) []string {
	highlights := []string{}
	if result == nil {
		return highlights
	}

	// seen de-duplicates terms; highlights keeps them in a stable order, which
	// ranging over the map alone would not.
	seen := make(map[string]struct{})
	for _, hit := range result.Hits {
		// Indexing a nil or key-less map yields a nil slice, so hits without
		// "Content" fragments are skipped by the empty range below.
		for _, fragment := range hit.Fragments["Content"] {
			for _, match := range highlightMatcher.FindAllStringSubmatch(fragment, -1) {
				// match[0] is the whole "<mark>…</mark>" span, match[1] the
				// single capture group holding the highlighted term.
				term := match[1]
				if _, dup := seen[term]; dup {
					continue
				}
				seen[term] = struct{}{}
				highlights = append(highlights, term)
			}
		}
	}
	return highlights
}
// Search searches the index given a search term, roomIDs and keys. // Search searches the index given a search term, roomIDs and keys.
func (f *Search) Search(term string, roomIDs, keys []string, limit, from int, orderByStreamPos bool) (*bleve.SearchResult, error) { func (f *Search) Search(term string, roomIDs, keys []string, limit, from int, orderByStreamPos bool) (*bleve.SearchResult, error) {
qry := bleve.NewConjunctionQuery() qry := bleve.NewConjunctionQuery()
@ -163,6 +206,10 @@ func (f *Search) Search(term string, roomIDs, keys []string, limit, from int, or
s.SortBy([]string{"-StreamPosition"}) s.SortBy([]string{"-StreamPosition"})
} }
// Highlight some words
s.Highlight = bleve.NewHighlight()
s.Highlight.Fields = []string{"Content"}
return f.FulltextIndex.Search(s) return f.FulltextIndex.Search(s)
} }

View file

@ -164,10 +164,12 @@ func TestSearch(t *testing.T) {
args args args args
wantCount int wantCount int
wantErr bool wantErr bool
wantHighlights []string
}{ }{
{ {
name: "Can search for many results in one room", name: "Can search for many results in one room",
wantCount: 16, wantCount: 16,
wantHighlights: []string{"lorem"},
args: args{ args: args{
term: "lorem", term: "lorem",
roomIndex: []int{0}, roomIndex: []int{0},
@ -177,6 +179,7 @@ func TestSearch(t *testing.T) {
{ {
name: "Can search for one result in one room", name: "Can search for one result in one room",
wantCount: 1, wantCount: 1,
wantHighlights: []string{"lorem"},
args: args{ args: args{
term: "lorem", term: "lorem",
roomIndex: []int{16}, roomIndex: []int{16},
@ -186,6 +189,7 @@ func TestSearch(t *testing.T) {
{ {
name: "Can search for many results in multiple rooms", name: "Can search for many results in multiple rooms",
wantCount: 17, wantCount: 17,
wantHighlights: []string{"lorem"},
args: args{ args: args{
term: "lorem", term: "lorem",
roomIndex: []int{0, 16}, roomIndex: []int{0, 16},
@ -195,6 +199,7 @@ func TestSearch(t *testing.T) {
{ {
name: "Can search for many results in all rooms, reversed", name: "Can search for many results in all rooms, reversed",
wantCount: 30, wantCount: 30,
wantHighlights: []string{"lorem"},
args: args{ args: args{
term: "lorem", term: "lorem",
limit: 30, limit: 30,
@ -204,6 +209,7 @@ func TestSearch(t *testing.T) {
{ {
name: "Can search for specific search room name", name: "Can search for specific search room name",
wantCount: 1, wantCount: 1,
wantHighlights: []string{"testing"},
args: args{ args: args{
term: "testing", term: "testing",
roomIndex: []int{}, roomIndex: []int{},
@ -214,6 +220,7 @@ func TestSearch(t *testing.T) {
{ {
name: "Can search for specific search room topic", name: "Can search for specific search room topic",
wantCount: 1, wantCount: 1,
wantHighlights: []string{"fulltext"},
args: args{ args: args{
term: "fulltext", term: "fulltext",
roomIndex: []int{}, roomIndex: []int{},
@ -222,6 +229,7 @@ func TestSearch(t *testing.T) {
}, },
}, },
} }
for _, tt := range tests { for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) { t.Run(tt.name, func(t *testing.T) {
f, ctx := mustOpenIndex(t, "") f, ctx := mustOpenIndex(t, "")
@ -238,6 +246,12 @@ func TestSearch(t *testing.T) {
t.Errorf("Search() error = %v, wantErr %v", err, tt.wantErr) t.Errorf("Search() error = %v, wantErr %v", err, tt.wantErr)
return return
} }
highlights := f.GetHighlights(got)
if !reflect.DeepEqual(highlights, tt.wantHighlights) {
t.Errorf("Search() got highligts = %v, want %v", highlights, tt.wantHighlights)
}
if !reflect.DeepEqual(len(got.Hits), tt.wantCount) { if !reflect.DeepEqual(len(got.Hits), tt.wantCount) {
t.Errorf("Search() got = %v, want %v", len(got.Hits), tt.wantCount) t.Errorf("Search() got = %v, want %v", len(got.Hits), tt.wantCount)
} }

View file

@ -33,6 +33,7 @@ type Indexer interface {
Index(elements ...IndexElement) error Index(elements ...IndexElement) error
Delete(eventID string) error Delete(eventID string) error
Search(term string, roomIDs, keys []string, limit, from int, orderByStreamPos bool) (SearchResult, error) Search(term string, roomIDs, keys []string, limit, from int, orderByStreamPos bool) (SearchResult, error)
GetHighlights(result SearchResult) []string
Close() error Close() error
} }
@ -71,3 +72,7 @@ func (f *Search) Delete(eventID string) error {
func (f *Search) Search(term string, roomIDs, keys []string, limit, from int, orderByStreamPos bool) (SearchResult, error) { func (f *Search) Search(term string, roomIDs, keys []string, limit, from int, orderByStreamPos bool) (SearchResult, error) {
return SearchResult{}, nil return SearchResult{}, nil
} }
// GetHighlights is a no-op in this implementation: the given result is
// ignored and an empty, non-nil slice is always returned.
func (f *Search) GetHighlights(result SearchResult) []string {
	return make([]string, 0)
}

View file

@ -19,7 +19,6 @@ import (
"net/http" "net/http"
"sort" "sort"
"strconv" "strconv"
"strings"
"time" "time"
"github.com/blevesearch/bleve/v2/search" "github.com/blevesearch/bleve/v2/search"
@ -123,8 +122,8 @@ func Search(req *http.Request, device *api.Device, syncDB storage.Database, fts
return util.JSONResponse{ return util.JSONResponse{
Code: http.StatusOK, Code: http.StatusOK,
JSON: SearchResponse{ JSON: SearchResponse{
SearchCategories: SearchCategories{ SearchCategories: SearchCategoriesResponse{
RoomEvents: RoomEvents{ RoomEvents: RoomEventsResponse{
Count: int(result.Total), Count: int(result.Total),
NextBatch: nil, NextBatch: nil,
}, },
@ -158,7 +157,7 @@ func Search(req *http.Request, device *api.Device, syncDB storage.Database, fts
} }
groups := make(map[string]RoomResult) groups := make(map[string]RoomResult)
knownUsersProfiles := make(map[string]ProfileInfo) knownUsersProfiles := make(map[string]ProfileInfoResponse)
// Sort the events by depth, as the returned values aren't ordered // Sort the events by depth, as the returned values aren't ordered
if orderByTime { if orderByTime {
@ -180,7 +179,7 @@ func Search(req *http.Request, device *api.Device, syncDB storage.Database, fts
return jsonerror.InternalServerError() return jsonerror.InternalServerError()
} }
profileInfos := make(map[string]ProfileInfo) profileInfos := make(map[string]ProfileInfoResponse)
for _, ev := range append(eventsBefore, eventsAfter...) { for _, ev := range append(eventsBefore, eventsAfter...) {
profile, ok := knownUsersProfiles[event.Sender()] profile, ok := knownUsersProfiles[event.Sender()]
if !ok { if !ok {
@ -192,7 +191,7 @@ func Search(req *http.Request, device *api.Device, syncDB storage.Database, fts
if stateEvent == nil { if stateEvent == nil {
continue continue
} }
profile = ProfileInfo{ profile = ProfileInfoResponse{
AvatarURL: gjson.GetBytes(stateEvent.Content(), "avatar_url").Str, AvatarURL: gjson.GetBytes(stateEvent.Content(), "avatar_url").Str,
DisplayName: gjson.GetBytes(stateEvent.Content(), "displayname").Str, DisplayName: gjson.GetBytes(stateEvent.Content(), "displayname").Str,
} }
@ -237,13 +236,13 @@ func Search(req *http.Request, device *api.Device, syncDB storage.Database, fts
} }
res := SearchResponse{ res := SearchResponse{
SearchCategories: SearchCategories{ SearchCategories: SearchCategoriesResponse{
RoomEvents: RoomEvents{ RoomEvents: RoomEventsResponse{
Count: int(result.Total), Count: int(result.Total),
Groups: Groups{RoomID: groups}, Groups: Groups{RoomID: groups},
Results: results, Results: results,
NextBatch: nextBatchResult, NextBatch: nextBatchResult,
Highlights: strings.Split(searchReq.SearchCategories.RoomEvents.SearchTerm, " "), Highlights: fts.GetHighlights(result),
State: stateForRooms, State: stateForRooms,
}, },
}, },
@ -286,30 +285,40 @@ func contextEvents(
return eventsBefore, eventsAfter, err return eventsBefore, eventsAfter, err
} }
type SearchRequest struct { type EventContext struct {
SearchCategories struct {
RoomEvents struct {
EventContext struct {
AfterLimit int `json:"after_limit,omitempty"` AfterLimit int `json:"after_limit,omitempty"`
BeforeLimit int `json:"before_limit,omitempty"` BeforeLimit int `json:"before_limit,omitempty"`
IncludeProfile bool `json:"include_profile,omitempty"` IncludeProfile bool `json:"include_profile,omitempty"`
} `json:"event_context"` }
Filter gomatrixserverlib.RoomEventFilter `json:"filter"`
Groupings struct { type GroupBy struct {
GroupBy []struct {
Key string `json:"key"` Key string `json:"key"`
} `json:"group_by"` }
} `json:"groupings"`
type Groupings struct {
GroupBy []GroupBy `json:"group_by"`
}
type RoomEvents struct {
EventContext EventContext `json:"event_context"`
Filter gomatrixserverlib.RoomEventFilter `json:"filter"`
Groupings Groupings `json:"groupings"`
IncludeState bool `json:"include_state"` IncludeState bool `json:"include_state"`
Keys []string `json:"keys"` Keys []string `json:"keys"`
OrderBy string `json:"order_by"` OrderBy string `json:"order_by"`
SearchTerm string `json:"search_term"` SearchTerm string `json:"search_term"`
} `json:"room_events"` }
} `json:"search_categories"`
type SearchCategories struct {
RoomEvents RoomEvents `json:"room_events"`
}
type SearchRequest struct {
SearchCategories SearchCategories `json:"search_categories"`
} }
type SearchResponse struct { type SearchResponse struct {
SearchCategories SearchCategories `json:"search_categories"` SearchCategories SearchCategoriesResponse `json:"search_categories"`
} }
type RoomResult struct { type RoomResult struct {
NextBatch *string `json:"next_batch,omitempty"` NextBatch *string `json:"next_batch,omitempty"`
@ -332,15 +341,15 @@ type SearchContextResponse struct {
EventsAfter []gomatrixserverlib.ClientEvent `json:"events_after"` EventsAfter []gomatrixserverlib.ClientEvent `json:"events_after"`
EventsBefore []gomatrixserverlib.ClientEvent `json:"events_before"` EventsBefore []gomatrixserverlib.ClientEvent `json:"events_before"`
Start string `json:"start"` Start string `json:"start"`
ProfileInfo map[string]ProfileInfo `json:"profile_info"` ProfileInfo map[string]ProfileInfoResponse `json:"profile_info"`
} }
type ProfileInfo struct { type ProfileInfoResponse struct {
AvatarURL string `json:"avatar_url"` AvatarURL string `json:"avatar_url"`
DisplayName string `json:"display_name"` DisplayName string `json:"display_name"`
} }
type RoomEvents struct { type RoomEventsResponse struct {
Count int `json:"count"` Count int `json:"count"`
Groups Groups `json:"groups"` Groups Groups `json:"groups"`
Highlights []string `json:"highlights"` Highlights []string `json:"highlights"`
@ -348,6 +357,6 @@ type RoomEvents struct {
Results []Result `json:"results"` Results []Result `json:"results"`
State map[string][]gomatrixserverlib.ClientEvent `json:"state,omitempty"` State map[string][]gomatrixserverlib.ClientEvent `json:"state,omitempty"`
} }
type SearchCategories struct { type SearchCategoriesResponse struct {
RoomEvents RoomEvents `json:"room_events"` RoomEvents RoomEventsResponse `json:"room_events"`
} }

View file

@ -0,0 +1,264 @@
package routing
import (
"bytes"
"encoding/json"
"net/http"
"net/http/httptest"
"testing"
"github.com/matrix-org/dendrite/internal/fulltext"
"github.com/matrix-org/dendrite/internal/sqlutil"
"github.com/matrix-org/dendrite/syncapi/storage"
"github.com/matrix-org/dendrite/syncapi/types"
"github.com/matrix-org/dendrite/test"
"github.com/matrix-org/dendrite/test/testrig"
userapi "github.com/matrix-org/dendrite/userapi/api"
"github.com/matrix-org/gomatrixserverlib"
"github.com/stretchr/testify/assert"
)
// TestSearch drives the Search handler with a table of requests against a room
// containing three messages ("context before", "hello world3!",
// "context after"). The whole table is run once per database type supplied by
// test.WithAllDatabases.
//
// For each case:
//   - wantOK == false means a non-2xx response is an acceptable outcome and
//     ends the subtest successfully;
//   - otherwise the response body must be a SearchResponse whose room event
//     count equals wantResponseCount;
//   - if the request asked for state, the returned state must not be empty.
func TestSearch(t *testing.T) {
	alice := test.NewUser(t)
	aliceDevice := userapi.Device{UserID: alice.ID}
	room := test.NewRoom(t, alice)
	room.CreateAndInsert(t, alice, "m.room.message", map[string]interface{}{"body": "context before"})
	room.CreateAndInsert(t, alice, "m.room.message", map[string]interface{}{"body": "hello world3!"})
	room.CreateAndInsert(t, alice, "m.room.message", map[string]interface{}{"body": "context after"})

	roomsFilter := []string{room.ID}
	roomsFilterUnknown := []string{"!unknown"}

	emptyFromString := ""
	fromStringValid := "1"
	fromStringInvalid := "iCantBeParsed"

	testCases := []struct {
		name              string
		wantOK            bool
		searchReq         SearchRequest
		device            *userapi.Device
		wantResponseCount int
		from              *string
	}{
		{
			name:      "no user ID",
			searchReq: SearchRequest{},
			device:    &userapi.Device{},
		},
		{
			name:      "with alice ID",
			wantOK:    true,
			searchReq: SearchRequest{},
			device:    &aliceDevice,
		},
		{
			name:   "searchTerm specified, found at the beginning",
			wantOK: true,
			searchReq: SearchRequest{
				SearchCategories: SearchCategories{RoomEvents: RoomEvents{SearchTerm: "hello"}},
			},
			device:            &aliceDevice,
			wantResponseCount: 1,
		},
		{
			name:   "searchTerm specified, found at the end",
			wantOK: true,
			searchReq: SearchRequest{
				SearchCategories: SearchCategories{RoomEvents: RoomEvents{SearchTerm: "world3"}},
			},
			device:            &aliceDevice,
			wantResponseCount: 1,
		},
		/* the following would need matchQuery.SetFuzziness(1) in bleve.go
		{
			name:   "searchTerm fuzzy search",
			wantOK: true,
			searchReq: SearchRequest{
				SearchCategories: SearchCategories{RoomEvents: RoomEvents{SearchTerm: "hell"}}, // this still should find hello world
			},
			device:            &aliceDevice,
			wantResponseCount: 1,
		},
		*/
		{
			name:   "searchTerm specified but no result",
			wantOK: true,
			searchReq: SearchRequest{
				SearchCategories: SearchCategories{RoomEvents: RoomEvents{SearchTerm: "i don't match"}},
			},
			device: &aliceDevice,
		},
		{
			name:   "filter on room",
			wantOK: true,
			searchReq: SearchRequest{
				SearchCategories: SearchCategories{
					RoomEvents: RoomEvents{
						SearchTerm: "hello",
						Filter: gomatrixserverlib.RoomEventFilter{
							Rooms: &roomsFilter,
						},
					},
				},
			},
			device:            &aliceDevice,
			wantResponseCount: 1,
		},
		{
			name: "filter on unknown room",
			searchReq: SearchRequest{
				SearchCategories: SearchCategories{
					RoomEvents: RoomEvents{
						SearchTerm: "hello",
						Filter: gomatrixserverlib.RoomEventFilter{
							Rooms: &roomsFilterUnknown,
						},
					},
				},
			},
			device: &aliceDevice,
		},
		{
			name:   "include state",
			wantOK: true,
			searchReq: SearchRequest{
				SearchCategories: SearchCategories{
					RoomEvents: RoomEvents{
						SearchTerm: "hello",
						Filter: gomatrixserverlib.RoomEventFilter{
							Rooms: &roomsFilter,
						},
						IncludeState: true,
					},
				},
			},
			device:            &aliceDevice,
			wantResponseCount: 1,
		},
		{
			name:   "empty from does not error",
			wantOK: true,
			searchReq: SearchRequest{
				SearchCategories: SearchCategories{
					RoomEvents: RoomEvents{
						SearchTerm: "hello",
						Filter: gomatrixserverlib.RoomEventFilter{
							Rooms: &roomsFilter,
						},
					},
				},
			},
			wantResponseCount: 1,
			device:            &aliceDevice,
			from:              &emptyFromString,
		},
		{
			name:   "valid from does not error",
			wantOK: true,
			searchReq: SearchRequest{
				SearchCategories: SearchCategories{
					RoomEvents: RoomEvents{
						SearchTerm: "hello",
						Filter: gomatrixserverlib.RoomEventFilter{
							Rooms: &roomsFilter,
						},
					},
				},
			},
			wantResponseCount: 1,
			device:            &aliceDevice,
			from:              &fromStringValid,
		},
		{
			name: "invalid from does error",
			searchReq: SearchRequest{
				SearchCategories: SearchCategories{
					RoomEvents: RoomEvents{
						SearchTerm: "hello",
						Filter: gomatrixserverlib.RoomEventFilter{
							Rooms: &roomsFilter,
						},
					},
				},
			},
			device: &aliceDevice,
			from:   &fromStringInvalid,
		},
		{
			name:   "order by stream position",
			wantOK: true,
			searchReq: SearchRequest{
				SearchCategories: SearchCategories{RoomEvents: RoomEvents{SearchTerm: "hello", OrderBy: "recent"}},
			},
			device:            &aliceDevice,
			wantResponseCount: 1,
		},
	}

	test.WithAllDatabases(t, func(t *testing.T, dbType test.DBType) {
		cfg, processCtx, closeDB := testrig.CreateConfig(t, dbType)
		defer closeDB()

		// create requisites
		// Setup failures abort immediately: continuing with a nil index or
		// database would only produce panics or misleading subtest failures.
		fts, err := fulltext.New(processCtx, cfg.SyncAPI.Fulltext)
		if err != nil {
			t.Fatalf("failed to create fulltext index: %v", err)
		}
		assert.NotNil(t, fts)

		cm := sqlutil.NewConnectionManager(processCtx, cfg.Global.DatabaseOptions)
		db, err := storage.NewSyncServerDatasource(processCtx.Context(), cm, &cfg.SyncAPI.Database)
		if err != nil {
			t.Fatalf("failed to create sync server datasource: %v", err)
		}

		elements := []fulltext.IndexElement{}
		// store the events in the database
		var sp types.StreamPosition
		for _, x := range room.Events() {
			var stateEvents []*gomatrixserverlib.HeaderedEvent
			var stateEventIDs []string
			if x.Type() == gomatrixserverlib.MRoomMember {
				stateEvents = append(stateEvents, x)
				stateEventIDs = append(stateEventIDs, x.EventID())
			}
			sp, err = db.WriteEvent(processCtx.Context(), x, stateEvents, stateEventIDs, nil, nil, false, gomatrixserverlib.HistoryVisibilityShared)
			if err != nil {
				t.Fatalf("failed to write event %s: %v", x.EventID(), err)
			}
			// Only message events are added to the fulltext index.
			if x.Type() != "m.room.message" {
				continue
			}
			elements = append(elements, fulltext.IndexElement{
				EventID:        x.EventID(),
				RoomID:         x.RoomID(),
				Content:        string(x.Content()),
				ContentType:    x.Type(),
				StreamPosition: int64(sp),
			})
		}
		// Index the events
		if err = fts.Index(elements...); err != nil {
			t.Fatalf("failed to index events: %v", err)
		}

		// run the tests
		for _, tc := range testCases {
			t.Run(tc.name, func(t *testing.T) {
				reqBody := &bytes.Buffer{}
				// Local err: the subtest must not write to the setup's variable.
				err := json.NewEncoder(reqBody).Encode(tc.searchReq)
				assert.NoError(t, err)
				req := httptest.NewRequest(http.MethodPost, "/", reqBody)

				res := Search(req, tc.device, db, fts, tc.from)
				// An expected failure that did fail needs no further checks.
				if !tc.wantOK && !res.Is2xx() {
					return
				}
				// From here on the body must be a SearchResponse, whether the
				// case expected success or an unexpected 2xx came back.
				resp, ok := res.JSON.(SearchResponse)
				if !ok {
					t.Fatalf("not a SearchResponse: %T: %s", res.JSON, res.JSON)
				}
				assert.Equal(t, tc.wantResponseCount, resp.SearchCategories.RoomEvents.Count)

				// if we requested state, it should not be empty
				if tc.searchReq.SearchCategories.RoomEvents.IncludeState {
					assert.NotEmpty(t, resp.SearchCategories.RoomEvents.State)
				}
			})
		}
	})
}