Restructure tests, fix issue with multiple search queries

This commit is contained in:
Till Faelligen 2022-05-18 15:13:50 +02:00
parent 9983f92bcf
commit c909b12b90
5 changed files with 269 additions and 160 deletions

2
go.mod
View file

@ -11,7 +11,7 @@ require (
github.com/HdrHistogram/hdrhistogram-go v1.1.2 // indirect github.com/HdrHistogram/hdrhistogram-go v1.1.2 // indirect
github.com/MFAshby/stdemuxerhook v1.0.0 github.com/MFAshby/stdemuxerhook v1.0.0
github.com/Masterminds/semver/v3 v3.1.1 github.com/Masterminds/semver/v3 v3.1.1
github.com/blevesearch/bleve/v2 v2.3.2 // indirect github.com/blevesearch/bleve/v2 v2.3.2
github.com/codeclysm/extract v2.2.0+incompatible github.com/codeclysm/extract v2.2.0+incompatible
github.com/containerd/containerd v1.6.2 // indirect github.com/containerd/containerd v1.6.2 // indirect
github.com/docker/docker v20.10.14+incompatible github.com/docker/docker v20.10.14+incompatible

View file

@ -15,32 +15,42 @@
package fulltext package fulltext
import ( import (
"strings"
"time"
"github.com/blevesearch/bleve/v2" "github.com/blevesearch/bleve/v2"
"github.com/blevesearch/bleve/v2/analysis/lang/en" "github.com/blevesearch/bleve/v2/analysis/lang/en"
"github.com/blevesearch/bleve/v2/search/query" "github.com/blevesearch/bleve/v2/search/query"
"github.com/matrix-org/gomatrixserverlib"
) )
// Search contains all existing bleve.Index // Search contains all existing bleve.Index
type Search struct { type Search struct {
MessageIndex bleve.Index FulltextIndex bleve.Index
} }
// IndexElement describes the layout of an element to index // IndexElement describes the layout of an element to index
type IndexElement struct { type IndexElement struct {
EventID string `json:"event_id,omitempty"` EventID string
RoomID string `json:"room_id,omitempty"` RoomID string
Content string `json:"content,omitempty"` Content string
Time time.Time `json:"timestamp,omitempty"` ContentType string
StreamPosition int64
}
// SetContentType sets i.ContentType given an identifier
func (i *IndexElement) SetContentType(v string) {
switch v {
case "m.room.message":
i.ContentType = "content.body"
case gomatrixserverlib.MRoomName:
i.ContentType = "content.name"
case gomatrixserverlib.MRoomTopic:
i.ContentType = "content.topic"
}
} }
// New opens a new/existing fulltext index // New opens a new/existing fulltext index
func New(path string) (*Search, error) { func New(path string) (fts *Search, err error) {
fts := &Search{} fts = &Search{}
var err error fts.FulltextIndex, err = openIndex(path)
fts.MessageIndex, err = openIndex(path)
if err != nil { if err != nil {
return nil, err return nil, err
} }
@ -49,17 +59,17 @@ func New(path string) (*Search, error) {
// Close closes the fulltext index // Close closes the fulltext index
func (f *Search) Close() error { func (f *Search) Close() error {
return f.MessageIndex.Close() return f.FulltextIndex.Close()
} }
// Index indexes a given element // Index indexes a given element
func (f *Search) Index(e IndexElement) error { func (f *Search) Index(e IndexElement) error {
return f.MessageIndex.Index(e.EventID, e) return f.FulltextIndex.Index(e.EventID, e)
} }
// BatchIndex indexes the given elements // BatchIndex indexes the given elements
func (f *Search) BatchIndex(elements []IndexElement) error { func (f *Search) BatchIndex(elements []IndexElement) error {
batch := f.MessageIndex.NewBatch() batch := f.FulltextIndex.NewBatch()
for _, element := range elements { for _, element := range elements {
err := batch.Index(element.EventID, element) err := batch.Index(element.EventID, element)
@ -67,40 +77,54 @@ func (f *Search) BatchIndex(elements []IndexElement) error {
return err return err
} }
} }
return f.MessageIndex.Batch(batch) return f.FulltextIndex.Batch(batch)
} }
// Delete deletes an indexed element by the eventID // Delete deletes an indexed element by the eventID
func (f *Search) Delete(eventID string) error { func (f *Search) Delete(eventID string) error {
return f.MessageIndex.Delete(eventID) return f.FulltextIndex.Delete(eventID)
} }
// Search searches the index given a search term // Search searches the index given a search term, roomIDs and keys.
func (f *Search) Search(term string, roomIDs []string, limit, from int, orderByTime bool) (*bleve.SearchResult, error) { func (f *Search) Search(term string, roomIDs, keys []string, limit, from int, orderByStreamPos bool) (*bleve.SearchResult, error) {
terms := strings.Split(term, " ")
qry := bleve.NewConjunctionQuery() qry := bleve.NewConjunctionQuery()
for _, t := range terms { termQuery := bleve.NewBooleanQuery()
qry.AddQuery(bleve.NewQueryStringQuery(t))
}
matchQuery := bleve.NewMatchQuery(term)
matchQuery.SetField("Content")
termQuery.AddMust(matchQuery)
qry.AddQuery(termQuery)
roomQuery := bleve.NewBooleanQuery()
for _, roomID := range roomIDs { for _, roomID := range roomIDs {
roomSearch := bleve.NewMatchQuery(roomID) roomSearch := bleve.NewMatchQuery(roomID)
roomSearch.SetField("room_id") roomSearch.SetField("RoomID")
roomSearch.SetOperator(query.MatchQueryOperatorAnd) roomSearch.SetOperator(query.MatchQueryOperatorOr)
qry.AddQuery(roomSearch) roomQuery.AddShould(roomSearch)
}
if len(roomIDs) > 0 {
qry.AddQuery(roomQuery)
}
keyQuery := bleve.NewBooleanQuery()
for _, key := range keys {
keySearch := bleve.NewMatchQuery(key)
keySearch.SetField("ContentType")
keySearch.SetOperator(query.MatchQueryOperatorOr)
keyQuery.AddShould(keySearch)
}
if len(keys) > 0 {
keyQuery.SetMinShould(1)
qry.AddQuery(keyQuery)
} }
s := bleve.NewSearchRequest(qry) s := bleve.NewSearchRequestOptions(qry, limit, from, false)
s.Size = limit
s.From = from
s.SortBy([]string{"_score"}) s.SortBy([]string{"_score"})
if orderByTime { if orderByStreamPos {
s.SortBy([]string{"-timestamp"}) s.SortBy([]string{"-StreamPosition"})
} }
return f.MessageIndex.Search(s) return f.FulltextIndex.Search(s)
} }
func openIndex(path string) (bleve.Index, error) { func openIndex(path string) (bleve.Index, error) {
@ -112,22 +136,17 @@ func openIndex(path string) (bleve.Index, error) {
enFieldMapping.Analyzer = en.AnalyzerName enFieldMapping.Analyzer = en.AnalyzerName
eventMapping := bleve.NewDocumentMapping() eventMapping := bleve.NewDocumentMapping()
eventMapping.AddFieldMappingsAt("Content", enFieldMapping)
eventMapping.AddFieldMappingsAt("StreamPosition", bleve.NewNumericFieldMapping())
eventMapping.AddFieldMappingsAt("content", enFieldMapping) idFieldMapping := bleve.NewKeywordFieldMapping()
eventMapping.AddFieldMappingsAt("room_id", bleve.NewTextFieldMapping()) eventMapping.AddFieldMappingsAt("ContentType", idFieldMapping)
eventMapping.AddFieldMappingsAt("RoomID", idFieldMapping)
idMapping := bleve.NewTextFieldMapping() eventMapping.AddFieldMappingsAt("EventID", idFieldMapping)
idMapping.IncludeInAll = false
idMapping.Index = false
idMapping.IncludeTermVectors = false
idMapping.SkipFreqNorm = true
eventMapping.AddFieldMappingsAt("event_id", idMapping)
mapping := bleve.NewIndexMapping() mapping := bleve.NewIndexMapping()
mapping.AddDocumentMapping("event", eventMapping) mapping.AddDocumentMapping("Event", eventMapping)
mapping.DefaultType = "event" mapping.DefaultType = "Event"
mapping.TypeField = "type"
mapping.DefaultAnalyzer = "en"
index, err := bleve.New(path, mapping) index, err := bleve.New(path, mapping)
if err != nil { if err != nil {
@ -135,20 +154,3 @@ func openIndex(path string) (bleve.Index, error) {
} }
return index, nil return index, nil
} }
type IndexElements []IndexElement
// Len implements sort.Interface
func (ie IndexElements) Len() int {
return len(ie)
}
// Less implements sort.Interface
func (ie IndexElements) Less(i, j int) bool {
return ie[i].Time.After(ie[j].Time)
}
// Swap implements sort.Interface
func (ie IndexElements) Swap(i, j int) {
ie[i], ie[j] = ie[j], ie[i]
}

View file

@ -15,119 +15,225 @@
package fulltext_test package fulltext_test
import ( import (
"sort" "reflect"
"testing" "testing"
"time"
"github.com/matrix-org/dendrite/internal/fulltext" "github.com/matrix-org/dendrite/internal/fulltext"
"github.com/matrix-org/gomatrixserverlib"
"github.com/matrix-org/util" "github.com/matrix-org/util"
) )
func TestSearch(t *testing.T) { func mustOpenIndex(t *testing.T, tempDir string) *fulltext.Search {
// create new index t.Helper()
dataDir := t.TempDir() fts, err := fulltext.New(tempDir)
fts, err := fulltext.New(dataDir)
if err != nil { if err != nil {
t.Fatal("failed to open fulltext index:", err) t.Fatal("failed to open fulltext index:", err)
} }
if err = fts.Close(); err != nil { return fts
}
// mustAddTestData batch-indexes a fixed set of fixtures into fts and fails the
// test immediately if the batch cannot be written.
//
// It indexes 30 "m.room.message" elements with the content "lorem ipsum": the
// first 16 share one random room ID, the remaining 14 each get a room ID of
// their own. One room name element ("Roomname testing") and one room topic
// element ("Room topic fulltext") are then added to the last room generated.
// Stream positions start at firstStreamPos+1 and increase by one for every
// element, so no two fixtures share a position.
//
// The returned slices describe the 30 message elements only: eventIDs[i] and
// roomIDs[i] are the event ID and room ID of the i-th message.
func mustAddTestData(t *testing.T, fts *fulltext.Search, firstStreamPos int64) (eventIDs, roomIDs []string) {
	t.Helper()

	const (
		messageCount      = 30 // number of "lorem ipsum" message fixtures
		firstRoomMessages = 16 // how many of them land in the first room
	)

	// Two extra slots for the room name and room topic fixtures.
	batchItems := make([]fulltext.IndexElement, 0, messageCount+2)
	eventIDs = make([]string, 0, messageCount)
	roomIDs = make([]string, 0, messageCount)

	streamPos := firstStreamPos
	wantRoomID := util.RandomString(16)
	for i := 0; i < messageCount; i++ {
		streamPos++
		eventID := util.RandomString(16)
		// Once the first room has its share of messages, every further
		// message goes into a freshly generated room.
		if i >= firstRoomMessages {
			wantRoomID = util.RandomString(16)
		}
		e := fulltext.IndexElement{
			EventID:        eventID,
			RoomID:         wantRoomID,
			Content:        "lorem ipsum",
			StreamPosition: streamPos,
		}
		e.SetContentType("m.room.message")
		batchItems = append(batchItems, e)
		roomIDs = append(roomIDs, wantRoomID)
		eventIDs = append(eventIDs, eventID)
	}

	// Room name fixture; it gets its own stream position rather than
	// reusing the one of the last message.
	streamPos++
	e := fulltext.IndexElement{
		EventID:        util.RandomString(16),
		RoomID:         wantRoomID,
		Content:        "Roomname testing",
		StreamPosition: streamPos,
	}
	e.SetContentType(gomatrixserverlib.MRoomName)
	batchItems = append(batchItems, e)

	// Room topic fixture, again with a position of its own.
	streamPos++
	e = fulltext.IndexElement{
		EventID:        util.RandomString(16),
		RoomID:         wantRoomID,
		Content:        "Room topic fulltext",
		StreamPosition: streamPos,
	}
	e.SetContentType(gomatrixserverlib.MRoomTopic)
	batchItems = append(batchItems, e)

	if err := fts.BatchIndex(batchItems); err != nil {
		t.Fatalf("failed to batch insert elements: %v", err)
	}
	return eventIDs, roomIDs
}
func TestOpen(t *testing.T) {
dataDir := t.TempDir()
fts := mustOpenIndex(t, dataDir)
if err := fts.Close(); err != nil {
t.Fatal("unable to close fulltext index", err) t.Fatal("unable to close fulltext index", err)
} }
// open existing index // open existing index
fts, err = fulltext.New(dataDir) fts = mustOpenIndex(t, dataDir)
if err != nil { defer fts.Close()
t.Fatal("failed to open fulltext index:", err) }
}
func TestIndex(t *testing.T) {
fts := mustOpenIndex(t, t.TempDir())
defer fts.Close() defer fts.Close()
if fts == nil {
t.Fatal("fts is nil")
}
// add some data // add some data
var streamPos int64 = 1
roomID := util.RandomString(8) roomID := util.RandomString(8)
e := fulltext.IndexElement{
EventID: util.RandomString(16),
RoomID: roomID,
Content: "lorem ipsum",
Time: time.Now(),
}
if err = fts.Index(e); err != nil {
t.Fatal("failed to index element", err)
}
eventID := util.RandomString(16) eventID := util.RandomString(16)
e = fulltext.IndexElement{ e := fulltext.IndexElement{
EventID: eventID, EventID: eventID,
RoomID: roomID, RoomID: roomID,
Content: "lorem ipsum", Content: "lorem ipsum",
Time: time.Now(), StreamPosition: streamPos,
} }
e.SetContentType("m.room.message")
if err = fts.Index(e); err != nil { if err := fts.Index(e); err != nil {
t.Fatal("failed to index element", err) t.Fatal("failed to index element", err)
} }
// search data
res, err := fts.Search("lorem", nil, 10, 0, false)
if err != nil {
t.Fatal(err)
}
if res.Total != 2 {
t.Fatalf("expected %d results, got %d", 2, res.Total)
}
// remove element
if err = fts.Delete(eventID); err != nil {
t.Fatal(err)
}
// create some more random data // create some more random data
var batchItems []fulltext.IndexElement mustAddTestData(t, fts, streamPos)
}
wantRoomID := util.RandomString(8)
for i := 0; i < 30; i++ { func TestDelete(t *testing.T) {
eventID = util.RandomString(16) fts := mustOpenIndex(t, t.TempDir())
e = fulltext.IndexElement{ defer fts.Close()
EventID: eventID, eventIDs, roomIDs := mustAddTestData(t, fts, 0)
RoomID: wantRoomID, res1, err := fts.Search("lorem", roomIDs[:1], nil, 50, 0, false)
Content: "lorem ipsum", if err != nil {
Time: time.Now(), t.Fatal(err)
} }
batchItems = append(batchItems, e)
} if err = fts.Delete(eventIDs[0]); err != nil {
t.Fatal(err)
// Index the data }
if err = fts.BatchIndex(batchItems); err != nil {
t.Fatal("failed to batch index") res2, err := fts.Search("lorem", roomIDs[:1], nil, 50, 0, false)
} if err != nil {
t.Fatal(err)
// search for lorem, but only in a given room }
searchRooms := []string{roomID}
res, err = fts.Search("lorem", searchRooms, 10, 0, false) if res1.Total <= res2.Total {
if err != nil { t.Fatalf("got unexpected result: %d <= %d", res1.Total, res2.Total)
t.Fatal(err) }
} }
if res.Total != 1 {
t.Fatalf("expected %d results, got %d", 1, res.Total) func TestSearch(t *testing.T) {
} type args struct {
term string
// can get sorted results keys []string
res, err = fts.Search("lorem", []string{wantRoomID}, 10, 0, true) limit int
if err != nil { from int
t.Fatal(err) orderByStreamPos bool
} roomIndex []int
}
if res.Hits[0].ID != eventID { tests := []struct {
t.Fatalf("expected %s to be first, got %s", eventID, res.Hits[0].ID) name string
} args args
wantCount int
sort.Sort(fulltext.IndexElements(batchItems)) wantErr bool
if eventID != batchItems[0].EventID { }{
t.Fatalf("expected %s to be first, got %s", eventID, batchItems[0].EventID) {
} name: "Can search for many results in one room",
wantCount: 16,
// test back pagination args: args{
term: "lorem",
roomIndex: []int{0},
limit: 20,
},
},
{
name: "Can search for one result in one room",
wantCount: 1,
args: args{
term: "lorem",
roomIndex: []int{16},
limit: 20,
},
},
{
name: "Can search for many results in multiple rooms",
wantCount: 17,
args: args{
term: "lorem",
roomIndex: []int{0, 16},
limit: 20,
},
},
{
name: "Can search for many results in all rooms, reversed",
wantCount: 30,
args: args{
term: "lorem",
limit: 30,
orderByStreamPos: true,
},
},
{
name: "Can search for specific search room name",
wantCount: 1,
args: args{
term: "testing",
roomIndex: []int{},
limit: 20,
keys: []string{"content.name"},
},
},
{
name: "Can search for specific search room topic",
wantCount: 1,
args: args{
term: "fulltext",
roomIndex: []int{},
limit: 20,
keys: []string{"content.topic"},
},
},
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
f := mustOpenIndex(t, t.TempDir())
eventIDs, roomIDs := mustAddTestData(t, f, 0)
var searchRooms []string
for _, x := range tt.args.roomIndex {
searchRooms = append(searchRooms, roomIDs[x])
}
t.Logf("searching in rooms: %v - %v\n", searchRooms, tt.args.keys)
got, err := f.Search(tt.args.term, searchRooms, tt.args.keys, tt.args.limit, tt.args.from, tt.args.orderByStreamPos)
if (err != nil) != tt.wantErr {
t.Errorf("Search() error = %v, wantErr %v", err, tt.wantErr)
return
}
if !reflect.DeepEqual(len(got.Hits), tt.wantCount) {
t.Errorf("Search() got = %v, want %v", len(got.Hits), tt.wantCount)
}
if tt.args.orderByStreamPos {
if got.Hits[0].ID != eventIDs[29] {
t.Fatalf("expected ID %s, got %s", eventIDs[29], got.Hits[0].ID)
}
}
})
}
} }

View file

@ -75,6 +75,7 @@ func Search(req *http.Request, device *api.Device, syncDB storage.Database, fts
result, err := fts.Search( result, err := fts.Search(
searchReq.SearchCategories.RoomEvents.SearchTerm, searchReq.SearchCategories.RoomEvents.SearchTerm,
rooms, rooms,
[]string{},
searchReq.SearchCategories.RoomEvents.Filter.Limit, searchReq.SearchCategories.RoomEvents.Filter.Limit,
nextBatch, nextBatch,
orderByTime, orderByTime,

View file

@ -19,7 +19,6 @@ import (
"database/sql" "database/sql"
"encoding/json" "encoding/json"
"fmt" "fmt"
"time"
"github.com/matrix-org/dendrite/internal/fulltext" "github.com/matrix-org/dendrite/internal/fulltext"
userapi "github.com/matrix-org/dendrite/userapi/api" userapi "github.com/matrix-org/dendrite/userapi/api"
@ -396,10 +395,11 @@ func (d *Database) WriteEvent(
}) })
e := fulltext.IndexElement{ e := fulltext.IndexElement{
EventID: ev.EventID(), EventID: ev.EventID(),
RoomID: ev.RoomID(), RoomID: ev.RoomID(),
Time: time.Now(), StreamPosition: int64(pduPosition),
} }
e.SetContentType(ev.Type())
switch ev.Type() { switch ev.Type() {
case "m.room.message": case "m.room.message":