Add "result at the end" test, let the fulltext determine which words to

highlight
This commit is contained in:
Till Faelligen 2023-03-23 15:37:08 +01:00
parent 88ca95bf40
commit 89428bc3a8
No known key found for this signature in database
GPG key ID: ACCDC9606D472758
5 changed files with 104 additions and 19 deletions

View file

@ -18,6 +18,7 @@
package fulltext package fulltext
import ( import (
"regexp"
"strings" "strings"
"github.com/blevesearch/bleve/v2" "github.com/blevesearch/bleve/v2"
@ -60,6 +61,7 @@ type Indexer interface {
Index(elements ...IndexElement) error Index(elements ...IndexElement) error
Delete(eventID string) error Delete(eventID string) error
Search(term string, roomIDs, keys []string, limit, from int, orderByStreamPos bool) (*bleve.SearchResult, error) Search(term string, roomIDs, keys []string, limit, from int, orderByStreamPos bool) (*bleve.SearchResult, error)
GetHighlights(result *bleve.SearchResult) []string
Close() error Close() error
} }
@ -124,6 +126,47 @@ func (f *Search) Delete(eventID string) error {
return f.FulltextIndex.Delete(eventID) return f.FulltextIndex.Delete(eventID)
} }
// highlightMatcher captures the text wrapped in <mark>…</mark> inside a
// highlighted fragment. The single capture group holds the highlighted text.
var highlightMatcher = regexp.MustCompile("<mark>(.*?)</mark>")

// GetHighlights extracts the highlights from a SearchResult.
//
// It scans the "Content" fragments of every hit and collects the text found
// between <mark> and </mark>. Each distinct highlight is returned exactly
// once, in the order it is first encountered while walking the hits, so the
// output is deterministic for a given result. The returned slice is never
// nil; a nil result or a result without highlights yields an empty slice.
func (f *Search) GetHighlights(result *bleve.SearchResult) []string {
	if result == nil {
		return []string{}
	}

	// seen is only used for de-duplication; ordering is kept by highlights.
	// Ranging over the map to build the result would randomise the order.
	seen := make(map[string]struct{})
	highlights := []string{}

	for _, hit := range result.Hits {
		// Indexing a nil or key-less map yields a nil slice, so hits without
		// "Content" fragments are skipped by the empty range below.
		for _, fragment := range hit.Fragments["Content"] {
			for _, match := range highlightMatcher.FindAllStringSubmatch(fragment, -1) {
				// match[0] is the complete "<mark>…</mark>" span, match[1] is
				// the only capture group: the highlighted text itself.
				word := match[1]
				if _, ok := seen[word]; ok {
					continue
				}
				seen[word] = struct{}{}
				highlights = append(highlights, word)
			}
		}
	}

	return highlights
}
// Search searches the index given a search term, roomIDs and keys. // Search searches the index given a search term, roomIDs and keys.
func (f *Search) Search(term string, roomIDs, keys []string, limit, from int, orderByStreamPos bool) (*bleve.SearchResult, error) { func (f *Search) Search(term string, roomIDs, keys []string, limit, from int, orderByStreamPos bool) (*bleve.SearchResult, error) {
qry := bleve.NewConjunctionQuery() qry := bleve.NewConjunctionQuery()
@ -163,6 +206,10 @@ func (f *Search) Search(term string, roomIDs, keys []string, limit, from int, or
s.SortBy([]string{"-StreamPosition"}) s.SortBy([]string{"-StreamPosition"})
} }
// Highlight some words
s.Highlight = bleve.NewHighlight()
s.Highlight.Fields = []string{"Content"}
return f.FulltextIndex.Search(s) return f.FulltextIndex.Search(s)
} }

View file

@ -160,14 +160,16 @@ func TestSearch(t *testing.T) {
roomIndex []int roomIndex []int
} }
tests := []struct { tests := []struct {
name string name string
args args args args
wantCount int wantCount int
wantErr bool wantErr bool
wantHighlights []string
}{ }{
{ {
name: "Can search for many results in one room", name: "Can search for many results in one room",
wantCount: 16, wantCount: 16,
wantHighlights: []string{"lorem"},
args: args{ args: args{
term: "lorem", term: "lorem",
roomIndex: []int{0}, roomIndex: []int{0},
@ -175,8 +177,9 @@ func TestSearch(t *testing.T) {
}, },
}, },
{ {
name: "Can search for one result in one room", name: "Can search for one result in one room",
wantCount: 1, wantCount: 1,
wantHighlights: []string{"lorem"},
args: args{ args: args{
term: "lorem", term: "lorem",
roomIndex: []int{16}, roomIndex: []int{16},
@ -184,8 +187,9 @@ func TestSearch(t *testing.T) {
}, },
}, },
{ {
name: "Can search for many results in multiple rooms", name: "Can search for many results in multiple rooms",
wantCount: 17, wantCount: 17,
wantHighlights: []string{"lorem"},
args: args{ args: args{
term: "lorem", term: "lorem",
roomIndex: []int{0, 16}, roomIndex: []int{0, 16},
@ -193,8 +197,9 @@ func TestSearch(t *testing.T) {
}, },
}, },
{ {
name: "Can search for many results in all rooms, reversed", name: "Can search for many results in all rooms, reversed",
wantCount: 30, wantCount: 30,
wantHighlights: []string{"lorem"},
args: args{ args: args{
term: "lorem", term: "lorem",
limit: 30, limit: 30,
@ -202,8 +207,9 @@ func TestSearch(t *testing.T) {
}, },
}, },
{ {
name: "Can search for specific search room name", name: "Can search for specific search room name",
wantCount: 1, wantCount: 1,
wantHighlights: []string{"testing"},
args: args{ args: args{
term: "testing", term: "testing",
roomIndex: []int{}, roomIndex: []int{},
@ -212,8 +218,9 @@ func TestSearch(t *testing.T) {
}, },
}, },
{ {
name: "Can search for specific search room topic", name: "Can search for specific search room topic",
wantCount: 1, wantCount: 1,
wantHighlights: []string{"fulltext"},
args: args{ args: args{
term: "fulltext", term: "fulltext",
roomIndex: []int{}, roomIndex: []int{},
@ -222,6 +229,7 @@ func TestSearch(t *testing.T) {
}, },
}, },
} }
for _, tt := range tests { for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) { t.Run(tt.name, func(t *testing.T) {
f, ctx := mustOpenIndex(t, "") f, ctx := mustOpenIndex(t, "")
@ -238,6 +246,12 @@ func TestSearch(t *testing.T) {
t.Errorf("Search() error = %v, wantErr %v", err, tt.wantErr) t.Errorf("Search() error = %v, wantErr %v", err, tt.wantErr)
return return
} }
highlights := f.GetHighlights(got)
if !reflect.DeepEqual(highlights, tt.wantHighlights) {
t.Errorf("Search() got highligts = %v, want %v", highlights, tt.wantHighlights)
}
if !reflect.DeepEqual(len(got.Hits), tt.wantCount) { if !reflect.DeepEqual(len(got.Hits), tt.wantCount) {
t.Errorf("Search() got = %v, want %v", len(got.Hits), tt.wantCount) t.Errorf("Search() got = %v, want %v", len(got.Hits), tt.wantCount)
} }

View file

@ -33,6 +33,7 @@ type Indexer interface {
Index(elements ...IndexElement) error Index(elements ...IndexElement) error
Delete(eventID string) error Delete(eventID string) error
Search(term string, roomIDs, keys []string, limit, from int, orderByStreamPos bool) (SearchResult, error) Search(term string, roomIDs, keys []string, limit, from int, orderByStreamPos bool) (SearchResult, error)
GetHighlights(result SearchResult) []string
Close() error Close() error
} }
@ -71,3 +72,7 @@ func (f *Search) Delete(eventID string) error {
func (f *Search) Search(term string, roomIDs, keys []string, limit, from int, orderByStreamPos bool) (SearchResult, error) { func (f *Search) Search(term string, roomIDs, keys []string, limit, from int, orderByStreamPos bool) (SearchResult, error) {
return SearchResult{}, nil return SearchResult{}, nil
} }
// GetHighlights ignores the given result and always returns an empty,
// non-nil slice of highlights.
// NOTE(review): this looks like the stub counterpart of the bleve-backed
// implementation — confirm against the build constraints of this file.
func (f *Search) GetHighlights(result SearchResult) []string {
	return make([]string, 0)
}

View file

@ -19,7 +19,6 @@ import (
"net/http" "net/http"
"sort" "sort"
"strconv" "strconv"
"strings"
"time" "time"
"github.com/blevesearch/bleve/v2/search" "github.com/blevesearch/bleve/v2/search"
@ -243,7 +242,7 @@ func Search(req *http.Request, device *api.Device, syncDB storage.Database, fts
Groups: Groups{RoomID: groups}, Groups: Groups{RoomID: groups},
Results: results, Results: results,
NextBatch: nextBatchResult, NextBatch: nextBatchResult,
Highlights: strings.Split(searchReq.SearchCategories.RoomEvents.SearchTerm, " "), Highlights: fts.GetHighlights(result),
State: stateForRooms, State: stateForRooms,
}, },
}, },

View file

@ -53,7 +53,7 @@ func TestSearch(t *testing.T) {
device: &aliceDevice, device: &aliceDevice,
}, },
{ {
name: "searchTerm specified", name: "searchTerm specified, found at the beginning",
wantOK: true, wantOK: true,
searchReq: SearchRequest{ searchReq: SearchRequest{
SearchCategories: SearchCategories{RoomEvents: RoomEvents{SearchTerm: "hello"}}, SearchCategories: SearchCategories{RoomEvents: RoomEvents{SearchTerm: "hello"}},
@ -61,6 +61,26 @@ func TestSearch(t *testing.T) {
device: &aliceDevice, device: &aliceDevice,
wantResponseCount: 1, wantResponseCount: 1,
}, },
{
name: "searchTerm specified, found at the end",
wantOK: true,
searchReq: SearchRequest{
SearchCategories: SearchCategories{RoomEvents: RoomEvents{SearchTerm: "world3"}},
},
device: &aliceDevice,
wantResponseCount: 1,
},
/* the following would need matchQuery.SetFuzziness(1) in bleve.go
{
name: "searchTerm fuzzy search",
wantOK: true,
searchReq: SearchRequest{
SearchCategories: SearchCategories{RoomEvents: RoomEvents{SearchTerm: "hell"}}, // this still should find hello world
},
device: &aliceDevice,
wantResponseCount: 1,
},
*/
{ {
name: "searchTerm specified but no result", name: "searchTerm specified but no result",
wantOK: true, wantOK: true,