Add "result at the end" test, let the fulltext determine which words to highlight
This commit is contained in:
Till Faelligen 2023-03-23 15:37:08 +01:00
parent 88ca95bf40
commit 89428bc3a8
No known key found for this signature in database
GPG key ID: ACCDC9606D472758
5 changed files with 104 additions and 19 deletions

View file

@ -18,6 +18,7 @@
package fulltext
import (
"regexp"
"strings"
"github.com/blevesearch/bleve/v2"
@ -60,6 +61,7 @@ type Indexer interface {
Index(elements ...IndexElement) error
Delete(eventID string) error
Search(term string, roomIDs, keys []string, limit, from int, orderByStreamPos bool) (*bleve.SearchResult, error)
GetHighlights(result *bleve.SearchResult) []string
Close() error
}
@ -124,6 +126,47 @@ func (f *Search) Delete(eventID string) error {
return f.FulltextIndex.Delete(eventID)
}
// highlightMatcher captures the text enclosed in each <mark>...</mark> pair.
// The lazy (.*?) keeps every marked span a separate match instead of letting
// one match swallow everything between the first <mark> and the last </mark>.
var highlightMatcher = regexp.MustCompile("<mark>(.*?)</mark>")
// GetHighlights extracts the distinct highlighted terms from a SearchResult.
//
// It scans the "Content" fragments of every hit for text wrapped in
// <mark>...</mark> and returns each distinct marked term exactly once, in the
// order it is first encountered. Collecting in first-seen order (rather than
// ranging over a map afterwards) keeps the output deterministic for a given
// result.
//
// The returned slice is never nil: a nil result, or a result without any
// highlighted "Content" fragments, yields an empty slice.
func (f *Search) GetHighlights(result *bleve.SearchResult) []string {
	highlights := []string{}
	if result == nil {
		return highlights
	}

	seen := make(map[string]struct{})
	for _, hit := range result.Hits {
		// Indexing a nil Fragments map is safe and simply yields no fragments,
		// so hits without fragments (or without "Content") are skipped here.
		for _, fragment := range hit.Fragments["Content"] {
			for _, match := range highlightMatcher.FindAllStringSubmatch(fragment, -1) {
				// match[0] is the complete "<mark>...</mark>" span; only the
				// capture groups that follow hold the highlighted text.
				for _, term := range match[1:] {
					if _, ok := seen[term]; ok {
						continue
					}
					seen[term] = struct{}{}
					highlights = append(highlights, term)
				}
			}
		}
	}
	return highlights
}
// Search searches the index given a search term, roomIDs and keys.
func (f *Search) Search(term string, roomIDs, keys []string, limit, from int, orderByStreamPos bool) (*bleve.SearchResult, error) {
qry := bleve.NewConjunctionQuery()
@ -163,6 +206,10 @@ func (f *Search) Search(term string, roomIDs, keys []string, limit, from int, or
s.SortBy([]string{"-StreamPosition"})
}
// Highlight some words
s.Highlight = bleve.NewHighlight()
s.Highlight.Fields = []string{"Content"}
return f.FulltextIndex.Search(s)
}

View file

@ -164,10 +164,12 @@ func TestSearch(t *testing.T) {
args args
wantCount int
wantErr bool
wantHighlights []string
}{
{
name: "Can search for many results in one room",
wantCount: 16,
wantHighlights: []string{"lorem"},
args: args{
term: "lorem",
roomIndex: []int{0},
@ -177,6 +179,7 @@ func TestSearch(t *testing.T) {
{
name: "Can search for one result in one room",
wantCount: 1,
wantHighlights: []string{"lorem"},
args: args{
term: "lorem",
roomIndex: []int{16},
@ -186,6 +189,7 @@ func TestSearch(t *testing.T) {
{
name: "Can search for many results in multiple rooms",
wantCount: 17,
wantHighlights: []string{"lorem"},
args: args{
term: "lorem",
roomIndex: []int{0, 16},
@ -195,6 +199,7 @@ func TestSearch(t *testing.T) {
{
name: "Can search for many results in all rooms, reversed",
wantCount: 30,
wantHighlights: []string{"lorem"},
args: args{
term: "lorem",
limit: 30,
@ -204,6 +209,7 @@ func TestSearch(t *testing.T) {
{
name: "Can search for specific search room name",
wantCount: 1,
wantHighlights: []string{"testing"},
args: args{
term: "testing",
roomIndex: []int{},
@ -214,6 +220,7 @@ func TestSearch(t *testing.T) {
{
name: "Can search for specific search room topic",
wantCount: 1,
wantHighlights: []string{"fulltext"},
args: args{
term: "fulltext",
roomIndex: []int{},
@ -222,6 +229,7 @@ func TestSearch(t *testing.T) {
},
},
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
f, ctx := mustOpenIndex(t, "")
@ -238,6 +246,12 @@ func TestSearch(t *testing.T) {
t.Errorf("Search() error = %v, wantErr %v", err, tt.wantErr)
return
}
highlights := f.GetHighlights(got)
if !reflect.DeepEqual(highlights, tt.wantHighlights) {
t.Errorf("Search() got highligts = %v, want %v", highlights, tt.wantHighlights)
}
if !reflect.DeepEqual(len(got.Hits), tt.wantCount) {
t.Errorf("Search() got = %v, want %v", len(got.Hits), tt.wantCount)
}

View file

@ -33,6 +33,7 @@ type Indexer interface {
Index(elements ...IndexElement) error
Delete(eventID string) error
Search(term string, roomIDs, keys []string, limit, from int, orderByStreamPos bool) (SearchResult, error)
GetHighlights(result SearchResult) []string
Close() error
}
@ -71,3 +72,7 @@ func (f *Search) Delete(eventID string) error {
// Search is a stub: it ignores every argument and always returns the zero
// SearchResult together with a nil error.
func (f *Search) Search(term string, roomIDs, keys []string, limit, from int, orderByStreamPos bool) (SearchResult, error) {
	var empty SearchResult
	return empty, nil
}
// GetHighlights is a stub: it ignores result and always returns an empty,
// non-nil slice.
func (f *Search) GetHighlights(result SearchResult) []string {
	return make([]string, 0)
}

View file

@ -19,7 +19,6 @@ import (
"net/http"
"sort"
"strconv"
"strings"
"time"
"github.com/blevesearch/bleve/v2/search"
@ -243,7 +242,7 @@ func Search(req *http.Request, device *api.Device, syncDB storage.Database, fts
Groups: Groups{RoomID: groups},
Results: results,
NextBatch: nextBatchResult,
Highlights: strings.Split(searchReq.SearchCategories.RoomEvents.SearchTerm, " "),
Highlights: fts.GetHighlights(result),
State: stateForRooms,
},
},

View file

@ -53,7 +53,7 @@ func TestSearch(t *testing.T) {
device: &aliceDevice,
},
{
name: "searchTerm specified",
name: "searchTerm specified, found at the beginning",
wantOK: true,
searchReq: SearchRequest{
SearchCategories: SearchCategories{RoomEvents: RoomEvents{SearchTerm: "hello"}},
@ -61,6 +61,26 @@ func TestSearch(t *testing.T) {
device: &aliceDevice,
wantResponseCount: 1,
},
{
name: "searchTerm specified, found at the end",
wantOK: true,
searchReq: SearchRequest{
SearchCategories: SearchCategories{RoomEvents: RoomEvents{SearchTerm: "world3"}},
},
device: &aliceDevice,
wantResponseCount: 1,
},
/* the following would need matchQuery.SetFuzziness(1) in bleve.go
{
name: "searchTerm fuzzy search",
wantOK: true,
searchReq: SearchRequest{
SearchCategories: SearchCategories{RoomEvents: RoomEvents{SearchTerm: "hell"}}, // this still should find hello world
},
device: &aliceDevice,
wantResponseCount: 1,
},
*/
{
name: "searchTerm specified but no result",
wantOK: true,