From 89428bc3a82ee61dd00a246507ca46dbb77e545f Mon Sep 17 00:00:00 2001 From: Till Faelligen <2353100+S7evinK@users.noreply.github.com> Date: Thu, 23 Mar 2023 15:37:08 +0100 Subject: [PATCH] Add "result at the end" test, let the fulltext determine which words to highlight --- internal/fulltext/bleve.go | 47 +++++++++++++++++++++++++++++++++ internal/fulltext/bleve_test.go | 46 +++++++++++++++++++++----------- internal/fulltext/bleve_wasm.go | 5 ++++ syncapi/routing/search.go | 3 +-- syncapi/routing/search_test.go | 22 ++++++++++++++- 5 files changed, 104 insertions(+), 19 deletions(-) diff --git a/internal/fulltext/bleve.go b/internal/fulltext/bleve.go index dea7c504c..f7412470d 100644 --- a/internal/fulltext/bleve.go +++ b/internal/fulltext/bleve.go @@ -18,6 +18,7 @@ package fulltext import ( + "regexp" "strings" "github.com/blevesearch/bleve/v2" @@ -60,6 +61,7 @@ type Indexer interface { Index(elements ...IndexElement) error Delete(eventID string) error Search(term string, roomIDs, keys []string, limit, from int, orderByStreamPos bool) (*bleve.SearchResult, error) + GetHighlights(result *bleve.SearchResult) []string Close() error } @@ -124,6 +126,47 @@ func (f *Search) Delete(eventID string) error { return f.FulltextIndex.Delete(eventID) } +var highlightMatcher = regexp.MustCompile("(.*?)") + +// GetHighlights extracts the highlights from a SearchResult. +func (f *Search) GetHighlights(result *bleve.SearchResult) []string { + if result == nil { + return []string{} + } + + seenMatches := make(map[string]struct{}) + + for _, hit := range result.Hits { + if hit.Fragments == nil { + continue + } + fragments, ok := hit.Fragments["Content"] + if !ok { + continue + } + for _, x := range fragments { + substringMatches := highlightMatcher.FindAllStringSubmatch(x, -1) + for _, matches := range substringMatches { + for i := range matches { + if i == 0 { // skip first match, this is the complete substring match + continue + } + if _, ok := seenMatches[matches[i]]; ok { + continue + } + seenMatches[matches[i]] = struct{}{} + } + } + } + } + + res := make([]string, 0, len(seenMatches)) + for m := range seenMatches { + res = append(res, m) + } + return res +} + // Search searches the index given a search term, roomIDs and keys. func (f *Search) Search(term string, roomIDs, keys []string, limit, from int, orderByStreamPos bool) (*bleve.SearchResult, error) { qry := bleve.NewConjunctionQuery() @@ -163,6 +206,10 @@ func (f *Search) Search(term string, roomIDs, keys []string, limit, from int, or s.SortBy([]string{"-StreamPosition"}) } + // Highlight some words + s.Highlight = bleve.NewHighlight() + s.Highlight.Fields = []string{"Content"} + return f.FulltextIndex.Search(s) } diff --git a/internal/fulltext/bleve_test.go b/internal/fulltext/bleve_test.go index bd8289d58..a77c23937 100644 --- a/internal/fulltext/bleve_test.go +++ b/internal/fulltext/bleve_test.go @@ -160,14 +160,16 @@ func TestSearch(t *testing.T) { roomIndex []int } tests := []struct { - name string - args args - wantCount int - wantErr bool + name string + args args + wantCount int + wantErr bool + wantHighlights []string }{ { - name: "Can search for many results in one room", - wantCount: 16, + name: "Can search for many results in one room", + wantCount: 16, + wantHighlights: []string{"lorem"}, args: args{ term: "lorem", roomIndex: []int{0}, @@ -175,8 +177,9 @@ func TestSearch(t *testing.T) { }, }, { - name: "Can search for one result in one room", - wantCount: 1, + name: "Can search for one result in one room", + wantCount: 1, + wantHighlights: []string{"lorem"}, args: args{ term: "lorem", roomIndex: []int{16}, @@ -184,8 +187,9 @@ func TestSearch(t *testing.T) { }, }, { - name: "Can search for many results in multiple rooms", - wantCount: 17, + name: "Can search for many results in multiple rooms", + wantCount: 17, + wantHighlights: []string{"lorem"}, args: args{ term: "lorem", roomIndex: []int{0, 16}, @@ -193,8 +197,9 @@ func TestSearch(t *testing.T) { }, }, { - name: "Can search for many results in all rooms, reversed", - wantCount: 30, + name: "Can search for many results in all rooms, reversed", + wantCount: 30, + wantHighlights: []string{"lorem"}, args: args{ term: "lorem", limit: 30, @@ -202,8 +207,9 @@ func TestSearch(t *testing.T) { }, }, { - name: "Can search for specific search room name", - wantCount: 1, + name: "Can search for specific search room name", + wantCount: 1, + wantHighlights: []string{"testing"}, args: args{ term: "testing", roomIndex: []int{}, @@ -212,8 +218,9 @@ func TestSearch(t *testing.T) { }, }, { - name: "Can search for specific search room topic", - wantCount: 1, + name: "Can search for specific search room topic", + wantCount: 1, + wantHighlights: []string{"fulltext"}, args: args{ term: "fulltext", roomIndex: []int{}, @@ -222,6 +229,7 @@ func TestSearch(t *testing.T) { }, }, } + for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { f, ctx := mustOpenIndex(t, "") @@ -238,6 +246,12 @@ func TestSearch(t *testing.T) { t.Errorf("Search() error = %v, wantErr %v", err, tt.wantErr) return } + + highlights := f.GetHighlights(got) + if !reflect.DeepEqual(highlights, tt.wantHighlights) { + t.Errorf("Search() got highligts = %v, want %v", highlights, tt.wantHighlights) + } + if !reflect.DeepEqual(len(got.Hits), tt.wantCount) { t.Errorf("Search() got = %v, want %v", len(got.Hits), tt.wantCount) } diff --git a/internal/fulltext/bleve_wasm.go b/internal/fulltext/bleve_wasm.go index 0053ed8c2..12709900b 100644 --- a/internal/fulltext/bleve_wasm.go +++ b/internal/fulltext/bleve_wasm.go @@ -33,6 +33,7 @@ type Indexer interface { Index(elements ...IndexElement) error Delete(eventID string) error Search(term string, roomIDs, keys []string, limit, from int, orderByStreamPos bool) (SearchResult, error) + GetHighlights(result SearchResult) []string Close() error } @@ -71,3 +72,7 @@ func (f *Search) Delete(eventID string) error { func (f *Search) Search(term string, roomIDs, keys []string, limit, from int, orderByStreamPos bool) (SearchResult, error) { return SearchResult{}, nil } + +func (f *Search) GetHighlights(result SearchResult) []string { + return []string{} +} diff --git a/syncapi/routing/search.go b/syncapi/routing/search.go index a3b6b1a80..69fa52942 100644 --- a/syncapi/routing/search.go +++ b/syncapi/routing/search.go @@ -19,7 +19,6 @@ import ( "net/http" "sort" "strconv" - "strings" "time" "github.com/blevesearch/bleve/v2/search" @@ -243,7 +242,7 @@ func Search(req *http.Request, device *api.Device, syncDB storage.Database, fts Groups: Groups{RoomID: groups}, Results: results, NextBatch: nextBatchResult, - Highlights: strings.Split(searchReq.SearchCategories.RoomEvents.SearchTerm, " "), + Highlights: fts.GetHighlights(result), State: stateForRooms, }, }, diff --git a/syncapi/routing/search_test.go b/syncapi/routing/search_test.go index 8559e5c9f..05479300e 100644 --- a/syncapi/routing/search_test.go +++ b/syncapi/routing/search_test.go @@ -53,7 +53,7 @@ func TestSearch(t *testing.T) { device: &aliceDevice, }, { - name: "searchTerm specified", + name: "searchTerm specified, found at the beginning", wantOK: true, searchReq: SearchRequest{ SearchCategories: SearchCategories{RoomEvents: RoomEvents{SearchTerm: "hello"}}, @@ -61,6 +61,26 @@ func TestSearch(t *testing.T) { device: &aliceDevice, wantResponseCount: 1, }, + { + name: "searchTerm specified, found at the end", + wantOK: true, + searchReq: SearchRequest{ + SearchCategories: SearchCategories{RoomEvents: RoomEvents{SearchTerm: "world3"}}, + }, + device: &aliceDevice, + wantResponseCount: 1, + }, + /* the following would need matchQuery.SetFuzziness(1) in bleve.go + { + name: "searchTerm fuzzy search", + wantOK: true, + searchReq: SearchRequest{ + SearchCategories: SearchCategories{RoomEvents: RoomEvents{SearchTerm: "hell"}}, // this still should find hello world + }, + device: &aliceDevice, + wantResponseCount: 1, + }, + */ { name: "searchTerm specified but no result", wantOK: true,