mirror of
https://github.com/matrix-org/dendrite.git
synced 2026-01-16 10:33:11 -06:00
Add "result at the end" test, let the fulltext determine which words to
highlight
This commit is contained in:
parent
88ca95bf40
commit
89428bc3a8
|
|
@ -18,6 +18,7 @@
|
|||
package fulltext
|
||||
|
||||
import (
|
||||
"regexp"
|
||||
"strings"
|
||||
|
||||
"github.com/blevesearch/bleve/v2"
|
||||
|
|
@ -60,6 +61,7 @@ type Indexer interface {
|
|||
Index(elements ...IndexElement) error
|
||||
Delete(eventID string) error
|
||||
Search(term string, roomIDs, keys []string, limit, from int, orderByStreamPos bool) (*bleve.SearchResult, error)
|
||||
GetHighlights(result *bleve.SearchResult) []string
|
||||
Close() error
|
||||
}
|
||||
|
||||
|
|
@ -124,6 +126,47 @@ func (f *Search) Delete(eventID string) error {
|
|||
return f.FulltextIndex.Delete(eventID)
|
||||
}
|
||||
|
||||
// highlightMatcher captures, non-greedily, the text enclosed by each
// <mark>…</mark> pair in a fragment; GetHighlights uses it to pull out the
// highlighted terms.
// NOTE(review): the <mark> tags are presumably what bleve's default
// highlighter emits for the "Content" field — confirm.
var highlightMatcher = regexp.MustCompile(`<mark>(.*?)</mark>`)
|
||||
|
||||
// GetHighlights extracts the highlights from a SearchResult.
|
||||
func (f *Search) GetHighlights(result *bleve.SearchResult) []string {
|
||||
if result == nil {
|
||||
return []string{}
|
||||
}
|
||||
|
||||
seenMatches := make(map[string]struct{})
|
||||
|
||||
for _, hit := range result.Hits {
|
||||
if hit.Fragments == nil {
|
||||
continue
|
||||
}
|
||||
fragments, ok := hit.Fragments["Content"]
|
||||
if !ok {
|
||||
continue
|
||||
}
|
||||
for _, x := range fragments {
|
||||
substringMatches := highlightMatcher.FindAllStringSubmatch(x, -1)
|
||||
for _, matches := range substringMatches {
|
||||
for i := range matches {
|
||||
if i == 0 { // skip first match, this is the complete substring match
|
||||
continue
|
||||
}
|
||||
if _, ok := seenMatches[matches[i]]; ok {
|
||||
continue
|
||||
}
|
||||
seenMatches[matches[i]] = struct{}{}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
res := make([]string, 0, len(seenMatches))
|
||||
for m := range seenMatches {
|
||||
res = append(res, m)
|
||||
}
|
||||
return res
|
||||
}
|
||||
|
||||
// Search searches the index given a search term, roomIDs and keys.
|
||||
func (f *Search) Search(term string, roomIDs, keys []string, limit, from int, orderByStreamPos bool) (*bleve.SearchResult, error) {
|
||||
qry := bleve.NewConjunctionQuery()
|
||||
|
|
@ -163,6 +206,10 @@ func (f *Search) Search(term string, roomIDs, keys []string, limit, from int, or
|
|||
s.SortBy([]string{"-StreamPosition"})
|
||||
}
|
||||
|
||||
// Highlight some words
|
||||
s.Highlight = bleve.NewHighlight()
|
||||
s.Highlight.Fields = []string{"Content"}
|
||||
|
||||
return f.FulltextIndex.Search(s)
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -164,10 +164,12 @@ func TestSearch(t *testing.T) {
|
|||
args args
|
||||
wantCount int
|
||||
wantErr bool
|
||||
wantHighlights []string
|
||||
}{
|
||||
{
|
||||
name: "Can search for many results in one room",
|
||||
wantCount: 16,
|
||||
wantHighlights: []string{"lorem"},
|
||||
args: args{
|
||||
term: "lorem",
|
||||
roomIndex: []int{0},
|
||||
|
|
@ -177,6 +179,7 @@ func TestSearch(t *testing.T) {
|
|||
{
|
||||
name: "Can search for one result in one room",
|
||||
wantCount: 1,
|
||||
wantHighlights: []string{"lorem"},
|
||||
args: args{
|
||||
term: "lorem",
|
||||
roomIndex: []int{16},
|
||||
|
|
@ -186,6 +189,7 @@ func TestSearch(t *testing.T) {
|
|||
{
|
||||
name: "Can search for many results in multiple rooms",
|
||||
wantCount: 17,
|
||||
wantHighlights: []string{"lorem"},
|
||||
args: args{
|
||||
term: "lorem",
|
||||
roomIndex: []int{0, 16},
|
||||
|
|
@ -195,6 +199,7 @@ func TestSearch(t *testing.T) {
|
|||
{
|
||||
name: "Can search for many results in all rooms, reversed",
|
||||
wantCount: 30,
|
||||
wantHighlights: []string{"lorem"},
|
||||
args: args{
|
||||
term: "lorem",
|
||||
limit: 30,
|
||||
|
|
@ -204,6 +209,7 @@ func TestSearch(t *testing.T) {
|
|||
{
|
||||
name: "Can search for specific search room name",
|
||||
wantCount: 1,
|
||||
wantHighlights: []string{"testing"},
|
||||
args: args{
|
||||
term: "testing",
|
||||
roomIndex: []int{},
|
||||
|
|
@ -214,6 +220,7 @@ func TestSearch(t *testing.T) {
|
|||
{
|
||||
name: "Can search for specific search room topic",
|
||||
wantCount: 1,
|
||||
wantHighlights: []string{"fulltext"},
|
||||
args: args{
|
||||
term: "fulltext",
|
||||
roomIndex: []int{},
|
||||
|
|
@ -222,6 +229,7 @@ func TestSearch(t *testing.T) {
|
|||
},
|
||||
},
|
||||
}
|
||||
|
||||
for _, tt := range tests {
|
||||
t.Run(tt.name, func(t *testing.T) {
|
||||
f, ctx := mustOpenIndex(t, "")
|
||||
|
|
@ -238,6 +246,12 @@ func TestSearch(t *testing.T) {
|
|||
t.Errorf("Search() error = %v, wantErr %v", err, tt.wantErr)
|
||||
return
|
||||
}
|
||||
|
||||
highlights := f.GetHighlights(got)
|
||||
if !reflect.DeepEqual(highlights, tt.wantHighlights) {
|
||||
t.Errorf("Search() got highligts = %v, want %v", highlights, tt.wantHighlights)
|
||||
}
|
||||
|
||||
if !reflect.DeepEqual(len(got.Hits), tt.wantCount) {
|
||||
t.Errorf("Search() got = %v, want %v", len(got.Hits), tt.wantCount)
|
||||
}
|
||||
|
|
|
|||
|
|
@ -33,6 +33,7 @@ type Indexer interface {
|
|||
Index(elements ...IndexElement) error
|
||||
Delete(eventID string) error
|
||||
Search(term string, roomIDs, keys []string, limit, from int, orderByStreamPos bool) (SearchResult, error)
|
||||
GetHighlights(result SearchResult) []string
|
||||
Close() error
|
||||
}
|
||||
|
||||
|
|
@ -71,3 +72,7 @@ func (f *Search) Delete(eventID string) error {
|
|||
func (f *Search) Search(term string, roomIDs, keys []string, limit, from int, orderByStreamPos bool) (SearchResult, error) {
|
||||
return SearchResult{}, nil
|
||||
}
|
||||
|
||||
func (f *Search) GetHighlights(result SearchResult) []string {
|
||||
return []string{}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -19,7 +19,6 @@ import (
|
|||
"net/http"
|
||||
"sort"
|
||||
"strconv"
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
"github.com/blevesearch/bleve/v2/search"
|
||||
|
|
@ -243,7 +242,7 @@ func Search(req *http.Request, device *api.Device, syncDB storage.Database, fts
|
|||
Groups: Groups{RoomID: groups},
|
||||
Results: results,
|
||||
NextBatch: nextBatchResult,
|
||||
Highlights: strings.Split(searchReq.SearchCategories.RoomEvents.SearchTerm, " "),
|
||||
Highlights: fts.GetHighlights(result),
|
||||
State: stateForRooms,
|
||||
},
|
||||
},
|
||||
|
|
|
|||
|
|
@ -53,7 +53,7 @@ func TestSearch(t *testing.T) {
|
|||
device: &aliceDevice,
|
||||
},
|
||||
{
|
||||
name: "searchTerm specified",
|
||||
name: "searchTerm specified, found at the beginning",
|
||||
wantOK: true,
|
||||
searchReq: SearchRequest{
|
||||
SearchCategories: SearchCategories{RoomEvents: RoomEvents{SearchTerm: "hello"}},
|
||||
|
|
@ -61,6 +61,26 @@ func TestSearch(t *testing.T) {
|
|||
device: &aliceDevice,
|
||||
wantResponseCount: 1,
|
||||
},
|
||||
{
|
||||
name: "searchTerm specified, found at the end",
|
||||
wantOK: true,
|
||||
searchReq: SearchRequest{
|
||||
SearchCategories: SearchCategories{RoomEvents: RoomEvents{SearchTerm: "world3"}},
|
||||
},
|
||||
device: &aliceDevice,
|
||||
wantResponseCount: 1,
|
||||
},
|
||||
/* the following would need matchQuery.SetFuzziness(1) in bleve.go
|
||||
{
|
||||
name: "searchTerm fuzzy search",
|
||||
wantOK: true,
|
||||
searchReq: SearchRequest{
|
||||
SearchCategories: SearchCategories{RoomEvents: RoomEvents{SearchTerm: "hell"}}, // this still should find hello world
|
||||
},
|
||||
device: &aliceDevice,
|
||||
wantResponseCount: 1,
|
||||
},
|
||||
*/
|
||||
{
|
||||
name: "searchTerm specified but no result",
|
||||
wantOK: true,
|
||||
|
|
|
|||
Loading…
Reference in a new issue