mirror of
https://github.com/matrix-org/dendrite.git
synced 2026-01-16 18:43:10 -06:00
Add "result at the end" test, let the fulltext determine which words to
highlight
This commit is contained in:
parent
88ca95bf40
commit
89428bc3a8
|
|
@ -18,6 +18,7 @@
|
||||||
package fulltext
|
package fulltext
|
||||||
|
|
||||||
import (
|
import (
|
||||||
|
"regexp"
|
||||||
"strings"
|
"strings"
|
||||||
|
|
||||||
"github.com/blevesearch/bleve/v2"
|
"github.com/blevesearch/bleve/v2"
|
||||||
|
|
@ -60,6 +61,7 @@ type Indexer interface {
|
||||||
Index(elements ...IndexElement) error
|
Index(elements ...IndexElement) error
|
||||||
Delete(eventID string) error
|
Delete(eventID string) error
|
||||||
Search(term string, roomIDs, keys []string, limit, from int, orderByStreamPos bool) (*bleve.SearchResult, error)
|
Search(term string, roomIDs, keys []string, limit, from int, orderByStreamPos bool) (*bleve.SearchResult, error)
|
||||||
|
GetHighlights(result *bleve.SearchResult) []string
|
||||||
Close() error
|
Close() error
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -124,6 +126,47 @@ func (f *Search) Delete(eventID string) error {
|
||||||
return f.FulltextIndex.Delete(eventID)
|
return f.FulltextIndex.Delete(eventID)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
var highlightMatcher = regexp.MustCompile("<mark>(.*?)</mark>")
|
||||||
|
|
||||||
|
// GetHighlights extracts the highlights from a SearchResult.
|
||||||
|
func (f *Search) GetHighlights(result *bleve.SearchResult) []string {
|
||||||
|
if result == nil {
|
||||||
|
return []string{}
|
||||||
|
}
|
||||||
|
|
||||||
|
seenMatches := make(map[string]struct{})
|
||||||
|
|
||||||
|
for _, hit := range result.Hits {
|
||||||
|
if hit.Fragments == nil {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
fragments, ok := hit.Fragments["Content"]
|
||||||
|
if !ok {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
for _, x := range fragments {
|
||||||
|
substringMatches := highlightMatcher.FindAllStringSubmatch(x, -1)
|
||||||
|
for _, matches := range substringMatches {
|
||||||
|
for i := range matches {
|
||||||
|
if i == 0 { // skip first match, this is the complete substring match
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
if _, ok := seenMatches[matches[i]]; ok {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
seenMatches[matches[i]] = struct{}{}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
res := make([]string, 0, len(seenMatches))
|
||||||
|
for m := range seenMatches {
|
||||||
|
res = append(res, m)
|
||||||
|
}
|
||||||
|
return res
|
||||||
|
}
|
||||||
|
|
||||||
// Search searches the index given a search term, roomIDs and keys.
|
// Search searches the index given a search term, roomIDs and keys.
|
||||||
func (f *Search) Search(term string, roomIDs, keys []string, limit, from int, orderByStreamPos bool) (*bleve.SearchResult, error) {
|
func (f *Search) Search(term string, roomIDs, keys []string, limit, from int, orderByStreamPos bool) (*bleve.SearchResult, error) {
|
||||||
qry := bleve.NewConjunctionQuery()
|
qry := bleve.NewConjunctionQuery()
|
||||||
|
|
@ -163,6 +206,10 @@ func (f *Search) Search(term string, roomIDs, keys []string, limit, from int, or
|
||||||
s.SortBy([]string{"-StreamPosition"})
|
s.SortBy([]string{"-StreamPosition"})
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Highlight some words
|
||||||
|
s.Highlight = bleve.NewHighlight()
|
||||||
|
s.Highlight.Fields = []string{"Content"}
|
||||||
|
|
||||||
return f.FulltextIndex.Search(s)
|
return f.FulltextIndex.Search(s)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -164,10 +164,12 @@ func TestSearch(t *testing.T) {
|
||||||
args args
|
args args
|
||||||
wantCount int
|
wantCount int
|
||||||
wantErr bool
|
wantErr bool
|
||||||
|
wantHighlights []string
|
||||||
}{
|
}{
|
||||||
{
|
{
|
||||||
name: "Can search for many results in one room",
|
name: "Can search for many results in one room",
|
||||||
wantCount: 16,
|
wantCount: 16,
|
||||||
|
wantHighlights: []string{"lorem"},
|
||||||
args: args{
|
args: args{
|
||||||
term: "lorem",
|
term: "lorem",
|
||||||
roomIndex: []int{0},
|
roomIndex: []int{0},
|
||||||
|
|
@ -177,6 +179,7 @@ func TestSearch(t *testing.T) {
|
||||||
{
|
{
|
||||||
name: "Can search for one result in one room",
|
name: "Can search for one result in one room",
|
||||||
wantCount: 1,
|
wantCount: 1,
|
||||||
|
wantHighlights: []string{"lorem"},
|
||||||
args: args{
|
args: args{
|
||||||
term: "lorem",
|
term: "lorem",
|
||||||
roomIndex: []int{16},
|
roomIndex: []int{16},
|
||||||
|
|
@ -186,6 +189,7 @@ func TestSearch(t *testing.T) {
|
||||||
{
|
{
|
||||||
name: "Can search for many results in multiple rooms",
|
name: "Can search for many results in multiple rooms",
|
||||||
wantCount: 17,
|
wantCount: 17,
|
||||||
|
wantHighlights: []string{"lorem"},
|
||||||
args: args{
|
args: args{
|
||||||
term: "lorem",
|
term: "lorem",
|
||||||
roomIndex: []int{0, 16},
|
roomIndex: []int{0, 16},
|
||||||
|
|
@ -195,6 +199,7 @@ func TestSearch(t *testing.T) {
|
||||||
{
|
{
|
||||||
name: "Can search for many results in all rooms, reversed",
|
name: "Can search for many results in all rooms, reversed",
|
||||||
wantCount: 30,
|
wantCount: 30,
|
||||||
|
wantHighlights: []string{"lorem"},
|
||||||
args: args{
|
args: args{
|
||||||
term: "lorem",
|
term: "lorem",
|
||||||
limit: 30,
|
limit: 30,
|
||||||
|
|
@ -204,6 +209,7 @@ func TestSearch(t *testing.T) {
|
||||||
{
|
{
|
||||||
name: "Can search for specific search room name",
|
name: "Can search for specific search room name",
|
||||||
wantCount: 1,
|
wantCount: 1,
|
||||||
|
wantHighlights: []string{"testing"},
|
||||||
args: args{
|
args: args{
|
||||||
term: "testing",
|
term: "testing",
|
||||||
roomIndex: []int{},
|
roomIndex: []int{},
|
||||||
|
|
@ -214,6 +220,7 @@ func TestSearch(t *testing.T) {
|
||||||
{
|
{
|
||||||
name: "Can search for specific search room topic",
|
name: "Can search for specific search room topic",
|
||||||
wantCount: 1,
|
wantCount: 1,
|
||||||
|
wantHighlights: []string{"fulltext"},
|
||||||
args: args{
|
args: args{
|
||||||
term: "fulltext",
|
term: "fulltext",
|
||||||
roomIndex: []int{},
|
roomIndex: []int{},
|
||||||
|
|
@ -222,6 +229,7 @@ func TestSearch(t *testing.T) {
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
}
|
}
|
||||||
|
|
||||||
for _, tt := range tests {
|
for _, tt := range tests {
|
||||||
t.Run(tt.name, func(t *testing.T) {
|
t.Run(tt.name, func(t *testing.T) {
|
||||||
f, ctx := mustOpenIndex(t, "")
|
f, ctx := mustOpenIndex(t, "")
|
||||||
|
|
@ -238,6 +246,12 @@ func TestSearch(t *testing.T) {
|
||||||
t.Errorf("Search() error = %v, wantErr %v", err, tt.wantErr)
|
t.Errorf("Search() error = %v, wantErr %v", err, tt.wantErr)
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
|
||||||
|
highlights := f.GetHighlights(got)
|
||||||
|
if !reflect.DeepEqual(highlights, tt.wantHighlights) {
|
||||||
|
t.Errorf("Search() got highligts = %v, want %v", highlights, tt.wantHighlights)
|
||||||
|
}
|
||||||
|
|
||||||
if !reflect.DeepEqual(len(got.Hits), tt.wantCount) {
|
if !reflect.DeepEqual(len(got.Hits), tt.wantCount) {
|
||||||
t.Errorf("Search() got = %v, want %v", len(got.Hits), tt.wantCount)
|
t.Errorf("Search() got = %v, want %v", len(got.Hits), tt.wantCount)
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -33,6 +33,7 @@ type Indexer interface {
|
||||||
Index(elements ...IndexElement) error
|
Index(elements ...IndexElement) error
|
||||||
Delete(eventID string) error
|
Delete(eventID string) error
|
||||||
Search(term string, roomIDs, keys []string, limit, from int, orderByStreamPos bool) (SearchResult, error)
|
Search(term string, roomIDs, keys []string, limit, from int, orderByStreamPos bool) (SearchResult, error)
|
||||||
|
GetHighlights(result SearchResult) []string
|
||||||
Close() error
|
Close() error
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -71,3 +72,7 @@ func (f *Search) Delete(eventID string) error {
|
||||||
func (f *Search) Search(term string, roomIDs, keys []string, limit, from int, orderByStreamPos bool) (SearchResult, error) {
|
func (f *Search) Search(term string, roomIDs, keys []string, limit, from int, orderByStreamPos bool) (SearchResult, error) {
|
||||||
return SearchResult{}, nil
|
return SearchResult{}, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func (f *Search) GetHighlights(result SearchResult) []string {
|
||||||
|
return []string{}
|
||||||
|
}
|
||||||
|
|
|
||||||
|
|
@ -19,7 +19,6 @@ import (
|
||||||
"net/http"
|
"net/http"
|
||||||
"sort"
|
"sort"
|
||||||
"strconv"
|
"strconv"
|
||||||
"strings"
|
|
||||||
"time"
|
"time"
|
||||||
|
|
||||||
"github.com/blevesearch/bleve/v2/search"
|
"github.com/blevesearch/bleve/v2/search"
|
||||||
|
|
@ -243,7 +242,7 @@ func Search(req *http.Request, device *api.Device, syncDB storage.Database, fts
|
||||||
Groups: Groups{RoomID: groups},
|
Groups: Groups{RoomID: groups},
|
||||||
Results: results,
|
Results: results,
|
||||||
NextBatch: nextBatchResult,
|
NextBatch: nextBatchResult,
|
||||||
Highlights: strings.Split(searchReq.SearchCategories.RoomEvents.SearchTerm, " "),
|
Highlights: fts.GetHighlights(result),
|
||||||
State: stateForRooms,
|
State: stateForRooms,
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
|
|
|
||||||
|
|
@ -53,7 +53,7 @@ func TestSearch(t *testing.T) {
|
||||||
device: &aliceDevice,
|
device: &aliceDevice,
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
name: "searchTerm specified",
|
name: "searchTerm specified, found at the beginning",
|
||||||
wantOK: true,
|
wantOK: true,
|
||||||
searchReq: SearchRequest{
|
searchReq: SearchRequest{
|
||||||
SearchCategories: SearchCategories{RoomEvents: RoomEvents{SearchTerm: "hello"}},
|
SearchCategories: SearchCategories{RoomEvents: RoomEvents{SearchTerm: "hello"}},
|
||||||
|
|
@ -61,6 +61,26 @@ func TestSearch(t *testing.T) {
|
||||||
device: &aliceDevice,
|
device: &aliceDevice,
|
||||||
wantResponseCount: 1,
|
wantResponseCount: 1,
|
||||||
},
|
},
|
||||||
|
{
|
||||||
|
name: "searchTerm specified, found at the end",
|
||||||
|
wantOK: true,
|
||||||
|
searchReq: SearchRequest{
|
||||||
|
SearchCategories: SearchCategories{RoomEvents: RoomEvents{SearchTerm: "world3"}},
|
||||||
|
},
|
||||||
|
device: &aliceDevice,
|
||||||
|
wantResponseCount: 1,
|
||||||
|
},
|
||||||
|
/* the following would need matchQuery.SetFuzziness(1) in bleve.go
|
||||||
|
{
|
||||||
|
name: "searchTerm fuzzy search",
|
||||||
|
wantOK: true,
|
||||||
|
searchReq: SearchRequest{
|
||||||
|
SearchCategories: SearchCategories{RoomEvents: RoomEvents{SearchTerm: "hell"}}, // this still should find hello world
|
||||||
|
},
|
||||||
|
device: &aliceDevice,
|
||||||
|
wantResponseCount: 1,
|
||||||
|
},
|
||||||
|
*/
|
||||||
{
|
{
|
||||||
name: "searchTerm specified but no result",
|
name: "searchTerm specified but no result",
|
||||||
wantOK: true,
|
wantOK: true,
|
||||||
|
|
|
||||||
Loading…
Reference in a new issue