mirror of
https://github.com/matrix-org/dendrite.git
synced 2026-01-04 20:53:09 -06:00
Add fulltext search
This commit is contained in:
parent
a53c9300aa
commit
e5f49812a7
1
go.mod
1
go.mod
|
|
@ -11,6 +11,7 @@ require (
|
|||
github.com/HdrHistogram/hdrhistogram-go v1.1.2 // indirect
|
||||
github.com/MFAshby/stdemuxerhook v1.0.0
|
||||
github.com/Masterminds/semver/v3 v3.1.1
|
||||
github.com/blevesearch/bleve/v2 v2.3.2
|
||||
github.com/codeclysm/extract v2.2.0+incompatible
|
||||
github.com/containerd/containerd v1.6.2 // indirect
|
||||
github.com/docker/docker v20.10.14+incompatible
|
||||
|
|
|
|||
47
go.sum
47
go.sum
|
|
@ -114,6 +114,8 @@ github.com/PuerkitoBio/purell v1.1.1/go.mod h1:c11w/QuzBsJSee3cPx9rAFu61PvFxuPbt
|
|||
github.com/PuerkitoBio/urlesc v0.0.0-20160726150825-5bd2802263f2/go.mod h1:uGdkoq3SwY9Y+13GIhn11/XLaGBb4BfwItxLd5jeuXE=
|
||||
github.com/PuerkitoBio/urlesc v0.0.0-20170810143723-de5bf2ad4578/go.mod h1:uGdkoq3SwY9Y+13GIhn11/XLaGBb4BfwItxLd5jeuXE=
|
||||
github.com/RoaringBitmap/roaring v0.4.7/go.mod h1:8khRDP4HmeXns4xIj9oGrKSz7XTQiJx2zgh7AcNke4w=
|
||||
github.com/RoaringBitmap/roaring v0.9.4 h1:ckvZSX5gwCRaJYBNe7syNawCU5oruY9gQmjXlp4riwo=
|
||||
github.com/RoaringBitmap/roaring v0.9.4/go.mod h1:icnadbWcNyfEHlYdr+tDlOTih1Bf/h+rzPpv4sbomAA=
|
||||
github.com/RyanCarrier/dijkstra v1.0.0/go.mod h1:5agGUBNEtUAGIANmbw09fuO3a2htPEkc1jNH01qxCWA=
|
||||
github.com/RyanCarrier/dijkstra-1 v0.0.0-20170512020943-0e5801a26345/go.mod h1:OK4EvWJ441LQqGzed5NGB6vKBAE34n3z7iayPcEwr30=
|
||||
github.com/Shopify/goreferrer v0.0.0-20181106222321-ec9c9a553398/go.mod h1:a1uqRtAwp2Xwc6WNPJEufxJ7fx3npB4UV/JOLmbu5I0=
|
||||
|
|
@ -158,10 +160,45 @@ github.com/beorn7/perks v1.0.1 h1:VlbKKnNfV8bJzeqoa4cOKqO6bYr3WgKZxO8Z16+hsOM=
|
|||
github.com/beorn7/perks v1.0.1/go.mod h1:G2ZrVWU2WbWT9wwq4/hrbKbnv/1ERSJQ0ibhJ6rlkpw=
|
||||
github.com/bgentry/speakeasy v0.1.0/go.mod h1:+zsyZBPWlz7T6j88CTgSN5bM796AkVf0kBD4zp0CCIs=
|
||||
github.com/bitly/go-simplejson v0.5.0/go.mod h1:cXHtHw4XUPsvGaxgjIAn8PhEWG9NfngEKAMDJEczWVA=
|
||||
github.com/bits-and-blooms/bitset v1.2.0 h1:Kn4yilvwNtMACtf1eYDlG8H77R07mZSPbMjLyS07ChA=
|
||||
github.com/bits-and-blooms/bitset v1.2.0/go.mod h1:gIdJ4wp64HaoK2YrL1Q5/N7Y16edYb8uY+O0FJTyyDA=
|
||||
github.com/bketelsen/crypt v0.0.3-0.20200106085610-5cbc8cc4026c/go.mod h1:MKsuJmJgSg28kpZDP6UIiPt0e0Oz0kqKNGyRaWEPv84=
|
||||
github.com/blang/semver v3.1.0+incompatible/go.mod h1:kRBLl5iJ+tD4TcOOxsy/0fnwebNt5EWlYSAyrTnjyyk=
|
||||
github.com/blang/semver v3.5.1+incompatible/go.mod h1:kRBLl5iJ+tD4TcOOxsy/0fnwebNt5EWlYSAyrTnjyyk=
|
||||
github.com/blevesearch/bleve/v2 v2.3.2 h1:BJUnMhi2nrkl+vboHmKfW+9l+tJSj39HeWa5c3BN3/Y=
|
||||
github.com/blevesearch/bleve/v2 v2.3.2/go.mod h1:96+xE5pZUOsr3Y4vHzV1cBC837xZCpwLlX0hrrxnvIg=
|
||||
github.com/blevesearch/bleve_index_api v1.0.1 h1:nx9++0hnyiGOHJwQQYfsUGzpRdEVE5LsylmmngQvaFk=
|
||||
github.com/blevesearch/bleve_index_api v1.0.1/go.mod h1:fiwKS0xLEm+gBRgv5mumf0dhgFr2mDgZah1pqv1c1M4=
|
||||
github.com/blevesearch/go-metrics v0.0.0-20190826022208-cac0b30c2563/go.mod h1:9eJDeqxJ3E7WnLebQUlPD7ZjSce7AnDb9vjGmMCbD0A=
|
||||
github.com/blevesearch/go-porterstemmer v1.0.3 h1:GtmsqID0aZdCSNiY8SkuPJ12pD4jI+DdXTAn4YRcHCo=
|
||||
github.com/blevesearch/go-porterstemmer v1.0.3/go.mod h1:angGc5Ht+k2xhJdZi511LtmxuEf0OVpvUUNrwmM1P7M=
|
||||
github.com/blevesearch/goleveldb v1.0.1/go.mod h1:WrU8ltZbIp0wAoig/MHbrPCXSOLpe79nz5lv5nqfYrQ=
|
||||
github.com/blevesearch/gtreap v0.1.1 h1:2JWigFrzDMR+42WGIN/V2p0cUvn4UP3C4Q5nmaZGW8Y=
|
||||
github.com/blevesearch/gtreap v0.1.1/go.mod h1:QaQyDRAT51sotthUWAH4Sj08awFSSWzgYICSZ3w0tYk=
|
||||
github.com/blevesearch/mmap-go v1.0.2/go.mod h1:ol2qBqYaOUsGdm7aRMRrYGgPvnwLe6Y+7LMvAB5IbSA=
|
||||
github.com/blevesearch/mmap-go v1.0.3 h1:7QkALgFNooSq3a46AE+pWeKASAZc9SiNFJhDGF1NDx4=
|
||||
github.com/blevesearch/mmap-go v1.0.3/go.mod h1:pYvKl/grLQrBxuaRYgoTssa4rVujYYeenDp++2E+yvs=
|
||||
github.com/blevesearch/scorch_segment_api/v2 v2.1.0 h1:NFwteOpZEvJk5Vg0H6gD0hxupsG3JYocE4DBvsA2GZI=
|
||||
github.com/blevesearch/scorch_segment_api/v2 v2.1.0/go.mod h1:uch7xyyO/Alxkuxa+CGs79vw0QY8BENSBjg6Mw5L5DE=
|
||||
github.com/blevesearch/segment v0.9.0 h1:5lG7yBCx98or7gK2cHMKPukPZ/31Kag7nONpoBt22Ac=
|
||||
github.com/blevesearch/segment v0.9.0/go.mod h1:9PfHYUdQCgHktBgvtUOF4x+pc4/l8rdH0u5spnW85UQ=
|
||||
github.com/blevesearch/snowball v0.6.1/go.mod h1:ZF0IBg5vgpeoUhnMza2v0A/z8m1cWPlwhke08LpNusg=
|
||||
github.com/blevesearch/snowballstem v0.9.0 h1:lMQ189YspGP6sXvZQ4WZ+MLawfV8wOmPoD/iWeNXm8s=
|
||||
github.com/blevesearch/snowballstem v0.9.0/go.mod h1:PivSj3JMc8WuaFkTSRDW2SlrulNWPl4ABg1tC/hlgLs=
|
||||
github.com/blevesearch/upsidedown_store_api v1.0.1 h1:1SYRwyoFLwG3sj0ed89RLtM15amfX2pXlYbFOnF8zNU=
|
||||
github.com/blevesearch/upsidedown_store_api v1.0.1/go.mod h1:MQDVGpHZrpe3Uy26zJBf/a8h0FZY6xJbthIMm8myH2Q=
|
||||
github.com/blevesearch/vellum v1.0.7 h1:+vn8rfyCRHxKVRgDLeR0FAXej2+6mEb5Q15aQE/XESQ=
|
||||
github.com/blevesearch/vellum v1.0.7/go.mod h1:doBZpmRhwTsASB4QdUZANlJvqVAUdUyX0ZK7QJCTeBE=
|
||||
github.com/blevesearch/zapx/v11 v11.3.3 h1:8vQMO5hdA2qPCmicIMuKS+qcvUAEh6Vcb0uve4Nh8e4=
|
||||
github.com/blevesearch/zapx/v11 v11.3.3/go.mod h1:YzTfUm4kS3e8OmTXDHVV8OzC5MWPO/VPJZQgPNVb4Lc=
|
||||
github.com/blevesearch/zapx/v12 v12.3.3 h1:MQO5YNI8MqdPz12ALCoXiJw5cl9QQamYZSp285Z/+Mo=
|
||||
github.com/blevesearch/zapx/v12 v12.3.3/go.mod h1:RMl6lOZqF+sTxKvhQDJ5yK2LT3Mu7E2p/jGdjAaiRxs=
|
||||
github.com/blevesearch/zapx/v13 v13.3.3 h1:TS4xpMK1ARPYHq+1WwuEOKMOiwvKpTK3RuWOkKlI7BE=
|
||||
github.com/blevesearch/zapx/v13 v13.3.3/go.mod h1:eppobNM35U4C22yDvTuxV9xPqo10pwfP/jugL4INWG4=
|
||||
github.com/blevesearch/zapx/v14 v14.3.3 h1:dqqAzGphKl0yehHKKntDHKlEMhi9B/tJrD4OsWpY7YE=
|
||||
github.com/blevesearch/zapx/v14 v14.3.3/go.mod h1:zXNcVzukh0AvG57oUtT1T0ndi09H0kELNaNmekEy0jw=
|
||||
github.com/blevesearch/zapx/v15 v15.3.3 h1:60oE+qsJkveLenJmbc0eaH59GWYCbJJsPDV6Z5hEoYY=
|
||||
github.com/blevesearch/zapx/v15 v15.3.3/go.mod h1:C+f/97ZzTzK6vt/7sVlZdzZxKu+5+j4SrGCvr9dJzaY=
|
||||
github.com/bmizerany/assert v0.0.0-20160611221934-b7ed37b82869/go.mod h1:Ekp36dRnpXw/yCqJaO+ZrUyxD+3VXMFFr56k5XYrpB4=
|
||||
github.com/bradfitz/go-smtpd v0.0.0-20170404230938-deb6d6237625/go.mod h1:HYsPBTaaSFSlLx/70C2HPIMNZpVV8+vt/A+FMnYP11g=
|
||||
github.com/bradfitz/iter v0.0.0-20140124041915-454541ec3da2/go.mod h1:PyRFw1Lt2wKX4ZVSQ2mk+PeDa1rxyObEDlApuIsUKuo=
|
||||
|
|
@ -332,6 +369,8 @@ github.com/coreos/go-systemd/v22 v22.1.0/go.mod h1:xO0FLkIi5MaZafQlIrOotqXZ90ih+
|
|||
github.com/coreos/go-systemd/v22 v22.3.2/go.mod h1:Y58oyj3AT4RCenI/lSvhwexgC+NSVTIJ3seZv2GcEnc=
|
||||
github.com/coreos/pkg v0.0.0-20160727233714-3ac0863d7acf/go.mod h1:E3G3o1h8I7cfcXa63jLwjI0eiQQMgzzUDFVpN/nH/eA=
|
||||
github.com/coreos/pkg v0.0.0-20180928190104-399ea9e2e55f/go.mod h1:E3G3o1h8I7cfcXa63jLwjI0eiQQMgzzUDFVpN/nH/eA=
|
||||
github.com/couchbase/ghistogram v0.1.0/go.mod h1:s1Jhy76zqfEecpNWJfWUiKZookAFaiGOEoyzgHt9i7k=
|
||||
github.com/couchbase/moss v0.2.0/go.mod h1:9MaHIaRuy9pvLPUJxB8sh8OrLfyDczECVL37grCIubs=
|
||||
github.com/cpuguy83/go-md2man v1.0.10/go.mod h1:SmD6nW6nTyfqj6ABTjUi3V3JVMnlJmwcJI5acqYI6dE=
|
||||
github.com/cpuguy83/go-md2man/v2 v2.0.0-20190314233015-f79a8a8ca69d/go.mod h1:maD7wRr/U5Z6m/iR4s+kqSMx2CaBsrgA7czyZG/E6dU=
|
||||
github.com/cpuguy83/go-md2man/v2 v2.0.0/go.mod h1:maD7wRr/U5Z6m/iR4s+kqSMx2CaBsrgA7czyZG/E6dU=
|
||||
|
|
@ -534,6 +573,8 @@ github.com/golang/protobuf v1.5.1/go.mod h1:DopwsBzvsk0Fs44TXzsVbJyPhcCPeIwnvohx
|
|||
github.com/golang/protobuf v1.5.2 h1:ROPKBNFfQgOUMifHyP+KYbvpjbdoFNs+aK7DXlji0Tw=
|
||||
github.com/golang/protobuf v1.5.2/go.mod h1:XVQd3VNwM+JqD3oG2Ue2ip4fOMUkwXdXDdiuN0vRsmY=
|
||||
github.com/golang/snappy v0.0.0-20180518054509-2e65f85255db/go.mod h1:/XxbfmMg8lxefKM7IXC3fBNl/7bRcc72aCRzEWrmP2Q=
|
||||
github.com/golang/snappy v0.0.1 h1:Qgr9rKW7uDUkrbSmQeiDsGa8SjGyCOGtuasMWwvp2P4=
|
||||
github.com/golang/snappy v0.0.1/go.mod h1:/XxbfmMg8lxefKM7IXC3fBNl/7bRcc72aCRzEWrmP2Q=
|
||||
github.com/gologme/log v1.2.0/go.mod h1:gq31gQ8wEHkR+WekdWsqDuf8pXTUZA9BnnzTuPz1Y9U=
|
||||
github.com/gologme/log v1.3.0 h1:l781G4dE+pbigClDSDzSaaYKtiueHCILUa/qSDsmHAo=
|
||||
github.com/gologme/log v1.3.0/go.mod h1:yKT+DvIPdDdDoPtqFrFxheooyVmoqi0BAsw+erN3wA4=
|
||||
|
|
@ -874,6 +915,8 @@ github.com/morikuni/aec v1.0.0/go.mod h1:BbKIizmSmc5MMPqRYbxO4ZU0S0+P200+tUnFx7P
|
|||
github.com/moul/http2curl v1.0.0/go.mod h1:8UbvGypXm98wA/IqH45anm5Y2Z6ep6O31QGOAZ3H0fQ=
|
||||
github.com/mrunalp/fileutils v0.5.0/go.mod h1:M1WthSahJixYnrXQl/DFQuteStB1weuxD2QJNHXfbSQ=
|
||||
github.com/mschoch/smat v0.0.0-20160514031455-90eadee771ae/go.mod h1:qAyveg+e4CE+eKJXWVjKXM4ck2QobLqTDytGJbLLhJg=
|
||||
github.com/mschoch/smat v0.2.0 h1:8imxQsjDm8yFEAVBe7azKmKSgzSkZXDuKkSq9374khM=
|
||||
github.com/mschoch/smat v0.2.0/go.mod h1:kc9mz7DoBKqDyiRL7VZN8KvXQMWeTaVnttLRXOlotKw=
|
||||
github.com/munnerz/goautoneg v0.0.0-20120707110453-a547fc61f48d/go.mod h1:+n7T8mK8HuQTcFwEeznm/DIxMOiR9yIdICNftLE1DvQ=
|
||||
github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822/go.mod h1:+n7T8mK8HuQTcFwEeznm/DIxMOiR9yIdICNftLE1DvQ=
|
||||
github.com/mwitkow/go-conntrack v0.0.0-20161129095857-cc309e4a2223/go.mod h1:qRWi+5nqEBWmkhHvq77mSJWrCKwh8bxhgT7d/eI7P4U=
|
||||
|
|
@ -909,6 +952,7 @@ github.com/olekukonko/tablewriter v0.0.0-20170122224234-a0225b3f23b5/go.mod h1:v
|
|||
github.com/onsi/ginkgo v0.0.0-20151202141238-7f8ab55aaf3b/go.mod h1:lLunBs/Ym6LB5Z9jYTR76FiuTmxDTDusOGeTQH+WWjE=
|
||||
github.com/onsi/ginkgo v0.0.0-20170829012221-11459a886d9c/go.mod h1:lLunBs/Ym6LB5Z9jYTR76FiuTmxDTDusOGeTQH+WWjE=
|
||||
github.com/onsi/ginkgo v1.6.0/go.mod h1:lLunBs/Ym6LB5Z9jYTR76FiuTmxDTDusOGeTQH+WWjE=
|
||||
github.com/onsi/ginkgo v1.7.0/go.mod h1:lLunBs/Ym6LB5Z9jYTR76FiuTmxDTDusOGeTQH+WWjE=
|
||||
github.com/onsi/ginkgo v1.8.0/go.mod h1:lLunBs/Ym6LB5Z9jYTR76FiuTmxDTDusOGeTQH+WWjE=
|
||||
github.com/onsi/ginkgo v1.10.1/go.mod h1:lLunBs/Ym6LB5Z9jYTR76FiuTmxDTDusOGeTQH+WWjE=
|
||||
github.com/onsi/ginkgo v1.10.3/go.mod h1:lLunBs/Ym6LB5Z9jYTR76FiuTmxDTDusOGeTQH+WWjE=
|
||||
|
|
@ -922,6 +966,7 @@ github.com/onsi/ginkgo v1.16.4 h1:29JGrr5oVBm5ulCWet69zQkzWipVXIol6ygQUe/EzNc=
|
|||
github.com/onsi/ginkgo v1.16.4/go.mod h1:dX+/inL/fNMqNlz0e9LfyB9TswhZpCVdJM/Z6Vvnwo0=
|
||||
github.com/onsi/gomega v0.0.0-20151007035656-2152b45fa28a/go.mod h1:C1qb7wdrVGGVU+Z6iS04AVkA3Q65CEZX59MT0QO5uiA=
|
||||
github.com/onsi/gomega v0.0.0-20170829124025-dcabb60a477c/go.mod h1:C1qb7wdrVGGVU+Z6iS04AVkA3Q65CEZX59MT0QO5uiA=
|
||||
github.com/onsi/gomega v1.4.3/go.mod h1:ex+gbHU/CVuBBDIJjb2X0qEXbFg53c61hWP/1CpauHY=
|
||||
github.com/onsi/gomega v1.5.0/go.mod h1:ex+gbHU/CVuBBDIJjb2X0qEXbFg53c61hWP/1CpauHY=
|
||||
github.com/onsi/gomega v1.7.0/go.mod h1:ex+gbHU/CVuBBDIJjb2X0qEXbFg53c61hWP/1CpauHY=
|
||||
github.com/onsi/gomega v1.7.1/go.mod h1:XdKZgCCFLUoM/7CFJVPcG8C1xQ1AJ0vpAezJrB7JYyY=
|
||||
|
|
@ -1200,6 +1245,7 @@ github.com/yvasiyarov/newrelic_platform_go v0.0.0-20140908184405-b21fdbd4370f/go
|
|||
go.etcd.io/bbolt v1.3.2/go.mod h1:IbVyRI1SCnLcuJnV2u8VeU0CEYM7e686BmAb1XKL+uU=
|
||||
go.etcd.io/bbolt v1.3.3/go.mod h1:IbVyRI1SCnLcuJnV2u8VeU0CEYM7e686BmAb1XKL+uU=
|
||||
go.etcd.io/bbolt v1.3.5/go.mod h1:G5EMThwa9y8QZGBClrRx5EY+Yw9kAhnjy3bSjsnlVTQ=
|
||||
go.etcd.io/bbolt v1.3.6 h1:/ecaJf0sk1l4l6V4awd65v2C3ILy7MSj+s/x1ADCIMU=
|
||||
go.etcd.io/bbolt v1.3.6/go.mod h1:qXsaaIqmgQH0T+OPdb99Bf+PKfBBQVAdyD6TY9G8XM4=
|
||||
go.etcd.io/etcd v0.5.0-alpha.5.0.20200910180754-dd1b699fc489/go.mod h1:yVHk9ub3CSBatqGNg7GRmsnfLWtoW60w4eDYfh7vHDg=
|
||||
go.etcd.io/etcd/api/v3 v3.5.0/go.mod h1:cbVKeC6lCfl7j/8jBhAK6aIYO9XOjdptoxU/nLQcPvs=
|
||||
|
|
@ -1438,6 +1484,7 @@ golang.org/x/sys v0.0.0-20181029174526-d69651ed3497/go.mod h1:STP8DvDyc/dI5b8T5h
|
|||
golang.org/x/sys v0.0.0-20181107165924-66b7b1311ac8/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
|
||||
golang.org/x/sys v0.0.0-20181116152217-5ac8a444bdc5/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
|
||||
golang.org/x/sys v0.0.0-20181205085412-a5c9d58dba9a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
|
||||
golang.org/x/sys v0.0.0-20181221143128-b4a75ba826a6/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
|
||||
golang.org/x/sys v0.0.0-20190130150945-aca44879d564/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
|
||||
golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
|
||||
golang.org/x/sys v0.0.0-20190222072716-a9d3bda3a223/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
|
||||
|
|
|
|||
166
internal/fulltext/bleve.go
Normal file
166
internal/fulltext/bleve.go
Normal file
|
|
@ -0,0 +1,166 @@
|
|||
// Copyright 2022 The Matrix.org Foundation C.I.C.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
package fulltext
|
||||
|
||||
import (
|
||||
"strings"
|
||||
|
||||
"github.com/blevesearch/bleve/v2"
|
||||
"github.com/blevesearch/bleve/v2/mapping"
|
||||
"github.com/matrix-org/dendrite/setup/config"
|
||||
"github.com/matrix-org/gomatrixserverlib"
|
||||
)
|
||||
|
||||
// Search contains all existing bleve.Index
|
||||
type Search struct {
|
||||
FulltextIndex bleve.Index
|
||||
}
|
||||
|
||||
// IndexElement is the document layout stored in the fulltext index.
type IndexElement struct {
	// EventID is used as the document ID when the element is indexed.
	EventID string
	// RoomID is the field room filters are matched against.
	RoomID string
	// Content is the text that search terms are matched against.
	Content string
	// ContentType is the field key filters are matched against; see
	// SetContentType for the values it takes.
	ContentType string
	// StreamPosition is the field results can optionally be ordered by.
	StreamPosition int64
}
|
||||
|
||||
// SetContentType sets i.ContentType given an identifier
|
||||
func (i *IndexElement) SetContentType(v string) {
|
||||
switch v {
|
||||
case "m.room.message":
|
||||
i.ContentType = "content.body"
|
||||
case gomatrixserverlib.MRoomName:
|
||||
i.ContentType = "content.name"
|
||||
case gomatrixserverlib.MRoomTopic:
|
||||
i.ContentType = "content.topic"
|
||||
}
|
||||
}
|
||||
|
||||
// New opens a new/existing fulltext index
|
||||
func New(cfg config.Fulltext) (fts *Search, err error) {
|
||||
fts = &Search{}
|
||||
fts.FulltextIndex, err = openIndex(cfg)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
return fts, nil
|
||||
}
|
||||
|
||||
// Close closes the fulltext index
|
||||
func (f *Search) Close() error {
|
||||
return f.FulltextIndex.Close()
|
||||
}
|
||||
|
||||
// FulltextIndex indexes a given element
|
||||
func (f *Search) Index(e IndexElement) error {
|
||||
return f.FulltextIndex.Index(e.EventID, e)
|
||||
}
|
||||
|
||||
// BatchIndex indexes the given elements
|
||||
func (f *Search) BatchIndex(elements []IndexElement) error {
|
||||
batch := f.FulltextIndex.NewBatch()
|
||||
|
||||
for _, element := range elements {
|
||||
err := batch.Index(element.EventID, element)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
return f.FulltextIndex.Batch(batch)
|
||||
}
|
||||
|
||||
// Delete deletes an indexed element by the eventID
|
||||
func (f *Search) Delete(eventID string) error {
|
||||
return f.FulltextIndex.Delete(eventID)
|
||||
}
|
||||
|
||||
// Search searches the index given a search term, roomIDs and keys.
|
||||
func (f *Search) Search(term string, roomIDs, keys []string, limit, from int, orderByStreamPos bool) (*bleve.SearchResult, error) {
|
||||
qry := bleve.NewConjunctionQuery()
|
||||
termQuery := bleve.NewBooleanQuery()
|
||||
|
||||
terms := strings.Split(term, " ")
|
||||
for _, term := range terms {
|
||||
matchQuery := bleve.NewMatchQuery(term)
|
||||
matchQuery.SetField("Content")
|
||||
termQuery.AddMust(matchQuery)
|
||||
}
|
||||
qry.AddQuery(termQuery)
|
||||
|
||||
roomQuery := bleve.NewBooleanQuery()
|
||||
for _, roomID := range roomIDs {
|
||||
roomSearch := bleve.NewMatchQuery(roomID)
|
||||
roomSearch.SetField("RoomID")
|
||||
roomQuery.AddShould(roomSearch)
|
||||
}
|
||||
if len(roomIDs) > 0 {
|
||||
qry.AddQuery(roomQuery)
|
||||
}
|
||||
keyQuery := bleve.NewBooleanQuery()
|
||||
for _, key := range keys {
|
||||
keySearch := bleve.NewMatchQuery(key)
|
||||
keySearch.SetField("ContentType")
|
||||
keyQuery.AddShould(keySearch)
|
||||
}
|
||||
if len(keys) > 0 {
|
||||
qry.AddQuery(keyQuery)
|
||||
}
|
||||
|
||||
s := bleve.NewSearchRequestOptions(qry, limit, from, false)
|
||||
s.Fields = []string{"*"}
|
||||
s.SortBy([]string{"_score"})
|
||||
if orderByStreamPos {
|
||||
s.SortBy([]string{"-StreamPosition"})
|
||||
}
|
||||
|
||||
return f.FulltextIndex.Search(s)
|
||||
}
|
||||
|
||||
func openIndex(cfg config.Fulltext) (bleve.Index, error) {
|
||||
m := getMapping(cfg)
|
||||
if cfg.InMemory {
|
||||
return bleve.NewMemOnly(m)
|
||||
}
|
||||
if index, err := bleve.Open(string(cfg.IndexPath)); err == nil {
|
||||
return index, nil
|
||||
}
|
||||
|
||||
index, err := bleve.New(string(cfg.IndexPath), m)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
return index, nil
|
||||
}
|
||||
|
||||
func getMapping(cfg config.Fulltext) *mapping.IndexMappingImpl {
|
||||
enFieldMapping := bleve.NewTextFieldMapping()
|
||||
enFieldMapping.Analyzer = cfg.Language
|
||||
|
||||
eventMapping := bleve.NewDocumentMapping()
|
||||
eventMapping.AddFieldMappingsAt("Content", enFieldMapping)
|
||||
eventMapping.AddFieldMappingsAt("StreamPosition", bleve.NewNumericFieldMapping())
|
||||
|
||||
// Index entries as is
|
||||
idFieldMapping := bleve.NewKeywordFieldMapping()
|
||||
eventMapping.AddFieldMappingsAt("ContentType", idFieldMapping)
|
||||
eventMapping.AddFieldMappingsAt("RoomID", idFieldMapping)
|
||||
eventMapping.AddFieldMappingsAt("EventID", idFieldMapping)
|
||||
|
||||
indexMapping := bleve.NewIndexMapping()
|
||||
indexMapping.AddDocumentMapping("Event", eventMapping)
|
||||
indexMapping.DefaultType = "Event"
|
||||
return indexMapping
|
||||
}
|
||||
246
internal/fulltext/bleve_test.go
Normal file
246
internal/fulltext/bleve_test.go
Normal file
|
|
@ -0,0 +1,246 @@
|
|||
// Copyright 2022 The Matrix.org Foundation C.I.C.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
package fulltext_test
|
||||
|
||||
import (
|
||||
"reflect"
|
||||
"testing"
|
||||
|
||||
"github.com/matrix-org/dendrite/internal/fulltext"
|
||||
"github.com/matrix-org/dendrite/setup/config"
|
||||
"github.com/matrix-org/gomatrixserverlib"
|
||||
"github.com/matrix-org/util"
|
||||
)
|
||||
|
||||
func mustOpenIndex(t *testing.T, tempDir string) *fulltext.Search {
|
||||
t.Helper()
|
||||
cfg := config.Fulltext{}
|
||||
cfg.Defaults(true)
|
||||
if tempDir != "" {
|
||||
cfg.IndexPath = config.Path(tempDir)
|
||||
cfg.InMemory = false
|
||||
}
|
||||
fts, err := fulltext.New(cfg)
|
||||
if err != nil {
|
||||
t.Fatal("failed to open fulltext index:", err)
|
||||
}
|
||||
return fts
|
||||
}
|
||||
|
||||
func mustAddTestData(t *testing.T, fts *fulltext.Search, firstStreamPos int64) (eventIDs, roomIDs []string) {
|
||||
t.Helper()
|
||||
// create some more random data
|
||||
var batchItems []fulltext.IndexElement
|
||||
streamPos := firstStreamPos
|
||||
|
||||
wantRoomID := util.RandomString(16)
|
||||
|
||||
for i := 0; i < 30; i++ {
|
||||
streamPos++
|
||||
eventID := util.RandomString(16)
|
||||
// Create more data for the first room
|
||||
if i > 15 {
|
||||
wantRoomID = util.RandomString(16)
|
||||
}
|
||||
e := fulltext.IndexElement{
|
||||
EventID: eventID,
|
||||
RoomID: wantRoomID,
|
||||
Content: "lorem ipsum",
|
||||
StreamPosition: streamPos,
|
||||
}
|
||||
e.SetContentType("m.room.message")
|
||||
batchItems = append(batchItems, e)
|
||||
roomIDs = append(roomIDs, wantRoomID)
|
||||
eventIDs = append(eventIDs, eventID)
|
||||
}
|
||||
e := fulltext.IndexElement{
|
||||
EventID: util.RandomString(16),
|
||||
RoomID: wantRoomID,
|
||||
Content: "Roomname testing",
|
||||
StreamPosition: streamPos,
|
||||
}
|
||||
e.SetContentType(gomatrixserverlib.MRoomName)
|
||||
batchItems = append(batchItems, e)
|
||||
e = fulltext.IndexElement{
|
||||
EventID: util.RandomString(16),
|
||||
RoomID: wantRoomID,
|
||||
Content: "Room topic fulltext",
|
||||
StreamPosition: streamPos,
|
||||
}
|
||||
e.SetContentType(gomatrixserverlib.MRoomTopic)
|
||||
batchItems = append(batchItems, e)
|
||||
if err := fts.BatchIndex(batchItems); err != nil {
|
||||
t.Fatalf("failed to batch insert elements: %v", err)
|
||||
}
|
||||
return eventIDs, roomIDs
|
||||
}
|
||||
|
||||
func TestOpen(t *testing.T) {
|
||||
dataDir := t.TempDir()
|
||||
fts := mustOpenIndex(t, dataDir)
|
||||
if err := fts.Close(); err != nil {
|
||||
t.Fatal("unable to close fulltext index", err)
|
||||
}
|
||||
|
||||
// open existing index
|
||||
fts = mustOpenIndex(t, dataDir)
|
||||
defer fts.Close()
|
||||
}
|
||||
|
||||
func TestIndex(t *testing.T) {
|
||||
fts := mustOpenIndex(t, "")
|
||||
defer fts.Close()
|
||||
|
||||
// add some data
|
||||
var streamPos int64 = 1
|
||||
roomID := util.RandomString(8)
|
||||
eventID := util.RandomString(16)
|
||||
e := fulltext.IndexElement{
|
||||
EventID: eventID,
|
||||
RoomID: roomID,
|
||||
Content: "lorem ipsum",
|
||||
StreamPosition: streamPos,
|
||||
}
|
||||
e.SetContentType("m.room.message")
|
||||
|
||||
if err := fts.Index(e); err != nil {
|
||||
t.Fatal("failed to index element", err)
|
||||
}
|
||||
|
||||
// create some more random data
|
||||
mustAddTestData(t, fts, streamPos)
|
||||
}
|
||||
|
||||
func TestDelete(t *testing.T) {
|
||||
fts := mustOpenIndex(t, "")
|
||||
defer fts.Close()
|
||||
eventIDs, roomIDs := mustAddTestData(t, fts, 0)
|
||||
res1, err := fts.Search("lorem", roomIDs[:1], nil, 50, 0, false)
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
if err = fts.Delete(eventIDs[0]); err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
res2, err := fts.Search("lorem", roomIDs[:1], nil, 50, 0, false)
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
if res1.Total <= res2.Total {
|
||||
t.Fatalf("got unexpected result: %d <= %d", res1.Total, res2.Total)
|
||||
}
|
||||
}
|
||||
|
||||
func TestSearch(t *testing.T) {
|
||||
type args struct {
|
||||
term string
|
||||
keys []string
|
||||
limit int
|
||||
from int
|
||||
orderByStreamPos bool
|
||||
roomIndex []int
|
||||
}
|
||||
tests := []struct {
|
||||
name string
|
||||
args args
|
||||
wantCount int
|
||||
wantErr bool
|
||||
}{
|
||||
{
|
||||
name: "Can search for many results in one room",
|
||||
wantCount: 16,
|
||||
args: args{
|
||||
term: "lorem",
|
||||
roomIndex: []int{0},
|
||||
limit: 20,
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "Can search for one result in one room",
|
||||
wantCount: 1,
|
||||
args: args{
|
||||
term: "lorem",
|
||||
roomIndex: []int{16},
|
||||
limit: 20,
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "Can search for many results in multiple rooms",
|
||||
wantCount: 17,
|
||||
args: args{
|
||||
term: "lorem",
|
||||
roomIndex: []int{0, 16},
|
||||
limit: 20,
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "Can search for many results in all rooms, reversed",
|
||||
wantCount: 30,
|
||||
args: args{
|
||||
term: "lorem",
|
||||
limit: 30,
|
||||
orderByStreamPos: true,
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "Can search for specific search room name",
|
||||
wantCount: 1,
|
||||
args: args{
|
||||
term: "testing",
|
||||
roomIndex: []int{},
|
||||
limit: 20,
|
||||
keys: []string{"content.name"},
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "Can search for specific search room topic",
|
||||
wantCount: 1,
|
||||
args: args{
|
||||
term: "fulltext",
|
||||
roomIndex: []int{},
|
||||
limit: 20,
|
||||
keys: []string{"content.topic"},
|
||||
},
|
||||
},
|
||||
}
|
||||
for _, tt := range tests {
|
||||
t.Run(tt.name, func(t *testing.T) {
|
||||
f := mustOpenIndex(t, "")
|
||||
eventIDs, roomIDs := mustAddTestData(t, f, 0)
|
||||
var searchRooms []string
|
||||
for _, x := range tt.args.roomIndex {
|
||||
searchRooms = append(searchRooms, roomIDs[x])
|
||||
}
|
||||
t.Logf("searching in rooms: %v - %v\n", searchRooms, tt.args.keys)
|
||||
|
||||
got, err := f.Search(tt.args.term, searchRooms, tt.args.keys, tt.args.limit, tt.args.from, tt.args.orderByStreamPos)
|
||||
if (err != nil) != tt.wantErr {
|
||||
t.Errorf("Search() error = %v, wantErr %v", err, tt.wantErr)
|
||||
return
|
||||
}
|
||||
if !reflect.DeepEqual(len(got.Hits), tt.wantCount) {
|
||||
t.Errorf("Search() got = %v, want %v", len(got.Hits), tt.wantCount)
|
||||
}
|
||||
if tt.args.orderByStreamPos {
|
||||
if got.Hits[0].ID != eventIDs[29] {
|
||||
t.Fatalf("expected ID %s, got %s", eventIDs[29], got.Hits[0].ID)
|
||||
}
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
|
@ -31,6 +31,7 @@ import (
|
|||
"github.com/getsentry/sentry-go"
|
||||
sentryhttp "github.com/getsentry/sentry-go/http"
|
||||
"github.com/matrix-org/dendrite/internal/caching"
|
||||
"github.com/matrix-org/dendrite/internal/fulltext"
|
||||
"github.com/matrix-org/dendrite/internal/httputil"
|
||||
"github.com/matrix-org/dendrite/internal/pushgateway"
|
||||
"github.com/matrix-org/dendrite/internal/sqlutil"
|
||||
|
|
@ -87,6 +88,7 @@ type BaseDendrite struct {
|
|||
Database *sql.DB
|
||||
DatabaseWriter sqlutil.Writer
|
||||
EnableMetrics bool
|
||||
Fulltext *fulltext.Search
|
||||
}
|
||||
|
||||
const NoListener = ""
|
||||
|
|
@ -146,6 +148,15 @@ func NewBaseDendrite(cfg *config.Dendrite, componentName string, options ...Base
|
|||
logrus.WithError(err).Panicf("failed to start opentracing")
|
||||
}
|
||||
|
||||
var fts *fulltext.Search
|
||||
isSyncOrMonolith := componentName == "syncapi" || isMonolith
|
||||
if cfg.SyncAPI.Fulltext.Enabled && isSyncOrMonolith {
|
||||
fts, err = fulltext.New(cfg.SyncAPI.Fulltext)
|
||||
if err != nil {
|
||||
logrus.WithError(err).Panicf("failed to create full text")
|
||||
}
|
||||
}
|
||||
|
||||
if cfg.Global.Sentry.Enabled {
|
||||
logrus.Info("Setting up Sentry for debugging...")
|
||||
err = sentry.Init(sentry.ClientOptions{
|
||||
|
|
@ -248,6 +259,7 @@ func NewBaseDendrite(cfg *config.Dendrite, componentName string, options ...Base
|
|||
Database: db, // set if monolith with global connection pool only
|
||||
DatabaseWriter: writer, // set if monolith with global connection pool only
|
||||
EnableMetrics: enableMetrics,
|
||||
Fulltext: fts,
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -9,6 +9,8 @@ type SyncAPI struct {
|
|||
Database DatabaseOptions `yaml:"database"`
|
||||
|
||||
RealIPHeader string `yaml:"real_ip_header"`
|
||||
|
||||
Fulltext Fulltext `yaml:"fulltext"`
|
||||
}
|
||||
|
||||
func (c *SyncAPI) Defaults(generate bool) {
|
||||
|
|
@ -16,12 +18,14 @@ func (c *SyncAPI) Defaults(generate bool) {
|
|||
c.InternalAPI.Connect = "http://localhost:7773"
|
||||
c.ExternalAPI.Listen = "http://localhost:8073"
|
||||
c.Database.Defaults(10)
|
||||
c.Fulltext.Defaults(generate)
|
||||
if generate {
|
||||
c.Database.ConnectionString = "file:syncapi.db"
|
||||
}
|
||||
}
|
||||
|
||||
func (c *SyncAPI) Verify(configErrs *ConfigErrors, isMonolith bool) {
|
||||
c.Fulltext.Verify(configErrs, isMonolith)
|
||||
if c.Matrix.DatabaseOptions.ConnectionString == "" {
|
||||
checkNotEmpty(configErrs, "sync_api.database", string(c.Database.ConnectionString))
|
||||
}
|
||||
|
|
@ -32,3 +36,25 @@ func (c *SyncAPI) Verify(configErrs *ConfigErrors, isMonolith bool) {
|
|||
checkURL(configErrs, "sync_api.internal_api.connect", string(c.InternalAPI.Connect))
|
||||
checkURL(configErrs, "sync_api.external_api.listen", string(c.ExternalAPI.Listen))
|
||||
}
|
||||
|
||||
type Fulltext struct {
|
||||
Enabled bool `yaml:"enabled"`
|
||||
IndexPath Path `yaml:"index_path"`
|
||||
InMemory bool `yaml:"in_memory"` // only useful in tests
|
||||
Language string `yaml:"language"` // the language to use when analysing content
|
||||
}
|
||||
|
||||
func (f *Fulltext) Defaults(generate bool) {
|
||||
f.Enabled = false
|
||||
f.IndexPath = "./fulltextindex"
|
||||
f.Language = "en"
|
||||
if generate {
|
||||
f.Enabled = true
|
||||
f.InMemory = true
|
||||
}
|
||||
}
|
||||
|
||||
func (f *Fulltext) Verify(configErrs *ConfigErrors, isMonolith bool) {
|
||||
checkNotEmpty(configErrs, "syncapi.fulltext.index_path", string(f.IndexPath))
|
||||
checkNotEmpty(configErrs, "syncapi.fulltext.language", f.Language)
|
||||
}
|
||||
|
|
|
|||
|
|
@ -27,6 +27,7 @@ var (
|
|||
OutputReadUpdate = "OutputReadUpdate"
|
||||
RequestPresence = "GetPresence"
|
||||
OutputPresenceEvent = "OutputPresenceEvent"
|
||||
InputFulltextReindex = "InputFulltextReindex"
|
||||
)
|
||||
|
||||
var safeCharacters = regexp.MustCompile("[^A-Za-z0-9$]+")
|
||||
|
|
|
|||
|
|
@ -35,8 +35,9 @@ func CreateBaseDendrite(t *testing.T, dbType test.DBType) (*base.BaseDendrite, f
|
|||
|
||||
switch dbType {
|
||||
case test.DBTypePostgres:
|
||||
cfg.Global.Defaults(true) // autogen a signing key
|
||||
cfg.MediaAPI.Defaults(true) // autogen a media path
|
||||
cfg.Global.Defaults(true) // autogen a signing key
|
||||
cfg.MediaAPI.Defaults(true) // autogen a media path
|
||||
cfg.SyncAPI.Fulltext.Defaults(true) // use in memory fts
|
||||
// use a distinct prefix else concurrent postgres/sqlite runs will clash since NATS will use
|
||||
// the file system even with InMemory=true :(
|
||||
cfg.Global.JetStream.TopicPrefix = fmt.Sprintf("Test_%d_", dbType)
|
||||
|
|
@ -85,6 +86,7 @@ func Base(cfg *config.Dendrite) (*base.BaseDendrite, nats.JetStreamContext, *nat
|
|||
cfg.Defaults(true)
|
||||
}
|
||||
cfg.Global.JetStream.InMemory = true
|
||||
cfg.SyncAPI.Fulltext.InMemory = true
|
||||
base := base.NewBaseDendrite(cfg, "Tests")
|
||||
js, jc := base.NATS.Prepare(base.ProcessContext, &cfg.Global.JetStream)
|
||||
return base, js, jc
|
||||
|
|
|
|||
Loading…
Reference in a new issue