url preview test version

Signed-off-by: Aleksandr Dubovikov <d.lexand@gmail.com>
This commit is contained in:
ad 2024-09-29 17:02:42 +02:00 committed by Aleksandr Dubovikov
parent 6cd1285ca0
commit 5845244aa9
7 changed files with 542 additions and 1 deletions

View file

@ -161,6 +161,10 @@ func moveFile(src types.Path, dst types.Path) error {
return nil
}
// MoveFile moves the file at src to dst. It is the exported entry point for
// the package-private moveFile and returns whatever error moveFile reports.
func MoveFile(src types.Path, dst types.Path) error {
return moveFile(src, dst)
}
func createTempFileWriter(absBasePath config.Path) (*bufio.Writer, *os.File, types.Path, error) {
tmpDir, err := createTempDir(absBasePath)
if err != nil {

View file

@ -316,10 +316,11 @@ func (r *downloadRequest) respondFromLocalFile(
return nil, fmt.Errorf("fileutils.GetPathFromBase64Hash: %w", err)
}
file, err := os.Open(filePath)
defer file.Close() // nolint: errcheck, staticcheck, megacheck
if err != nil {
return nil, fmt.Errorf("os.Open: %w", err)
}
defer file.Close() // nolint: errcheck, staticcheck, megacheck
stat, err := file.Stat()
if err != nil {
return nil, fmt.Errorf("file.Stat: %w", err)

View file

@ -96,6 +96,8 @@ func Setup(
MXCToResult: map[string]*types.RemoteRequestResult{},
}
// v1 url_preview endpoint requiring auth
downloadHandler := makeDownloadAPI("download_unauthed", &cfg.MediaAPI, rateLimits, db, client, federationClient, activeRemoteRequests, activeThumbnailGeneration, false)
v3mux.Handle("/download/{serverName}/{mediaId}", downloadHandler).Methods(http.MethodGet, http.MethodOptions)
v3mux.Handle("/download/{serverName}/{mediaId}/{downloadName}", downloadHandler).Methods(http.MethodGet, http.MethodOptions)
@ -110,6 +112,21 @@ func Setup(
v1mux.Handle("/download/{serverName}/{mediaId}", downloadHandlerAuthed).Methods(http.MethodGet, http.MethodOptions)
v1mux.Handle("/download/{serverName}/{mediaId}/{downloadName}", downloadHandlerAuthed).Methods(http.MethodGet, http.MethodOptions)
// urlPreviewHandler := httputil.MakeAuthAPI(
// "preview_url", userAPI,
// makeUrlPreviewHandler(&cfg.MediaAPI, rateLimits, db, client, activeThumbnailGeneration),
// )
f := makeUrlPreviewHandler(&cfg.MediaAPI, rateLimits, db, activeThumbnailGeneration)
urlPreviewHandler := httputil.MakeExternalAPI(
"preview_url",
func(req *http.Request) util.JSONResponse {
return f(req, nil)
},
)
v1mux.Handle("/preview_url", urlPreviewHandler).Methods(http.MethodGet, http.MethodOptions)
// That method is deprecated according to spec but still in use
v3mux.Handle("/preview_url", urlPreviewHandler).Methods(http.MethodGet, http.MethodOptions)
v1mux.Handle("/thumbnail/{serverName}/{mediaId}",
httputil.MakeHTTPAPI("thumbnail", userAPI, cfg.Global.Metrics.Enabled, makeDownloadAPI("thumbnail_authed_client", &cfg.MediaAPI, rateLimits, db, client, federationClient, activeRemoteRequests, activeThumbnailGeneration, false), httputil.WithAuth()),
).Methods(http.MethodGet, http.MethodOptions)

View file

@ -0,0 +1,427 @@
package routing
import (
"context"
"fmt"
"io"
"net/http"
"net/url"
"os"
"path/filepath"
"strconv"
"strings"
"sync"
"time"
"github.com/matrix-org/dendrite/internal/httputil"
"github.com/matrix-org/dendrite/mediaapi/fileutils"
"github.com/matrix-org/dendrite/mediaapi/storage"
"github.com/matrix-org/dendrite/mediaapi/thumbnailer"
"github.com/matrix-org/dendrite/mediaapi/types"
"github.com/matrix-org/dendrite/setup/config"
userapi "github.com/matrix-org/dendrite/userapi/api"
"github.com/matrix-org/gomatrixserverlib/spec"
"github.com/matrix-org/util"
"github.com/pkg/errors"
log "github.com/sirupsen/logrus"
"golang.org/x/net/html"
)
// Sentinel errors reported by the URL preview endpoint.
var (
	// ErrorMissingUrl is returned when the request carries no "url" query parameter.
	ErrorMissingUrl = errors.New("missing url")
	// ErrorUnsupportedContentType is returned when fetched content is not of a
	// type that can be previewed or thumbnailed.
	ErrorUnsupportedContentType = errors.New("unsupported content type")
	// ErrorFileTooLarge is returned when a fetched file exceeds the configured
	// maximum file size.
	ErrorFileTooLarge = errors.New("file too large")
)
// makeUrlPreviewHandler builds the handler behind the /preview_url endpoints.
//
// The returned handler reads the "url" query parameter, fetches that URL and
// answers with a types.UrlPreview. Results (successes and failures alike) are
// kept in an in-memory cache for cfg.UrlPreviewCacheTime seconds, and
// concurrent requests for the same URL share a single fetch: the first
// request does the work, later ones wait for its result.
//
// device may be nil (unauthenticated callers).
//
// NOTE(review): the eviction goroutine started here runs for the lifetime of
// the process; the signature offers no context to stop it.
// NOTE(review): the handler fetches caller-supplied URLs from the server
// side. No allow/deny list for internal addresses is applied here — confirm
// this is handled elsewhere before exposing the endpoint unauthenticated.
func makeUrlPreviewHandler(
	cfg *config.MediaAPI,
	rateLimits *httputil.RateLimits,
	db storage.Database,
	activeThumbnailGeneration *types.ActiveThumbnailGeneration,
) func(req *http.Request, device *userapi.Device) util.JSONResponse {
	activeUrlPreviewRequests := &types.ActiveUrlPreviewRequests{Url: map[string]*types.UrlPreviewResult{}}
	urlPreviewCache := &types.UrlPreviewCache{Records: map[string]*types.UrlPreviewCacheRecord{}}

	// Periodically evict expired cache records. The whole sweep runs under
	// the cache lock: ranging over a map while another goroutine writes to
	// it is a data race.
	go func() {
		for {
			expireBefore := time.Now().Unix() - int64(cfg.UrlPreviewCacheTime)
			urlPreviewCache.Lock.Lock()
			for k, record := range urlPreviewCache.Records {
				if record.Created < expireBefore {
					delete(urlPreviewCache.Records, k)
				}
			}
			urlPreviewCache.Lock.Unlock()
			time.Sleep(16 * time.Second)
		}
	}()

	return func(req *http.Request, device *userapi.Device) util.JSONResponse {
		req = util.RequestWithLogging(req)

		// The "ts" query parameter is accepted but currently ignored.
		pUrl := req.URL.Query().Get("url")
		if pUrl == "" {
			return util.ErrorResponse(ErrorMissingUrl)
		}

		logger := util.GetLogger(req.Context()).WithFields(log.Fields{
			"url": pUrl,
		})

		// Check rate limits
		if r := rateLimits.Limit(req, device); r != nil {
			return *r
		}

		// Get url preview from cache
		urlPreviewCache.Lock.Lock()
		cacheRecord, cached := urlPreviewCache.Records[pUrl]
		urlPreviewCache.Lock.Unlock()
		if cached {
			return urlPreviewResponse(cacheRecord.Preview, cacheRecord.Error)
		}

		// Check if there is an active request
		activeUrlPreviewRequests.Lock()
		if inFlight, ok := activeUrlPreviewRequests.Url[pUrl]; ok {
			// Take the condition lock BEFORE releasing the map lock. The
			// fetching goroutine removes the entry under the map lock and
			// only then takes the condition lock to broadcast, so it cannot
			// broadcast before we are parked in Wait (which would leave this
			// request blocked forever).
			inFlight.Cond.L.Lock()
			activeUrlPreviewRequests.Unlock()
			inFlight.Cond.Wait()
			preview, previewErr := inFlight.Preview, inFlight.Error
			inFlight.Cond.L.Unlock()
			return urlPreviewResponse(preview, previewErr)
		}

		// Start new url preview request
		activeUrlPreviewRequest := &types.UrlPreviewResult{Cond: sync.NewCond(&sync.Mutex{})}
		activeUrlPreviewRequests.Url[pUrl] = activeUrlPreviewRequest
		activeUrlPreviewRequests.Unlock()

		// we defer caching the url preview response as well as signalling the waiting goroutines
		// about the completion of the request
		defer func() {
			urlPreviewCacheItem := &types.UrlPreviewCacheRecord{
				Created: time.Now().Unix(),
			}
			if activeUrlPreviewRequest.Error != nil {
				urlPreviewCacheItem.Error = activeUrlPreviewRequest.Error
			} else {
				urlPreviewCacheItem.Preview = activeUrlPreviewRequest.Preview
			}
			urlPreviewCache.Lock.Lock()
			urlPreviewCache.Records[pUrl] = urlPreviewCacheItem
			urlPreviewCache.Lock.Unlock()

			// Unregister first so no new waiter can attach, then wake the
			// ones that already did.
			activeUrlPreviewRequests.Lock()
			delete(activeUrlPreviewRequests.Url, pUrl)
			activeUrlPreviewRequests.Unlock()

			activeUrlPreviewRequest.Cond.L.Lock()
			activeUrlPreviewRequest.Cond.Broadcast()
			activeUrlPreviewRequest.Cond.L.Unlock()
		}()

		// cfg.UrlPreviewTimeout is configured in milliseconds.
		timeout := time.Duration(cfg.UrlPreviewTimeout) * time.Millisecond

		preview, err := fetchUrlPreview(req.Context(), pUrl, timeout, cfg, device, db, activeThumbnailGeneration, logger)
		if err != nil {
			activeUrlPreviewRequest.Error = err
		} else {
			activeUrlPreviewRequest.Preview = preview
		}

		// choose the answer based on the result
		return urlPreviewResponse(activeUrlPreviewRequest.Preview, activeUrlPreviewRequest.Error)
	}
}

// urlPreviewResponse turns a preview result into the HTTP answer: an error
// response when err is set, otherwise 200 with the preview as JSON body.
func urlPreviewResponse(preview *types.UrlPreview, err error) util.JSONResponse {
	if err != nil {
		return util.ErrorResponse(err)
	}
	return util.JSONResponse{
		Code: http.StatusOK,
		JSON: preview,
	}
}

// fetchUrlPreview downloads pUrl and builds its preview. HTML pages are
// scanned for OpenGraph metadata; images are stored in the media repository
// and referenced by their mxc:// URI. Any other content type yields
// ErrorUnsupportedContentType.
func fetchUrlPreview(
	ctx context.Context,
	pUrl string,
	timeout time.Duration,
	cfg *config.MediaAPI,
	device *userapi.Device,
	db storage.Database,
	activeThumbnailGeneration *types.ActiveThumbnailGeneration,
	logger *log.Entry,
) (*types.UrlPreview, error) {
	resp, err := downloadUrl(pUrl, timeout)
	if err != nil {
		return nil, err
	}
	defer resp.Body.Close() // nolint: errcheck

	contentType := resp.Header.Get("Content-Type")
	switch {
	case strings.HasPrefix(contentType, "text/html"):
		preview, err := getPreviewFromHTML(resp, pUrl)
		if err != nil {
			return nil, err
		}
		if preview.ImageUrl != "" {
			rehostUrlPreviewImage(ctx, preview, pUrl, timeout, cfg, device, db, activeThumbnailGeneration, logger)
		}
		return preview, nil
	case strings.HasPrefix(contentType, "image/"):
		mediaData, err := downloadAndStoreImage("somefile", ctx, resp, cfg, device, db, activeThumbnailGeneration, logger)
		if err != nil {
			return nil, err
		}
		return &types.UrlPreview{ImageUrl: fmt.Sprintf("mxc://%s/%s", mediaData.Origin, mediaData.MediaID)}, nil
	default:
		return nil, ErrorUnsupportedContentType
	}
}

// rehostUrlPreviewImage downloads the image referenced by preview.ImageUrl,
// stores it in the media repository and rewrites preview.ImageUrl to the
// resulting mxc:// URI. This is best effort: on any failure the preview is
// left untouched and the problem is only logged.
func rehostUrlPreviewImage(
	ctx context.Context,
	preview *types.UrlPreview,
	pageUrl string,
	timeout time.Duration,
	cfg *config.MediaAPI,
	device *userapi.Device,
	db storage.Database,
	activeThumbnailGeneration *types.ActiveThumbnailGeneration,
	logger *log.Entry,
) {
	imgUrl, err := url.Parse(preview.ImageUrl)
	if err != nil {
		logger.WithError(err).Debug("unable to parse preview image url")
		return
	}
	// Pages sometimes give the image relative to themselves; absolute
	// references pass through ResolveReference unchanged.
	if base, baseErr := url.Parse(pageUrl); baseErr == nil {
		imgUrl = base.ResolveReference(imgUrl)
	}

	imgResp, err := downloadUrl(imgUrl.String(), timeout)
	if err != nil {
		logger.WithError(err).Debug("unable to download preview image")
		return
	}
	defer imgResp.Body.Close() // nolint: errcheck

	mediaData, err := downloadAndStoreImage(imgUrl.Path, ctx, imgResp, cfg, device, db, activeThumbnailGeneration, logger)
	if err != nil {
		logger.WithError(err).Debug("unable to store preview image")
		return
	}
	preview.ImageUrl = fmt.Sprintf("mxc://%s/%s", mediaData.Origin, mediaData.MediaID)
}
// downloadUrl issues a GET request for url, giving up after t.
//
// On success (any 2xx status) the response is returned and the caller owns
// resp.Body and must close it. On a transport error or a non-2xx status an
// error is returned and no response body is left open.
func downloadUrl(url string, t time.Duration) (*http.Response, error) {
	client := http.Client{Timeout: t}
	resp, err := client.Get(url)
	if err != nil {
		return nil, err
	}
	if resp.StatusCode < 200 || resp.StatusCode >= 300 {
		// The response is discarded, so release its body here; the caller
		// never sees it and could not close it.
		_ = resp.Body.Close()
		return nil, errors.New("HTTP status code: " + strconv.Itoa(resp.StatusCode))
	}
	return resp, nil
}
// getPreviewFromHTML builds a URL preview from the OpenGraph meta tags found
// in the HTML document carried by resp.
//
// The title falls back to url when the page has no og:title. The image URL is
// taken from the first non-empty of og:image, og:image:url and
// og:image:secure_url. Image dimensions are only set when they parse as
// integers. The returned error is always nil.
func getPreviewFromHTML(resp *http.Response, url string) (*types.UrlPreview, error) {
	meta := getMetaFieldsFromHTML(resp)

	title := meta["og:title"]
	if title == "" {
		title = url
	}
	result := &types.UrlPreview{
		Title:       title,
		Description: meta["og:description"],
	}

	// First non-empty candidate wins, in order of preference.
	for _, key := range []string{"og:image", "og:image:url", "og:image:secure_url"} {
		if candidate := meta[key]; candidate != "" {
			result.ImageUrl = candidate
			break
		}
	}

	// An empty or malformed dimension fails to parse and is simply left at zero.
	if w, err := strconv.Atoi(meta["og:image:width"]); err == nil {
		result.ImageWidth = w
	}
	if h, err := strconv.Atoi(meta["og:image:height"]); err == nil {
		result.ImageHeight = h
	}

	return result, nil
}
// downloadAndStoreImage reads an image from the body of req, thumbnails it to
// cfg.UrlPreviewThumbnailSize and stores the result in the media repository.
//
// filename is recorded as the upload name. dev may be nil, in which case the
// media is attributed to the placeholder user "user". The caller keeps
// ownership of req.Body and is responsible for closing it.
//
// It returns the metadata of the stored media, or the metadata of an already
// stored file with the same content hash. Errors: ErrorFileTooLarge when the
// body exceeds cfg.MaxFileSizeBytes, ErrorUnsupportedContentType when the
// content is not detected as an image, or the underlying I/O / database error.
func downloadAndStoreImage(
	filename string,
	ctx context.Context,
	req *http.Response,
	cfg *config.MediaAPI,
	dev *userapi.Device,
	db storage.Database,
	activeThumbnailGeneration *types.ActiveThumbnailGeneration,
	logger *log.Entry,
) (*types.MediaMetadata, error) {
	userid := types.MatrixUserID("user")
	if dev != nil {
		userid = types.MatrixUserID(dev.UserID)
	}

	// Read one byte past the limit so an oversized body can be told apart
	// from one that is exactly at the limit.
	var reqReader io.Reader = req.Body
	if cfg.MaxFileSizeBytes > 0 {
		reqReader = io.LimitReader(reqReader, int64(cfg.MaxFileSizeBytes)+1)
	}
	hash, bytesWritten, tmpDir, err := fileutils.WriteTempFile(ctx, reqReader, cfg.AbsBasePath)
	if err != nil {
		logger.WithError(err).WithFields(log.Fields{
			"MaxFileSizeBytes": cfg.MaxFileSizeBytes,
		}).Warn("Error while transferring file")
		return nil, err
	}
	defer fileutils.RemoveDir(tmpDir, logger)

	// Check if temp file size exceeds max file size configuration
	if cfg.MaxFileSizeBytes > 0 && bytesWritten > types.FileSizeBytes(cfg.MaxFileSizeBytes) {
		return nil, ErrorFileTooLarge
	}

	// Check if we already have this file
	existingMetadata, err := db.GetMediaMetadataByHash(
		ctx, hash, cfg.Matrix.ServerName,
	)
	if err != nil {
		logger.WithError(err).Error("unable to get media metadata by hash")
		return nil, err
	}
	if existingMetadata != nil {
		logger.WithField("mediaID", existingMetadata.MediaID).Debug("media already exists")
		return existingMetadata, nil
	}

	tmpFileName := filepath.Join(string(tmpDir), "content")

	// Check if the file is an image.
	// Otherwise return an error
	fileType, err := sniffContentType(tmpFileName)
	if err != nil {
		logger.WithError(err).Error("unable to read file")
		return nil, err
	}
	if !strings.HasPrefix(fileType, "image") {
		logger.WithField("contentType", fileType).Debug("uploaded file is not an image or can not be thumbnailed, not generating thumbnails")
		return nil, ErrorUnsupportedContentType
	}
	logger.WithField("contentType", fileType).Debug("uploaded file is an image")

	// Create a thumbnail from the image. When the image is already no larger
	// than the requested thumbnail, the original file is stored as is.
	thumbnailPath := tmpFileName + ".thumbnail"
	err = thumbnailer.CreateThumbnailFromFile(types.Path(tmpFileName), types.Path(thumbnailPath), types.ThumbnailSize(cfg.UrlPreviewThumbnailSize), logger)
	if err != nil {
		if !errors.Is(err, thumbnailer.ErrThumbnailTooLarge) {
			logger.WithError(err).Error("unable to create thumbnail")
			return nil, err
		}
		thumbnailPath = tmpFileName
	}
	logger.WithField("path", thumbnailPath).Debug("thumbnail created")

	thumbnailFileInfo, err := os.Stat(thumbnailPath)
	if err != nil {
		logger.WithError(err).Error("unable to get thumbnail file info")
		return nil, err
	}

	r := &uploadRequest{
		MediaMetadata: &types.MediaMetadata{
			Origin: cfg.Matrix.ServerName,
		},
		Logger: logger,
	}

	// Move the thumbnail to the media store
	mediaID, err := r.generateMediaID(ctx, db)
	if err != nil {
		logger.WithError(err).Error("unable to generate media ID")
		return nil, err
	}
	// NOTE(review): ContentType and Base64Hash describe the downloaded
	// original, while the stored file may be the generated thumbnail —
	// confirm the thumbnailer keeps the source format.
	mediaMetaData := &types.MediaMetadata{
		MediaID:       mediaID,
		Origin:        cfg.Matrix.ServerName,
		ContentType:   types.ContentType(fileType),
		FileSizeBytes: types.FileSizeBytes(thumbnailFileInfo.Size()),
		UploadName:    types.Filename(filename),
		// spec.Timestamp counts milliseconds since the Unix epoch.
		CreationTimestamp: spec.Timestamp(time.Now().UnixMilli()),
		Base64Hash:        hash,
		UserID:            userid,
	}

	finalPath, err := fileutils.GetPathFromBase64Hash(mediaMetaData.Base64Hash, cfg.AbsBasePath)
	if err != nil {
		logger.WithError(err).Error("unable to get path from base64 hash")
		return nil, err
	}
	err = fileutils.MoveFile(types.Path(thumbnailPath), types.Path(finalPath))
	if err != nil {
		logger.WithError(err).Error("unable to move thumbnail file")
		return nil, err
	}

	// Store the metadata in the database
	err = db.StoreMediaMetadata(ctx, mediaMetaData)
	if err != nil {
		logger.WithError(err).Error("unable to store media metadata")
		return nil, err
	}
	return mediaMetaData, nil
}

// sniffContentType detects the MIME type of the file at path from its first
// 512 bytes (or fewer, for shorter files). An empty file is reported as an
// error (io.EOF). The file is closed before returning.
func sniffContentType(path string) (string, error) {
	file, err := os.Open(path)
	if err != nil {
		return "", err
	}
	defer file.Close() // nolint: errcheck

	buf := make([]byte, 512)
	n, err := io.ReadFull(file, buf)
	// A short file is fine; only the bytes actually read are inspected.
	if err != nil && !errors.Is(err, io.ErrUnexpectedEOF) {
		return "", err
	}
	return http.DetectContentType(buf[:n]), nil
}
// getMetaFieldsFromHTML scans the HTML document in resp.Body for <meta> tags
// and collects the OpenGraph properties used for URL previews.
//
// The returned map always contains every supported property as a key; a
// property that was not found (or had an empty value) maps to "". Scanning
// stops at the closing </head> tag if a <head> was opened, otherwise at the
// end of the document or the first tokenizer error.
func getMetaFieldsFromHTML(resp *http.Response) map[string]string {
	tokenizer := html.NewTokenizer(resp.Body)

	// Pre-seeded with every property of interest; the key set doubles as the
	// filter for which meta properties are recorded.
	ogValues := map[string]string{
		"og:title":            "",
		"og:description":      "",
		"og:image":            "",
		"og:image:url":        "",
		"og:image:secure_url": "",
		"og:image:width":      "",
		"og:image:height":     "",
		"og:image:type":       "",
	}

	insideHead := false
	for {
		kind := tokenizer.Next()
		if kind == html.ErrorToken {
			return ogValues
		}
		tok := tokenizer.Token()

		switch {
		case kind == html.StartTagToken && tok.Data == "head":
			insideHead = true

		case kind == html.EndTagToken && tok.Data == "head" && insideHead:
			// Meta tags are only looked for inside the head when there is one.
			return ogValues

		case tok.Data == "meta" && (kind == html.StartTagToken || kind == html.SelfClosingTagToken):
			var name, content string
			for _, attr := range tok.Attr {
				switch attr.Key {
				case "property":
					name = attr.Val
				case "content":
					content = attr.Val
				}
				// Both found: ignore any remaining attributes.
				if name != "" && content != "" {
					break
				}
			}
			if name == "" || content == "" {
				continue
			}
			// Record the value only for properties in the supported set.
			if _, wanted := ogValues[name]; wanted {
				ogValues[name] = content
			}
		}
	}
}

View file

@ -19,6 +19,7 @@ package thumbnailer
import (
"context"
"errors"
"image"
"image/draw"
@ -42,6 +43,8 @@ import (
log "github.com/sirupsen/logrus"
)
// ErrThumbnailTooLarge is returned by CreateThumbnailFromFile when the
// requested thumbnail dimensions are at least as large as the source image
// in both width and height, so no thumbnail is generated.
var ErrThumbnailTooLarge = errors.New("thumbnail is larger than original")
// GenerateThumbnails generates the configured thumbnail sizes for the source file
func GenerateThumbnails(
ctx context.Context,
@ -274,3 +277,36 @@ func adjustSize(dst types.Path, img image.Image, w, h int, crop bool, logger *lo
return out.Bounds().Max.X, out.Bounds().Max.Y, nil
}
// CreateThumbnailFromFile reads the image at src and writes a thumbnail of
// it, sized according to config, to dst.
//
// It returns ErrThumbnailTooLarge without writing anything when the requested
// width and height are both at least the source image's dimensions. Errors
// from reading the source or from resizing are returned unchanged.
func CreateThumbnailFromFile(
	src types.Path,
	dst types.Path,
	config types.ThumbnailSize,
	logger *log.Entry,
) (err error) {
	img, err := readFile(string(src))
	if err != nil {
		logger.WithError(err).WithField("src", src).Error("Failed to read src file")
		return err
	}

	// Nothing to shrink when the source already fits inside the requested size.
	bounds := img.Bounds()
	if bounds.Dx() <= config.Width && bounds.Dy() <= config.Height {
		return ErrThumbnailTooLarge
	}

	began := time.Now()
	cropping := config.ResizeMethod == types.Crop
	actualW, actualH, err := adjustSize(dst, img, config.Width, config.Height, cropping, logger)
	if err != nil {
		return err
	}

	logger.WithFields(log.Fields{
		"ActualWidth":  actualW,
		"ActualHeight": actualH,
		"processTime":  time.Since(began),
	}).Info("Generated thumbnail")
	return nil
}

View file

@ -100,6 +100,38 @@ type ActiveThumbnailGeneration struct {
PathToResult map[string]*ThumbnailGenerationResult
}
// UrlPreviewCache is an in-memory cache of URL preview results, keyed by the
// previewed URL.
type UrlPreviewCache struct {
// Lock is intended to guard Records; hold it for every access to the map.
Lock sync.Mutex
// Records maps a requested URL to its cached result.
Records map[string]*UrlPreviewCacheRecord
}
// UrlPreviewCacheRecord is one cached outcome of a URL preview request:
// either a preview or the error the request ended with.
type UrlPreviewCacheRecord struct {
// Created is the time the record was stored, in seconds since the Unix epoch.
Created int64
// Preview is the generated preview; nil when the request failed.
Preview *UrlPreview
// Error is the failure of the request; nil when a preview was generated.
Error error
}
// UrlPreview is the JSON body returned for a URL preview request. Field names
// on the wire follow the OpenGraph property names given in the struct tags.
type UrlPreview struct {
// ImageSize is the size of the preview image in bytes.
ImageSize FileSizeBytes `json:"matrix:image:size"`
// Description is the page's og:description.
Description string `json:"og:description"`
// ImageUrl is the URL of the preview image.
ImageUrl string `json:"og:image"`
// ImageType is the content type of the preview image.
ImageType ContentType `json:"og:image:type"`
// ImageHeight is the height of the preview image.
ImageHeight int `json:"og:image:height"`
// ImageWidth is the width of the preview image.
ImageWidth int `json:"og:image:width"`
// Title is the page's og:title.
Title string `json:"og:title"`
}
// UrlPreviewResult is the shared state of an in-flight URL preview request.
// Goroutines asking for the same URL wait on Cond until the request finishes.
type UrlPreviewResult struct {
// Cond is broadcast once the request has completed.
Cond *sync.Cond
// Preview is the generated preview; nil when the request failed.
Preview *UrlPreview
// Error is the failure of the request; nil when a preview was generated.
Error error
}
// ActiveUrlPreviewRequests tracks the URL preview requests currently being
// processed, so concurrent requests for the same URL can share one fetch.
type ActiveUrlPreviewRequests struct {
// The embedded mutex is locked around accesses to Url.
sync.Mutex
// Url maps a requested URL to the state of its in-flight request.
Url map[string]*UrlPreviewResult
}
// Crop indicates we should crop the thumbnail on resize
const Crop = "crop"

View file

@ -30,6 +30,14 @@ type MediaAPI struct {
// A list of thumbnail sizes to be pre-generated for downloaded remote / uploaded content
ThumbnailSizes []ThumbnailSize `yaml:"thumbnail_sizes"`
// The time in seconds to cache URL previews for
UrlPreviewCacheTime int `yaml:"url_preview_cache_time"`
// The timeout in milliseconds for fetching URL previews
UrlPreviewTimeout int `yaml:"url_preview_timeout"`
UrlPreviewThumbnailSize ThumbnailSize `yaml:"url_preview_thumbnail_size"`
}
// DefaultMaxFileSizeBytes defines the default file size allowed in transfers
@ -38,6 +46,9 @@ var DefaultMaxFileSizeBytes = FileSizeBytes(10485760)
func (c *MediaAPI) Defaults(opts DefaultOpts) {
c.MaxFileSizeBytes = DefaultMaxFileSizeBytes
c.MaxThumbnailGenerators = 10
c.UrlPreviewCacheTime = 10
c.UrlPreviewTimeout = 10000
if opts.Generate {
c.ThumbnailSizes = []ThumbnailSize{
{
@ -61,6 +72,12 @@ func (c *MediaAPI) Defaults(opts DefaultOpts) {
}
c.BasePath = "./media_store"
}
c.UrlPreviewThumbnailSize = ThumbnailSize{
Width: 200,
Height: 200,
ResizeMethod: "scale",
}
}
func (c *MediaAPI) Verify(configErrs *ConfigErrors) {
@ -76,4 +93,11 @@ func (c *MediaAPI) Verify(configErrs *ConfigErrors) {
if c.Matrix.DatabaseOptions.ConnectionString == "" {
checkNotEmpty(configErrs, "media_api.database.connection_string", string(c.Database.ConnectionString))
}
// If MaxFileSizeBytes overflows int64, default to DefaultMaxFileSizeBytes
if c.MaxFileSizeBytes+1 <= 0 {
c.MaxFileSizeBytes = DefaultMaxFileSizeBytes
fmt.Printf("Configured MediaApi.MaxFileSizeBytes overflows int64, defaulting to %d bytes", DefaultMaxFileSizeBytes)
}
}