added storing response as file cache

Signed-off-by: Aleksandr Dubovikov <d.lexand@gmail.com>
This commit is contained in:
ad 2024-09-30 13:01:39 +02:00 committed by Aleksandr Dubovikov
parent 46473c1bf0
commit 6c8158b313
3 changed files with 181 additions and 49 deletions

View file

@ -112,17 +112,7 @@ func Setup(
v1mux.Handle("/download/{serverName}/{mediaId}", downloadHandlerAuthed).Methods(http.MethodGet, http.MethodOptions) v1mux.Handle("/download/{serverName}/{mediaId}", downloadHandlerAuthed).Methods(http.MethodGet, http.MethodOptions)
v1mux.Handle("/download/{serverName}/{mediaId}/{downloadName}", downloadHandlerAuthed).Methods(http.MethodGet, http.MethodOptions) v1mux.Handle("/download/{serverName}/{mediaId}/{downloadName}", downloadHandlerAuthed).Methods(http.MethodGet, http.MethodOptions)
// urlPreviewHandler := httputil.MakeAuthAPI( urlPreviewHandler := httputil.MakeAuthAPI("preview_url", userAPI, makeUrlPreviewHandler(&cfg.MediaAPI, rateLimits, db, activeThumbnailGeneration))
// "preview_url", userAPI,
// makeUrlPreviewHandler(&cfg.MediaAPI, rateLimits, db, client, activeThumbnailGeneration),
// )
f := makeUrlPreviewHandler(&cfg.MediaAPI, rateLimits, db, activeThumbnailGeneration)
urlPreviewHandler := httputil.MakeExternalAPI(
"preview_url",
func(req *http.Request) util.JSONResponse {
return f(req, nil)
},
)
v1mux.Handle("/preview_url", urlPreviewHandler).Methods(http.MethodGet, http.MethodOptions) v1mux.Handle("/preview_url", urlPreviewHandler).Methods(http.MethodGet, http.MethodOptions)
// That method is deprecated according to spec but still in use // That method is deprecated according to spec but still in use
v3mux.Handle("/preview_url", urlPreviewHandler).Methods(http.MethodGet, http.MethodOptions) v3mux.Handle("/preview_url", urlPreviewHandler).Methods(http.MethodGet, http.MethodOptions)

View file

@ -1,7 +1,11 @@
package routing package routing
import ( import (
"bytes"
"context" "context"
"crypto/sha256"
"encoding/base64"
"encoding/json"
"fmt" "fmt"
"io" "io"
"net/http" "net/http"
@ -32,6 +36,7 @@ var (
ErrorUnsupportedContentType = errors.New("unsupported content type") ErrorUnsupportedContentType = errors.New("unsupported content type")
ErrorFileTooLarge = errors.New("file too large") ErrorFileTooLarge = errors.New("file too large")
ErrorTimeoutThumbnailGenerator = errors.New("timeout waiting for thumbnail generator") ErrorTimeoutThumbnailGenerator = errors.New("timeout waiting for thumbnail generator")
ErrNoMetadataFound = errors.New("no metadata found")
) )
func makeUrlPreviewHandler( func makeUrlPreviewHandler(
@ -89,6 +94,30 @@ func makeUrlPreviewHandler(
} }
} }
hash := getHashFromString(pUrl)
// Check if we have a previously stored response
if urlPreviewCached, err := loadUrlPreviewResponse(req.Context(), cfg, db, hash, logger); err == nil {
logger.Debug("Loaded url preview from the cache")
// Put it into the cache for further usage
defer func() {
if _, ok := urlPreviewCache.Records[pUrl]; !ok {
urlPreviewCacheItem := &types.UrlPreviewCacheRecord{
Created: time.Now().Unix(),
Preview: urlPreviewCached,
}
urlPreviewCache.Lock.Lock()
urlPreviewCache.Records[pUrl] = urlPreviewCacheItem
defer urlPreviewCache.Lock.Unlock()
}
}()
return util.JSONResponse{
Code: http.StatusOK,
JSON: urlPreviewCached,
}
}
// Check if there is an active request // Check if there is an active request
activeUrlPreviewRequests.Lock() activeUrlPreviewRequests.Lock()
if activeUrlPreviewRequest, ok := activeUrlPreviewRequests.Url[pUrl]; ok { if activeUrlPreviewRequest, ok := activeUrlPreviewRequests.Url[pUrl]; ok {
@ -122,6 +151,11 @@ func makeUrlPreviewHandler(
urlPreviewCacheItem.Error = activeUrlPreviewRequest.Error urlPreviewCacheItem.Error = activeUrlPreviewRequest.Error
} else { } else {
urlPreviewCacheItem.Preview = activeUrlPreviewRequest.Preview urlPreviewCacheItem.Preview = activeUrlPreviewRequest.Preview
// Store the response file for further usage
err := storeUrlPreviewResponse(req.Context(), cfg, db, *device, hash, activeUrlPreviewRequest.Preview, logger)
if err != nil {
logger.WithError(err).Error("unable to store url preview response")
}
} }
urlPreviewCache.Lock.Lock() urlPreviewCache.Lock.Lock()
@ -141,47 +175,64 @@ func makeUrlPreviewHandler(
defer resp.Body.Close() defer resp.Body.Close()
var result *types.UrlPreview var result *types.UrlPreview
var err error var err, err2 error
var imgUrl *url.URL var imgUrl *url.URL
var imgReader *http.Response var imgReader *http.Response
var mediaData *types.MediaMetadata var mediaData *types.MediaMetadata
var width, height int
if strings.HasPrefix(resp.Header.Get("Content-Type"), "text/html") { if strings.HasPrefix(resp.Header.Get("Content-Type"), "text/html") {
// The url is a webpage - get data from the meta tags
result, err = getPreviewFromHTML(resp, pUrl) result, err = getPreviewFromHTML(resp, pUrl)
if err == nil && result.ImageUrl != "" { if err == nil && result.ImageUrl != "" {
if imgUrl, err = url.Parse(result.ImageUrl); err == nil { // The page has an og:image link
imgReader, err = downloadUrl(result.ImageUrl, time.Duration(cfg.UrlPreviewTimeout)*time.Second) if imgUrl, err2 = url.Parse(result.ImageUrl); err2 == nil {
if err == nil { imgReader, err2 = downloadUrl(result.ImageUrl, time.Duration(cfg.UrlPreviewTimeout)*time.Second)
mediaData, err = downloadAndStoreImage(imgUrl.Path, req.Context(), imgReader, cfg, device, db, activeThumbnailGeneration, logger) if err2 == nil {
if err == nil { // Download image and store it as a thumbnail
result.ImageUrl = fmt.Sprintf("mxc://%s/%s", mediaData.Origin, mediaData.MediaID) mediaData, width, height, err2 = downloadAndStoreImage(imgUrl.Path, req.Context(), imgReader, cfg, device, db, activeThumbnailGeneration, logger)
} else {
// We don't show the orginal URL as it is insecure for the room users
result.ImageUrl = ""
}
} }
} }
// In case of any error in image download
// we don't show the original URL as it is insecure for the room users
if err2 != nil {
result.ImageUrl = ""
}
} }
} else if strings.HasPrefix(resp.Header.Get("Content-Type"), "image/") { } else if strings.HasPrefix(resp.Header.Get("Content-Type"), "image/") {
mediaData, err := downloadAndStoreImage("somefile", req.Context(), resp, cfg, device, db, activeThumbnailGeneration, logger) // The url is an image link
mediaData, width, height, err = downloadAndStoreImage("somefile", req.Context(), resp, cfg, device, db, activeThumbnailGeneration, logger)
if err == nil { if err == nil {
result = &types.UrlPreview{ImageUrl: fmt.Sprintf("mxc://%s/%s", mediaData.Origin, mediaData.MediaID)} result = &types.UrlPreview{}
} }
} else { } else {
return util.ErrorResponse(errors.New("Unsupported content type")) return util.ErrorResponse(errors.New("Unsupported content type"))
} }
// In case of any error happened during the page/image download
// we store the error instead of the preview
if err != nil { if err != nil {
activeUrlPreviewRequest.Error = err activeUrlPreviewRequest.Error = err
} else { } else {
// We have a mediadata so we have an image in the preview
if mediaData != nil {
result.ImageUrl = fmt.Sprintf("mxc://%s/%s", mediaData.Origin, mediaData.MediaID)
result.ImageWidth = width
result.ImageHeight = height
result.ImageType = mediaData.ContentType
result.ImageSize = mediaData.FileSizeBytes
}
activeUrlPreviewRequest.Preview = result activeUrlPreviewRequest.Preview = result
} }
} }
// choose the answer based on the result // Return either the error or the preview
if activeUrlPreviewRequest.Error != nil { if activeUrlPreviewRequest.Error != nil {
return util.ErrorResponse(activeUrlPreviewRequest.Error) return util.ErrorResponse(activeUrlPreviewRequest.Error)
} else { } else {
return util.JSONResponse{ return util.JSONResponse{
Code: http.StatusOK, Code: http.StatusOK,
JSON: activeUrlPreviewRequest.Preview, JSON: activeUrlPreviewRequest.Preview,
@ -248,7 +299,9 @@ func downloadAndStoreImage(
activeThumbnailGeneration *types.ActiveThumbnailGeneration, activeThumbnailGeneration *types.ActiveThumbnailGeneration,
logger *log.Entry, logger *log.Entry,
) (*types.MediaMetadata, error) { ) (*types.MediaMetadata, int, int, error) {
var width, height int
userid := types.MatrixUserID("user") userid := types.MatrixUserID("user")
if dev != nil { if dev != nil {
@ -264,13 +317,13 @@ func downloadAndStoreImage(
logger.WithError(err).WithFields(log.Fields{ logger.WithError(err).WithFields(log.Fields{
"MaxFileSizeBytes": cfg.MaxFileSizeBytes, "MaxFileSizeBytes": cfg.MaxFileSizeBytes,
}).Warn("Error while transferring file") }).Warn("Error while transferring file")
return nil, err return nil, width, height, err
} }
defer fileutils.RemoveDir(tmpDir, logger) defer fileutils.RemoveDir(tmpDir, logger)
// Check if temp file size exceeds max file size configuration // Check if temp file size exceeds max file size configuration
if cfg.MaxFileSizeBytes > 0 && bytesWritten > types.FileSizeBytes(cfg.MaxFileSizeBytes) { if cfg.MaxFileSizeBytes > 0 && bytesWritten > types.FileSizeBytes(cfg.MaxFileSizeBytes) {
return nil, ErrorFileTooLarge return nil, 0, 0, ErrorFileTooLarge
} }
// Check if we already have this file // Check if we already have this file
@ -280,13 +333,22 @@ func downloadAndStoreImage(
if err != nil { if err != nil {
logger.WithError(err).Error("unable to get media metadata by hash") logger.WithError(err).Error("unable to get media metadata by hash")
return nil, err return nil, width, height, err
} }
if existingMetadata != nil { if existingMetadata != nil {
logger.WithField("mediaID", existingMetadata.MediaID).Debug("media already exists") logger.WithField("mediaID", existingMetadata.MediaID).Debug("media already exists")
return existingMetadata, nil // Here we have to read the image to get it's size
filename, err := fileutils.GetPathFromBase64Hash(existingMetadata.Base64Hash, cfg.AbsBasePath)
if err != nil {
return nil, width, height, err
}
img, err := thumbnailer.ReadFile(string(filename))
if err != nil {
return nil, width, height, err
}
return existingMetadata, img.Bounds().Dx(), img.Bounds().Dy(), nil
} }
tmpFileName := filepath.Join(string(tmpDir), "content") tmpFileName := filepath.Join(string(tmpDir), "content")
@ -295,7 +357,7 @@ func downloadAndStoreImage(
file, err := os.Open(string(tmpFileName)) file, err := os.Open(string(tmpFileName))
if err != nil { if err != nil {
logger.WithError(err).Error("unable to open file") logger.WithError(err).Error("unable to open file")
return nil, err return nil, 0, 0, err
} }
defer file.Close() defer file.Close()
@ -304,13 +366,13 @@ func downloadAndStoreImage(
_, err = file.Read(buf) _, err = file.Read(buf)
if err != nil { if err != nil {
logger.WithError(err).Error("unable to read file") logger.WithError(err).Error("unable to read file")
return nil, err return nil, 0, 0, err
} }
fileType := http.DetectContentType(buf) fileType := http.DetectContentType(buf)
if !strings.HasPrefix(fileType, "image") { if !strings.HasPrefix(fileType, "image") {
logger.WithField("contentType", fileType).Debugf("uploaded file is not an image or can not be thumbnailed, not generating thumbnails") logger.WithField("contentType", fileType).Debugf("uploaded file is not an image or can not be thumbnailed, not generating thumbnails")
return nil, ErrorUnsupportedContentType return nil, 0, 0, ErrorUnsupportedContentType
} }
logger.WithField("contentType", fileType).Debug("uploaded file is an image") logger.WithField("contentType", fileType).Debug("uploaded file is an image")
@ -332,13 +394,13 @@ func downloadAndStoreImage(
activeThumbnailGeneration.Unlock() activeThumbnailGeneration.Unlock()
}() }()
err = thumbnailer.CreateThumbnailFromFile(types.Path(tmpFileName), types.Path(thumbnailPath), types.ThumbnailSize(cfg.UrlPreviewThumbnailSize), logger) width, height, err = thumbnailer.CreateThumbnailFromFile(types.Path(tmpFileName), types.Path(thumbnailPath), types.ThumbnailSize(cfg.UrlPreviewThumbnailSize), logger)
if err != nil { if err != nil {
if errors.Is(err, thumbnailer.ErrThumbnailTooLarge) { if errors.Is(err, thumbnailer.ErrThumbnailTooLarge) {
thumbnailPath = tmpFileName thumbnailPath = tmpFileName
} else { } else {
logger.WithError(err).Error("unable to create thumbnail") logger.WithError(err).Error("unable to create thumbnail")
return nil, err return nil, 0, 0, err
} }
} }
break break
@ -347,7 +409,7 @@ func downloadAndStoreImage(
select { select {
case <-timeout: case <-timeout:
logger.Error("timed out waiting for thumbnail generator") logger.Error("timed out waiting for thumbnail generator")
return nil, ErrorTimeoutThumbnailGenerator return nil, 0, 0, ErrorTimeoutThumbnailGenerator
default: default:
time.Sleep(time.Second) time.Sleep(time.Second)
} }
@ -356,7 +418,7 @@ func downloadAndStoreImage(
thumbnailFileInfo, err := os.Stat(string(thumbnailPath)) thumbnailFileInfo, err := os.Stat(string(thumbnailPath))
if err != nil { if err != nil {
logger.WithError(err).Error("unable to get thumbnail file info") logger.WithError(err).Error("unable to get thumbnail file info")
return nil, err return nil, width, height, err
} }
r := &uploadRequest{ r := &uploadRequest{
@ -370,7 +432,7 @@ func downloadAndStoreImage(
mediaID, err := r.generateMediaID(ctx, db) mediaID, err := r.generateMediaID(ctx, db)
if err != nil { if err != nil {
logger.WithError(err).Error("unable to generate media ID") logger.WithError(err).Error("unable to generate media ID")
return nil, err return nil, width, height, err
} }
mediaMetaData := &types.MediaMetadata{ mediaMetaData := &types.MediaMetadata{
MediaID: mediaID, MediaID: mediaID,
@ -386,21 +448,97 @@ func downloadAndStoreImage(
finalPath, err := fileutils.GetPathFromBase64Hash(mediaMetaData.Base64Hash, cfg.AbsBasePath) finalPath, err := fileutils.GetPathFromBase64Hash(mediaMetaData.Base64Hash, cfg.AbsBasePath)
if err != nil { if err != nil {
logger.WithError(err).Error("unable to get path from base64 hash") logger.WithError(err).Error("unable to get path from base64 hash")
return nil, err return nil, width, height, err
} }
err = fileutils.MoveFile(types.Path(thumbnailPath), types.Path(finalPath)) err = fileutils.MoveFile(types.Path(thumbnailPath), types.Path(finalPath))
if err != nil { if err != nil {
logger.WithError(err).Error("unable to move thumbnail file") logger.WithError(err).Error("unable to move thumbnail file")
return nil, err return nil, width, height, err
} }
// Store the metadata in the database // Store the metadata in the database
err = db.StoreMediaMetadata(ctx, mediaMetaData) err = db.StoreMediaMetadata(ctx, mediaMetaData)
if err != nil { if err != nil {
logger.WithError(err).Error("unable to store media metadata") logger.WithError(err).Error("unable to store media metadata")
return nil, err return nil, width, height, err
} }
return mediaMetaData, nil return mediaMetaData, width, height, nil
}
// storeUrlPreviewResponse persists a URL preview as a JSON media file so that
// a later request can be answered from storage instead of re-fetching the URL.
//
// The preview is marshalled to JSON, written to a temporary file below
// cfg.AbsBasePath, handed to fileutils.MoveFileWithHashCheck and finally
// recorded through db.StoreMediaMetadata. The metadata's Base64Hash is the
// caller-supplied hash, not the hash of the file contents, so the record can
// be found again with that same hash.
//
// It returns the first error encountered, wrapped with the step that failed.
// A nil preview is rejected: it would otherwise be stored as the JSON literal
// "null" and later be loaded back as an empty preview.
func storeUrlPreviewResponse(ctx context.Context, cfg *config.MediaAPI, db storage.Database, user userapi.Device, hash types.Base64Hash, preview *types.UrlPreview, logger *log.Entry) error {
	if preview == nil {
		return errors.New("url preview is nil")
	}

	jsonPreview, err := json.Marshal(preview)
	if err != nil {
		return fmt.Errorf("marshalling url preview: %w", err)
	}

	// The content hash computed by WriteTempFile is deliberately ignored;
	// the record is keyed by the caller-supplied hash instead.
	_, bytesWritten, tmpDir, err := fileutils.WriteTempFile(ctx, bytes.NewReader(jsonPreview), cfg.AbsBasePath)
	if err != nil {
		return fmt.Errorf("writing url preview temp file: %w", err)
	}
	defer fileutils.RemoveDir(tmpDir, logger)

	r := &uploadRequest{
		MediaMetadata: &types.MediaMetadata{
			Origin: cfg.Matrix.ServerName,
		},
		Logger: logger,
	}

	mediaID, err := r.generateMediaID(ctx, db)
	if err != nil {
		return fmt.Errorf("generating media ID for url preview: %w", err)
	}

	mediaMetaData := &types.MediaMetadata{
		MediaID:       mediaID,
		Origin:        cfg.Matrix.ServerName,
		ContentType:   "application/json",
		FileSizeBytes: types.FileSizeBytes(bytesWritten),
		UploadName:    types.Filename("url_preview.json"),
		// NOTE(review): spec.Timestamp is a millisecond POSIX timestamp in
		// gomatrixserverlib, so seconds from Unix() would be 1000x too small.
		CreationTimestamp: spec.Timestamp(time.Now().UnixMilli()),
		Base64Hash:        hash,
		UserID:            types.MatrixUserID(user.UserID),
	}

	if _, _, err = fileutils.MoveFileWithHashCheck(tmpDir, mediaMetaData, cfg.AbsBasePath, logger); err != nil {
		return fmt.Errorf("moving url preview file: %w", err)
	}

	if err = db.StoreMediaMetadata(ctx, mediaMetaData); err != nil {
		logger.WithError(err).Error("unable to store media metadata")
		return fmt.Errorf("storing url preview metadata: %w", err)
	}
	return nil
}
// loadUrlPreviewResponse returns the URL preview previously stored under hash.
//
// It looks up media metadata by hash for cfg.Matrix.ServerName, reads the file
// at the path derived from the metadata's Base64Hash below cfg.AbsBasePath and
// unmarshals its JSON content into a types.UrlPreview.
//
// ErrNoMetadataFound is returned only when the lookup succeeds but yields no
// record. A failing lookup, path resolution, file read or JSON decode is
// returned wrapped, so a storage fault is not mistaken for a cache miss.
// The logger parameter is currently unused.
func loadUrlPreviewResponse(ctx context.Context, cfg *config.MediaAPI, db storage.Database, hash types.Base64Hash, logger *log.Entry) (*types.UrlPreview, error) {
	mediaMetadata, err := db.GetMediaMetadataByHash(ctx, hash, cfg.Matrix.ServerName)
	if err != nil {
		return nil, fmt.Errorf("looking up url preview metadata: %w", err)
	}
	if mediaMetadata == nil {
		return nil, ErrNoMetadataFound
	}

	// Get the response file
	filePath, err := fileutils.GetPathFromBase64Hash(mediaMetadata.Base64Hash, cfg.AbsBasePath)
	if err != nil {
		return nil, fmt.Errorf("resolving url preview file path: %w", err)
	}

	data, err := os.ReadFile(string(filePath))
	if err != nil {
		return nil, fmt.Errorf("reading url preview file: %w", err)
	}

	preview := new(types.UrlPreview)
	if err = json.Unmarshal(data, preview); err != nil {
		return nil, fmt.Errorf("decoding url preview file: %w", err)
	}
	return preview, nil
}
func getHashFromString(s string) types.Base64Hash {
hasher := sha256.New()
hasher.Write([]byte(s))
return types.Base64Hash(base64.RawURLEncoding.EncodeToString(hasher.Sum(nil)))
} }
func getMetaFieldsFromHTML(resp *http.Response) map[string]string { func getMetaFieldsFromHTML(resp *http.Response) map[string]string {

View file

@ -283,24 +283,24 @@ func CreateThumbnailFromFile(
dst types.Path, dst types.Path,
config types.ThumbnailSize, config types.ThumbnailSize,
logger *log.Entry, logger *log.Entry,
) (err error) { ) (width int, height int, err error) {
img, err := readFile(string(src)) img, err := readFile(string(src))
if err != nil { if err != nil {
logger.WithError(err).WithFields(log.Fields{ logger.WithError(err).WithFields(log.Fields{
"src": src, "src": src,
}).Error("Failed to read src file") }).Error("Failed to read image")
return err return 0, 0, err
} }
// Check if request is larger than original // Check if request is larger than original
if config.Width >= img.Bounds().Dx() && config.Height >= img.Bounds().Dy() { if config.Width >= img.Bounds().Dx() && config.Height >= img.Bounds().Dy() {
return ErrThumbnailTooLarge return img.Bounds().Dx(), img.Bounds().Dy(), ErrThumbnailTooLarge
} }
start := time.Now() start := time.Now()
width, height, err := adjustSize(dst, img, config.Width, config.Height, config.ResizeMethod == types.Crop, logger) width, height, err = adjustSize(dst, img, config.Width, config.Height, config.ResizeMethod == types.Crop, logger)
if err != nil { if err != nil {
return err return 0, 0, err
} }
logger.WithFields(log.Fields{ logger.WithFields(log.Fields{
"ActualWidth": width, "ActualWidth": width,
@ -308,5 +308,9 @@ func CreateThumbnailFromFile(
"processTime": time.Since(start), "processTime": time.Since(start),
}).Info("Generated thumbnail") }).Info("Generated thumbnail")
return nil return width, height, nil
}
func ReadFile(src string) (image.Image, error) {
return readFile(src)
} }