added storing response as file cache

Signed-off-by: Aleksandr Dubovikov <d.lexand@gmail.com>
This commit is contained in:
ad 2024-09-30 13:01:39 +02:00 committed by Aleksandr Dubovikov
parent 46473c1bf0
commit 6c8158b313
3 changed files with 181 additions and 49 deletions

View file

@ -112,17 +112,7 @@ func Setup(
v1mux.Handle("/download/{serverName}/{mediaId}", downloadHandlerAuthed).Methods(http.MethodGet, http.MethodOptions)
v1mux.Handle("/download/{serverName}/{mediaId}/{downloadName}", downloadHandlerAuthed).Methods(http.MethodGet, http.MethodOptions)
// urlPreviewHandler := httputil.MakeAuthAPI(
// "preview_url", userAPI,
// makeUrlPreviewHandler(&cfg.MediaAPI, rateLimits, db, client, activeThumbnailGeneration),
// )
f := makeUrlPreviewHandler(&cfg.MediaAPI, rateLimits, db, activeThumbnailGeneration)
urlPreviewHandler := httputil.MakeExternalAPI(
"preview_url",
func(req *http.Request) util.JSONResponse {
return f(req, nil)
},
)
urlPreviewHandler := httputil.MakeAuthAPI("preview_url", userAPI, makeUrlPreviewHandler(&cfg.MediaAPI, rateLimits, db, activeThumbnailGeneration))
v1mux.Handle("/preview_url", urlPreviewHandler).Methods(http.MethodGet, http.MethodOptions)
// That method is deprecated according to spec but still in use
v3mux.Handle("/preview_url", urlPreviewHandler).Methods(http.MethodGet, http.MethodOptions)

View file

@ -1,7 +1,11 @@
package routing
import (
"bytes"
"context"
"crypto/sha256"
"encoding/base64"
"encoding/json"
"fmt"
"io"
"net/http"
@ -32,6 +36,7 @@ var (
ErrorUnsupportedContentType = errors.New("unsupported content type")
ErrorFileTooLarge = errors.New("file too large")
ErrorTimeoutThumbnailGenerator = errors.New("timeout waiting for thumbnail generator")
ErrNoMetadataFound = errors.New("no metadata found")
)
func makeUrlPreviewHandler(
@ -89,6 +94,30 @@ func makeUrlPreviewHandler(
}
}
hash := getHashFromString(pUrl)
// Check if we have a previously stored response
if urlPreviewCached, err := loadUrlPreviewResponse(req.Context(), cfg, db, hash, logger); err == nil {
logger.Debug("Loaded url preview from the cache")
// Put it into the in-memory cache for further use
defer func() {
if _, ok := urlPreviewCache.Records[pUrl]; !ok {
urlPreviewCacheItem := &types.UrlPreviewCacheRecord{
Created: time.Now().Unix(),
Preview: urlPreviewCached,
}
urlPreviewCache.Lock.Lock()
urlPreviewCache.Records[pUrl] = urlPreviewCacheItem
defer urlPreviewCache.Lock.Unlock()
}
}()
return util.JSONResponse{
Code: http.StatusOK,
JSON: urlPreviewCached,
}
}
// Check if there is an active request
activeUrlPreviewRequests.Lock()
if activeUrlPreviewRequest, ok := activeUrlPreviewRequests.Url[pUrl]; ok {
@ -122,6 +151,11 @@ func makeUrlPreviewHandler(
urlPreviewCacheItem.Error = activeUrlPreviewRequest.Error
} else {
urlPreviewCacheItem.Preview = activeUrlPreviewRequest.Preview
// Store the response file for further usage
err := storeUrlPreviewResponse(req.Context(), cfg, db, *device, hash, activeUrlPreviewRequest.Preview, logger)
if err != nil {
logger.WithError(err).Error("unable to store url preview response")
}
}
urlPreviewCache.Lock.Lock()
@ -141,47 +175,64 @@ func makeUrlPreviewHandler(
defer resp.Body.Close()
var result *types.UrlPreview
var err error
var err, err2 error
var imgUrl *url.URL
var imgReader *http.Response
var mediaData *types.MediaMetadata
var width, height int
if strings.HasPrefix(resp.Header.Get("Content-Type"), "text/html") {
// The url is a webpage - get data from the meta tags
result, err = getPreviewFromHTML(resp, pUrl)
if err == nil && result.ImageUrl != "" {
if imgUrl, err = url.Parse(result.ImageUrl); err == nil {
imgReader, err = downloadUrl(result.ImageUrl, time.Duration(cfg.UrlPreviewTimeout)*time.Second)
if err == nil {
mediaData, err = downloadAndStoreImage(imgUrl.Path, req.Context(), imgReader, cfg, device, db, activeThumbnailGeneration, logger)
if err == nil {
result.ImageUrl = fmt.Sprintf("mxc://%s/%s", mediaData.Origin, mediaData.MediaID)
} else {
// We don't show the original URL as it is insecure for the room users
result.ImageUrl = ""
}
// The page has an og:image link
if imgUrl, err2 = url.Parse(result.ImageUrl); err2 == nil {
imgReader, err2 = downloadUrl(result.ImageUrl, time.Duration(cfg.UrlPreviewTimeout)*time.Second)
if err2 == nil {
// Download image and store it as a thumbnail
mediaData, width, height, err2 = downloadAndStoreImage(imgUrl.Path, req.Context(), imgReader, cfg, device, db, activeThumbnailGeneration, logger)
}
}
// In case of any error in image download
// we don't show the original URL as it is insecure for the room users
if err2 != nil {
result.ImageUrl = ""
}
}
} else if strings.HasPrefix(resp.Header.Get("Content-Type"), "image/") {
mediaData, err := downloadAndStoreImage("somefile", req.Context(), resp, cfg, device, db, activeThumbnailGeneration, logger)
// The url is an image link
mediaData, width, height, err = downloadAndStoreImage("somefile", req.Context(), resp, cfg, device, db, activeThumbnailGeneration, logger)
if err == nil {
result = &types.UrlPreview{ImageUrl: fmt.Sprintf("mxc://%s/%s", mediaData.Origin, mediaData.MediaID)}
result = &types.UrlPreview{}
}
} else {
return util.ErrorResponse(errors.New("Unsupported content type"))
}
// In case of any error happened during the page/image download
// we store the error instead of the preview
if err != nil {
activeUrlPreviewRequest.Error = err
} else {
// We have a mediadata so we have an image in the preview
if mediaData != nil {
result.ImageUrl = fmt.Sprintf("mxc://%s/%s", mediaData.Origin, mediaData.MediaID)
result.ImageWidth = width
result.ImageHeight = height
result.ImageType = mediaData.ContentType
result.ImageSize = mediaData.FileSizeBytes
}
activeUrlPreviewRequest.Preview = result
}
}
// choose the answer based on the result
// Return either the error or the preview
if activeUrlPreviewRequest.Error != nil {
return util.ErrorResponse(activeUrlPreviewRequest.Error)
} else {
return util.JSONResponse{
Code: http.StatusOK,
JSON: activeUrlPreviewRequest.Preview,
@ -248,7 +299,9 @@ func downloadAndStoreImage(
activeThumbnailGeneration *types.ActiveThumbnailGeneration,
logger *log.Entry,
) (*types.MediaMetadata, error) {
) (*types.MediaMetadata, int, int, error) {
var width, height int
userid := types.MatrixUserID("user")
if dev != nil {
@ -264,13 +317,13 @@ func downloadAndStoreImage(
logger.WithError(err).WithFields(log.Fields{
"MaxFileSizeBytes": cfg.MaxFileSizeBytes,
}).Warn("Error while transferring file")
return nil, err
return nil, width, height, err
}
defer fileutils.RemoveDir(tmpDir, logger)
// Check if temp file size exceeds max file size configuration
if cfg.MaxFileSizeBytes > 0 && bytesWritten > types.FileSizeBytes(cfg.MaxFileSizeBytes) {
return nil, ErrorFileTooLarge
return nil, 0, 0, ErrorFileTooLarge
}
// Check if we already have this file
@ -280,13 +333,22 @@ func downloadAndStoreImage(
if err != nil {
logger.WithError(err).Error("unable to get media metadata by hash")
return nil, err
return nil, width, height, err
}
if existingMetadata != nil {
logger.WithField("mediaID", existingMetadata.MediaID).Debug("media already exists")
return existingMetadata, nil
// Here we have to read the image to get its size
filename, err := fileutils.GetPathFromBase64Hash(existingMetadata.Base64Hash, cfg.AbsBasePath)
if err != nil {
return nil, width, height, err
}
img, err := thumbnailer.ReadFile(string(filename))
if err != nil {
return nil, width, height, err
}
return existingMetadata, img.Bounds().Dx(), img.Bounds().Dy(), nil
}
tmpFileName := filepath.Join(string(tmpDir), "content")
@ -295,7 +357,7 @@ func downloadAndStoreImage(
file, err := os.Open(string(tmpFileName))
if err != nil {
logger.WithError(err).Error("unable to open file")
return nil, err
return nil, 0, 0, err
}
defer file.Close()
@ -304,13 +366,13 @@ func downloadAndStoreImage(
_, err = file.Read(buf)
if err != nil {
logger.WithError(err).Error("unable to read file")
return nil, err
return nil, 0, 0, err
}
fileType := http.DetectContentType(buf)
if !strings.HasPrefix(fileType, "image") {
logger.WithField("contentType", fileType).Debugf("uploaded file is not an image or can not be thumbnailed, not generating thumbnails")
return nil, ErrorUnsupportedContentType
return nil, 0, 0, ErrorUnsupportedContentType
}
logger.WithField("contentType", fileType).Debug("uploaded file is an image")
@ -332,13 +394,13 @@ func downloadAndStoreImage(
activeThumbnailGeneration.Unlock()
}()
err = thumbnailer.CreateThumbnailFromFile(types.Path(tmpFileName), types.Path(thumbnailPath), types.ThumbnailSize(cfg.UrlPreviewThumbnailSize), logger)
width, height, err = thumbnailer.CreateThumbnailFromFile(types.Path(tmpFileName), types.Path(thumbnailPath), types.ThumbnailSize(cfg.UrlPreviewThumbnailSize), logger)
if err != nil {
if errors.Is(err, thumbnailer.ErrThumbnailTooLarge) {
thumbnailPath = tmpFileName
} else {
logger.WithError(err).Error("unable to create thumbnail")
return nil, err
return nil, 0, 0, err
}
}
break
@ -347,7 +409,7 @@ func downloadAndStoreImage(
select {
case <-timeout:
logger.Error("timed out waiting for thumbnail generator")
return nil, ErrorTimeoutThumbnailGenerator
return nil, 0, 0, ErrorTimeoutThumbnailGenerator
default:
time.Sleep(time.Second)
}
@ -356,7 +418,7 @@ func downloadAndStoreImage(
thumbnailFileInfo, err := os.Stat(string(thumbnailPath))
if err != nil {
logger.WithError(err).Error("unable to get thumbnail file info")
return nil, err
return nil, width, height, err
}
r := &uploadRequest{
@ -370,7 +432,7 @@ func downloadAndStoreImage(
mediaID, err := r.generateMediaID(ctx, db)
if err != nil {
logger.WithError(err).Error("unable to generate media ID")
return nil, err
return nil, width, height, err
}
mediaMetaData := &types.MediaMetadata{
MediaID: mediaID,
@ -386,21 +448,97 @@ func downloadAndStoreImage(
finalPath, err := fileutils.GetPathFromBase64Hash(mediaMetaData.Base64Hash, cfg.AbsBasePath)
if err != nil {
logger.WithError(err).Error("unable to get path from base64 hash")
return nil, err
return nil, width, height, err
}
err = fileutils.MoveFile(types.Path(thumbnailPath), types.Path(finalPath))
if err != nil {
logger.WithError(err).Error("unable to move thumbnail file")
return nil, err
return nil, width, height, err
}
// Store the metadata in the database
err = db.StoreMediaMetadata(ctx, mediaMetaData)
if err != nil {
logger.WithError(err).Error("unable to store media metadata")
return nil, err
return nil, width, height, err
}
return mediaMetaData, nil
return mediaMetaData, width, height, nil
}
// storeUrlPreviewResponse persists a URL preview as a JSON media file so that
// it can later be looked up by hash.
//
// The preview is marshalled to JSON and handed to fileutils.WriteTempFile
// together with cfg.AbsBasePath. A fresh media ID is generated, and the
// temporary file is moved into place by fileutils.MoveFileWithHashCheck using
// metadata whose Base64Hash is the caller-supplied hash (it is not computed
// from the file contents here). Finally the metadata is recorded through
// db.StoreMediaMetadata. The temporary directory is removed when the function
// returns.
//
// The first error encountered is returned unchanged; only a failure of
// db.StoreMediaMetadata is additionally logged.
func storeUrlPreviewResponse(ctx context.Context, cfg *config.MediaAPI, db storage.Database, user userapi.Device, hash types.Base64Hash, preview *types.UrlPreview, logger *log.Entry) error {
	payload, marshalErr := json.Marshal(preview)
	if marshalErr != nil {
		return marshalErr
	}

	_, size, workDir, writeErr := fileutils.WriteTempFile(ctx, bytes.NewReader(payload), cfg.AbsBasePath)
	if writeErr != nil {
		return writeErr
	}
	defer fileutils.RemoveDir(workDir, logger)

	serverName := cfg.Matrix.ServerName

	// A throwaway upload request is only needed for its media ID generator.
	idSource := &uploadRequest{
		MediaMetadata: &types.MediaMetadata{Origin: serverName},
		Logger:        logger,
	}
	newID, idErr := idSource.generateMediaID(ctx, db)
	if idErr != nil {
		return idErr
	}

	meta := &types.MediaMetadata{
		MediaID:           newID,
		Origin:            serverName,
		ContentType:       "application/json",
		FileSizeBytes:     types.FileSizeBytes(size),
		UploadName:        types.Filename("url_preview.json"),
		CreationTimestamp: spec.Timestamp(time.Now().Unix()),
		Base64Hash:        hash,
		UserID:            types.MatrixUserID(user.UserID),
	}

	if _, _, moveErr := fileutils.MoveFileWithHashCheck(workDir, meta, cfg.AbsBasePath, logger); moveErr != nil {
		return moveErr
	}

	if storeErr := db.StoreMediaMetadata(ctx, meta); storeErr != nil {
		logger.WithError(storeErr).Error("unable to store media metadata")
		return storeErr
	}
	return nil
}
// loadUrlPreviewResponse loads a previously stored URL preview response.
//
// It asks db.GetMediaMetadataByHash for a record matching hash on
// cfg.Matrix.ServerName. When a record exists, the file at the path derived
// from the record's Base64Hash and cfg.AbsBasePath is read and decoded as
// JSON into a types.UrlPreview.
//
// It returns ErrNoMetadataFound only when the lookup succeeds but yields no
// record. A failing database lookup, path resolution, file read or JSON
// decode is returned wrapped with context, so real failures are not mistaken
// for a plain cache miss. The logger parameter is currently unused.
func loadUrlPreviewResponse(ctx context.Context, cfg *config.MediaAPI, db storage.Database, hash types.Base64Hash, logger *log.Entry) (*types.UrlPreview, error) {
	mediaMetadata, err := db.GetMediaMetadataByHash(ctx, hash, cfg.Matrix.ServerName)
	if err != nil {
		return nil, fmt.Errorf("looking up url preview metadata: %w", err)
	}
	if mediaMetadata == nil {
		return nil, ErrNoMetadataFound
	}

	// Get the response file
	filePath, err := fileutils.GetPathFromBase64Hash(mediaMetadata.Base64Hash, cfg.AbsBasePath)
	if err != nil {
		return nil, fmt.Errorf("resolving url preview file path: %w", err)
	}

	data, err := os.ReadFile(string(filePath))
	if err != nil {
		return nil, fmt.Errorf("reading url preview file: %w", err)
	}

	var preview types.UrlPreview
	if err = json.Unmarshal(data, &preview); err != nil {
		return nil, fmt.Errorf("decoding url preview file: %w", err)
	}
	return &preview, nil
}
// getHashFromString returns the SHA-256 digest of s, encoded with unpadded
// URL-safe base64, as a types.Base64Hash.
func getHashFromString(s string) types.Base64Hash {
	digest := sha256.Sum256([]byte(s))
	encoded := base64.RawURLEncoding.EncodeToString(digest[:])
	return types.Base64Hash(encoded)
}
func getMetaFieldsFromHTML(resp *http.Response) map[string]string {

View file

@ -283,24 +283,24 @@ func CreateThumbnailFromFile(
dst types.Path,
config types.ThumbnailSize,
logger *log.Entry,
) (err error) {
) (width int, height int, err error) {
img, err := readFile(string(src))
if err != nil {
logger.WithError(err).WithFields(log.Fields{
"src": src,
}).Error("Failed to read src file")
return err
}).Error("Failed to read image")
return 0, 0, err
}
// Check if request is larger than original
if config.Width >= img.Bounds().Dx() && config.Height >= img.Bounds().Dy() {
return ErrThumbnailTooLarge
return img.Bounds().Dx(), img.Bounds().Dy(), ErrThumbnailTooLarge
}
start := time.Now()
width, height, err := adjustSize(dst, img, config.Width, config.Height, config.ResizeMethod == types.Crop, logger)
width, height, err = adjustSize(dst, img, config.Width, config.Height, config.ResizeMethod == types.Crop, logger)
if err != nil {
return err
return 0, 0, err
}
logger.WithFields(log.Fields{
"ActualWidth": width,
@ -308,5 +308,9 @@ func CreateThumbnailFromFile(
"processTime": time.Since(start),
}).Info("Generated thumbnail")
return nil
return width, height, nil
}
// ReadFile is the exported entry point to the package-private readFile: it
// forwards src unchanged and returns readFile's image and error as-is.
func ReadFile(src string) (image.Image, error) {
	img, err := readFile(src)
	return img, err
}