2017-05-26 02:57:09 -05:00
// Copyright 2017 Vector Creations Ltd
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
2017-10-11 12:16:53 -05:00
package routing
2017-05-26 02:57:09 -05:00
import (
2017-09-21 09:44:00 -05:00
"context"
2020-08-25 09:08:37 -05:00
"crypto/rand"
"encoding/hex"
2017-05-26 02:57:09 -05:00
"fmt"
2017-05-26 10:34:58 -05:00
"io"
2017-05-26 02:57:09 -05:00
"net/http"
"net/url"
2022-04-14 07:32:48 -05:00
"os"
2017-05-26 09:49:54 -05:00
"path"
2017-11-22 09:45:03 -06:00
"strings"
2017-05-26 02:57:09 -05:00
2017-05-26 09:49:54 -05:00
"github.com/matrix-org/dendrite/mediaapi/fileutils"
"github.com/matrix-org/dendrite/mediaapi/storage"
2017-06-06 18:12:49 -05:00
"github.com/matrix-org/dendrite/mediaapi/thumbnailer"
2017-05-26 02:57:09 -05:00
"github.com/matrix-org/dendrite/mediaapi/types"
2020-12-02 11:41:00 -06:00
"github.com/matrix-org/dendrite/setup/config"
2020-08-26 09:38:34 -05:00
userapi "github.com/matrix-org/dendrite/userapi/api"
2017-07-07 08:11:32 -05:00
"github.com/matrix-org/gomatrixserverlib"
2023-05-09 17:46:49 -05:00
"github.com/matrix-org/gomatrixserverlib/spec"
2017-05-26 02:57:09 -05:00
"github.com/matrix-org/util"
2017-11-15 05:13:09 -06:00
log "github.com/sirupsen/logrus"
2017-05-26 02:57:09 -05:00
)
// uploadRequest carries the state of a single media upload as it moves through
// validation, storage and thumbnailing.
// https://matrix.org/docs/spec/client_server/r0.2.0.html#post-matrix-media-r0-upload
// NOTE: The fields are populated from HTTP request metadata (headers, query
// parameters) or from values derived from it.
type uploadRequest struct {
	// MediaMetadata describes the media being uploaded (origin, size, content
	// type, upload name, uploader and, once known, media ID and hash).
	MediaMetadata *types.MediaMetadata
	// Logger is the request-scoped log entry used for every message about this upload.
	Logger *log.Entry
}
2017-05-26 03:03:16 -05:00
// uploadResponse is the JSON body sent back to the client after a successful upload.
// https://matrix.org/docs/spec/client_server/r0.2.0.html#post-matrix-media-r0-upload
type uploadResponse struct {
	// ContentURI is the mxc:// URI identifying the uploaded media.
	ContentURI string `json:"content_uri"`
}
2019-07-18 02:40:10 -05:00
// Upload implements POST /upload
2017-05-26 02:57:09 -05:00
// This endpoint involves uploading potentially significant amounts of data to the homeserver.
// This implementation supports a configurable maximum file size limit in bytes. If a user tries to upload more than this, they will receive an error that their upload is too large.
// Uploaded files are processed piece-wise to avoid DoS attacks which would starve the server of memory.
// TODO: We should time out requests if they have not received any data within a configured timeout period.
2020-08-26 09:38:34 -05:00
func Upload ( req * http . Request , cfg * config . MediaAPI , dev * userapi . Device , db storage . Database , activeThumbnailGeneration * types . ActiveThumbnailGeneration ) util . JSONResponse {
r , resErr := parseAndValidateRequest ( req , cfg , dev )
2017-05-26 02:57:09 -05:00
if resErr != nil {
return * resErr
}
2017-09-21 09:44:00 -05:00
if resErr = r . doUpload ( req . Context ( ) , req . Body , cfg , db , activeThumbnailGeneration ) ; resErr != nil {
2017-05-26 10:34:58 -05:00
return * resErr
}
return util . JSONResponse {
2018-03-13 10:55:45 -05:00
Code : http . StatusOK ,
2017-05-26 10:34:58 -05:00
JSON : uploadResponse {
2017-06-19 09:21:04 -05:00
ContentURI : fmt . Sprintf ( "mxc://%s/%s" , cfg . Matrix . ServerName , r . MediaMetadata . MediaID ) ,
2017-05-26 10:34:58 -05:00
} ,
}
}
// parseAndValidateRequest parses the incoming upload request to validate and extract
// all the metadata about the media being uploaded.
// Returns either an uploadRequest or an error formatted as a util.JSONResponse
2020-08-26 09:38:34 -05:00
func parseAndValidateRequest ( req * http . Request , cfg * config . MediaAPI , dev * userapi . Device ) ( * uploadRequest , * util . JSONResponse ) {
2017-05-26 10:34:58 -05:00
r := & uploadRequest {
MediaMetadata : & types . MediaMetadata {
2017-06-19 09:21:04 -05:00
Origin : cfg . Matrix . ServerName ,
2017-05-26 10:34:58 -05:00
FileSizeBytes : types . FileSizeBytes ( req . ContentLength ) ,
ContentType : types . ContentType ( req . Header . Get ( "Content-Type" ) ) ,
UploadName : types . Filename ( url . PathEscape ( req . FormValue ( "filename" ) ) ) ,
2020-08-26 09:38:34 -05:00
UserID : types . MatrixUserID ( dev . UserID ) ,
2017-05-26 10:34:58 -05:00
} ,
2017-06-19 09:21:04 -05:00
Logger : util . GetLogger ( req . Context ( ) ) . WithField ( "Origin" , cfg . Matrix . ServerName ) ,
2017-05-26 10:34:58 -05:00
}
2022-05-02 03:47:16 -05:00
if resErr := r . Validate ( cfg . MaxFileSizeBytes ) ; resErr != nil {
2017-05-26 10:34:58 -05:00
return nil , resErr
}
return r , nil
}
2020-08-25 09:08:37 -05:00
// generateMediaID produces a media ID that is not yet in use for this
// request's origin. Each candidate is 32 random bytes, hex-encoded; candidates
// are drawn until the database reports no existing metadata for one.
//
// An error is returned if the random source fails or the database lookup fails.
func (r *uploadRequest) generateMediaID(ctx context.Context, db storage.Database) (types.MediaID, error) {
	for {
		// Draw a fresh candidate: random bytes, hex-encoded.
		raw := make([]byte, 32)
		if _, err := rand.Read(raw); err != nil {
			return "", fmt.Errorf("rand.Read: %w", err)
		}
		candidate := types.MediaID(hex.EncodeToString(raw))

		// Ask the database whether this ID is already taken for our origin.
		clash, err := db.GetMediaMetadata(ctx, candidate, r.MediaMetadata.Origin)
		if err != nil {
			return "", fmt.Errorf("db.GetMediaMetadata: %w", err)
		}
		if clash == nil {
			// Nobody is using this ID yet - hand it out.
			return candidate, nil
		}
		// The ID is already in use; go round again with a new candidate.
	}
}
2017-09-21 09:44:00 -05:00
func ( r * uploadRequest ) doUpload (
ctx context . Context ,
reqReader io . Reader ,
2020-08-10 08:18:04 -05:00
cfg * config . MediaAPI ,
2020-01-03 08:07:05 -06:00
db storage . Database ,
2017-09-21 09:44:00 -05:00
activeThumbnailGeneration * types . ActiveThumbnailGeneration ,
) * util . JSONResponse {
2017-05-26 09:49:54 -05:00
r . Logger . WithFields ( log . Fields {
2017-05-26 10:24:13 -05:00
"UploadName" : r . MediaMetadata . UploadName ,
"FileSizeBytes" : r . MediaMetadata . FileSizeBytes ,
2017-05-31 07:54:10 -05:00
"ContentType" : r . MediaMetadata . ContentType ,
2017-05-26 09:49:54 -05:00
} ) . Info ( "Uploading file" )
// The file data is hashed and the hash is used as the MediaID. The hash is useful as a
// method of deduplicating files to save storage, as well as a way to conduct
// integrity checks on the file data in the repository.
2017-05-31 00:10:01 -05:00
// Data is truncated to maxFileSizeBytes. Content-Length was reported as 0 < Content-Length <= maxFileSizeBytes so this is OK.
2020-08-26 09:38:34 -05:00
//
// TODO: This has a bad API shape where you either need to call:
// fileutils.RemoveDir(tmpDir, r.Logger)
// or call:
// r.storeFileAndMetadata(ctx, tmpDir, ...)
// before you return from doUpload else we will leak a temp file. We could make this nicer with a `WithTransaction` style of
// nested function to guarantee either storage or cleanup.
2022-05-02 03:47:16 -05:00
if cfg . MaxFileSizeBytes > 0 {
if cfg . MaxFileSizeBytes + 1 <= 0 {
2021-07-19 11:58:51 -05:00
r . Logger . WithFields ( log . Fields {
2022-05-02 03:47:16 -05:00
"MaxFileSizeBytes" : cfg . MaxFileSizeBytes ,
2021-07-19 11:58:51 -05:00
} ) . Warnf ( "Configured MaxFileSizeBytes overflows int64, defaulting to %d bytes" , config . DefaultMaxFileSizeBytes )
2022-05-02 03:47:16 -05:00
cfg . MaxFileSizeBytes = config . DefaultMaxFileSizeBytes
2021-06-14 08:12:03 -05:00
}
2022-05-02 03:47:16 -05:00
reqReader = io . LimitReader ( reqReader , int64 ( cfg . MaxFileSizeBytes ) + 1 )
2021-06-14 08:12:03 -05:00
}
2021-07-19 11:58:51 -05:00
hash , bytesWritten , tmpDir , err := fileutils . WriteTempFile ( ctx , reqReader , cfg . AbsBasePath )
2017-05-26 10:50:16 -05:00
if err != nil {
r . Logger . WithError ( err ) . WithFields ( log . Fields {
2022-05-02 03:47:16 -05:00
"MaxFileSizeBytes" : cfg . MaxFileSizeBytes ,
2017-05-26 10:50:16 -05:00
} ) . Warn ( "Error while transferring file" )
2017-05-26 10:34:58 -05:00
return & util . JSONResponse {
2018-03-13 10:55:45 -05:00
Code : http . StatusBadRequest ,
2023-05-09 17:46:49 -05:00
JSON : spec . Unknown ( "Failed to upload" ) ,
2017-05-26 09:49:54 -05:00
}
}
2021-04-14 04:53:24 -05:00
// Check if temp file size exceeds max file size configuration
2022-05-02 03:47:16 -05:00
if cfg . MaxFileSizeBytes > 0 && bytesWritten > types . FileSizeBytes ( cfg . MaxFileSizeBytes ) {
2021-04-14 04:53:24 -05:00
fileutils . RemoveDir ( tmpDir , r . Logger ) // delete temp file
2022-05-02 03:47:16 -05:00
return requestEntityTooLargeJSONResponse ( cfg . MaxFileSizeBytes )
2021-04-14 04:53:24 -05:00
}
2020-08-25 09:08:37 -05:00
// Look up the media by the file hash. If we already have the file but under a
// different media ID then we won't upload the file again - instead we'll just
// add a new metadata entry that refers to the same file.
existingMetadata , err := db . GetMediaMetadataByHash (
ctx , hash , r . MediaMetadata . Origin ,
2017-09-21 09:44:00 -05:00
)
2017-05-31 07:52:45 -05:00
if err != nil {
2020-08-26 09:38:34 -05:00
fileutils . RemoveDir ( tmpDir , r . Logger )
2020-08-25 09:08:37 -05:00
r . Logger . WithError ( err ) . Error ( "Error querying the database by hash." )
2023-05-16 19:33:27 -05:00
return & util . JSONResponse {
Code : http . StatusInternalServerError ,
JSON : spec . InternalServerError { } ,
}
2017-05-31 07:52:45 -05:00
}
2020-08-25 09:08:37 -05:00
if existingMetadata != nil {
2020-08-26 09:38:34 -05:00
// The file already exists, delete the uploaded temporary file.
defer fileutils . RemoveDir ( tmpDir , r . Logger )
2020-08-25 09:08:37 -05:00
// The file already exists. Make a new media ID up for it.
mediaID , merr := r . generateMediaID ( ctx , db )
if merr != nil {
r . Logger . WithError ( merr ) . Error ( "Failed to generate media ID for existing file" )
2023-05-16 19:33:27 -05:00
return & util . JSONResponse {
Code : http . StatusInternalServerError ,
JSON : spec . InternalServerError { } ,
}
2020-08-25 09:08:37 -05:00
}
2017-05-31 07:52:45 -05:00
2020-08-25 09:08:37 -05:00
// Then amend the upload metadata.
r . MediaMetadata = & types . MediaMetadata {
MediaID : mediaID ,
Origin : r . MediaMetadata . Origin ,
ContentType : r . MediaMetadata . ContentType ,
FileSizeBytes : r . MediaMetadata . FileSizeBytes ,
CreationTimestamp : r . MediaMetadata . CreationTimestamp ,
UploadName : r . MediaMetadata . UploadName ,
Base64Hash : hash ,
UserID : r . MediaMetadata . UserID ,
}
} else {
// The file doesn't exist. Update the request metadata.
r . MediaMetadata . FileSizeBytes = bytesWritten
r . MediaMetadata . Base64Hash = hash
r . MediaMetadata . MediaID , err = r . generateMediaID ( ctx , db )
if err != nil {
2020-08-26 09:38:34 -05:00
fileutils . RemoveDir ( tmpDir , r . Logger )
2020-08-25 09:08:37 -05:00
r . Logger . WithError ( err ) . Error ( "Failed to generate media ID for new upload" )
2023-05-16 19:33:27 -05:00
return & util . JSONResponse {
Code : http . StatusInternalServerError ,
JSON : spec . InternalServerError { } ,
}
2017-05-26 09:49:54 -05:00
}
}
2020-08-25 09:08:37 -05:00
r . Logger = r . Logger . WithField ( "media_id" , r . MediaMetadata . MediaID )
r . Logger . WithFields ( log . Fields {
"Base64Hash" : r . MediaMetadata . Base64Hash ,
"UploadName" : r . MediaMetadata . UploadName ,
"FileSizeBytes" : r . MediaMetadata . FileSizeBytes ,
"ContentType" : r . MediaMetadata . ContentType ,
} ) . Info ( "File uploaded" )
2017-11-15 05:13:09 -06:00
return r . storeFileAndMetadata (
2020-08-10 08:18:04 -05:00
ctx , tmpDir , cfg . AbsBasePath , db , cfg . ThumbnailSizes ,
activeThumbnailGeneration , cfg . MaxThumbnailGenerators ,
2017-11-15 05:13:09 -06:00
)
2017-05-26 02:57:09 -05:00
}
2021-04-14 04:53:24 -05:00
func requestEntityTooLargeJSONResponse ( maxFileSizeBytes config . FileSizeBytes ) * util . JSONResponse {
return & util . JSONResponse {
Code : http . StatusRequestEntityTooLarge ,
2023-05-09 17:46:49 -05:00
JSON : spec . Unknown ( fmt . Sprintf ( "HTTP Content-Length is greater than the maximum allowed upload size (%v)." , maxFileSizeBytes ) ) ,
2021-04-14 04:53:24 -05:00
}
}
2017-05-26 02:57:09 -05:00
// Validate validates the uploadRequest fields
2017-06-19 09:21:04 -05:00
func ( r * uploadRequest ) Validate ( maxFileSizeBytes config . FileSizeBytes ) * util . JSONResponse {
if maxFileSizeBytes > 0 && r . MediaMetadata . FileSizeBytes > types . FileSizeBytes ( maxFileSizeBytes ) {
2021-04-14 04:53:24 -05:00
return requestEntityTooLargeJSONResponse ( maxFileSizeBytes )
2017-05-26 02:57:09 -05:00
}
2017-11-22 09:45:03 -06:00
if strings . HasPrefix ( string ( r . MediaMetadata . UploadName ) , "~" ) {
2017-05-26 08:26:50 -05:00
return & util . JSONResponse {
2018-03-13 10:55:45 -05:00
Code : http . StatusBadRequest ,
2023-05-09 17:46:49 -05:00
JSON : spec . Unknown ( "File name must not begin with '~'." ) ,
2017-05-26 08:26:50 -05:00
}
}
2017-05-26 02:57:09 -05:00
// TODO: Validate filename - what are the valid characters?
if r . MediaMetadata . UserID != "" {
// TODO: We should put user ID parsing code into gomatrixserverlib and use that instead
// (see https://github.com/matrix-org/gomatrixserverlib/blob/3394e7c7003312043208aa73727d2256eea3d1f6/eventcontent.go#L347 )
// It should be a struct (with pointers into a single string to avoid copying) and
// we should update all refs to use UserID types rather than strings.
// https://github.com/matrix-org/synapse/blob/v0.19.2/synapse/types.py#L92
2017-07-07 08:11:32 -05:00
if _ , _ , err := gomatrixserverlib . SplitID ( '@' , string ( r . MediaMetadata . UserID ) ) ; err != nil {
2017-05-26 02:57:09 -05:00
return & util . JSONResponse {
2018-03-13 10:55:45 -05:00
Code : http . StatusBadRequest ,
2023-05-09 17:46:49 -05:00
JSON : spec . BadJSON ( "user id must be in the form @localpart:domain" ) ,
2017-05-26 02:57:09 -05:00
}
}
}
return nil
}
2017-05-26 09:49:54 -05:00
2017-05-26 10:42:08 -05:00
// storeFileAndMetadata moves the temporary file to its final path based on metadata and stores the metadata in the database
// See getPathFromMediaMetadata in fileutils for details of the final path.
// The order of operations is important as it avoids metadata entering the database before the file
// is ready, and if we fail to move the file, it never gets added to the database.
// Returns a util.JSONResponse error and cleans up directories in case of error.
2017-09-21 09:44:00 -05:00
func ( r * uploadRequest ) storeFileAndMetadata (
ctx context . Context ,
tmpDir types . Path ,
absBasePath config . Path ,
2020-01-03 08:07:05 -06:00
db storage . Database ,
2017-09-21 09:44:00 -05:00
thumbnailSizes [ ] config . ThumbnailSize ,
activeThumbnailGeneration * types . ActiveThumbnailGeneration ,
maxThumbnailGenerators int ,
) * util . JSONResponse {
2017-05-26 09:49:54 -05:00
finalPath , duplicate , err := fileutils . MoveFileWithHashCheck ( tmpDir , r . MediaMetadata , a bsBasePath , r . Logger )
if err != nil {
r . Logger . WithError ( err ) . Error ( "Failed to move file." )
return & util . JSONResponse {
2018-03-13 10:55:45 -05:00
Code : http . StatusBadRequest ,
2023-05-09 17:46:49 -05:00
JSON : spec . Unknown ( "Failed to upload" ) ,
2017-05-26 09:49:54 -05:00
}
}
if duplicate {
r . Logger . WithField ( "dst" , finalPath ) . Info ( "File was stored previously - discarding duplicate" )
}
2017-09-21 09:44:00 -05:00
if err = db . StoreMediaMetadata ( ctx , r . MediaMetadata ) ; err != nil {
2017-05-26 09:49:54 -05:00
r . Logger . WithError ( err ) . Warn ( "Failed to store metadata" )
// If the file is a duplicate (has the same hash as an existing file) then
// there is valid metadata in the database for that file. As such we only
// remove the file if it is not a duplicate.
2017-09-20 08:15:38 -05:00
if ! duplicate {
2017-05-26 10:15:54 -05:00
fileutils . RemoveDir ( types . Path ( path . Dir ( string ( finalPath ) ) ) , r . Logger )
2017-05-26 09:49:54 -05:00
}
return & util . JSONResponse {
2018-03-13 10:55:45 -05:00
Code : http . StatusBadRequest ,
2023-05-09 17:46:49 -05:00
JSON : spec . Unknown ( "Failed to upload" ) ,
2017-05-26 09:49:54 -05:00
}
}
2017-06-06 18:12:49 -05:00
go func ( ) {
2022-04-14 07:32:48 -05:00
file , err := os . Open ( string ( finalPath ) )
if err != nil {
r . Logger . WithError ( err ) . Error ( "unable to open file" )
return
}
defer file . Close ( ) // nolint: errcheck
// http.DetectContentType only needs 512 bytes
buf := make ( [ ] byte , 512 )
_ , err = file . Read ( buf )
if err != nil {
r . Logger . WithError ( err ) . Error ( "unable to read file" )
return
}
// Check if we need to generate thumbnails
fileType := http . DetectContentType ( buf )
if ! strings . HasPrefix ( fileType , "image" ) {
r . Logger . WithField ( "contentType" , fileType ) . Debugf ( "uploaded file is not an image or can not be thumbnailed, not generating thumbnails" )
return
}
2017-09-21 09:44:00 -05:00
busy , err := thumbnailer . GenerateThumbnails (
context . Background ( ) , finalPath , thumbnailSizes , r . MediaMetadata ,
activeThumbnailGeneration , maxThumbnailGenerators , db , r . Logger ,
)
2017-06-06 18:12:49 -05:00
if err != nil {
r . Logger . WithError ( err ) . Warn ( "Error generating thumbnails" )
}
if busy {
r . Logger . Warn ( "Maximum number of active thumbnail generators reached. Skipping pre-generation." )
}
} ( )
2017-05-26 09:49:54 -05:00
return nil
}