From 05e88d81cb44756f33cacdc0301ce33002fe37c0 Mon Sep 17 00:00:00 2001 From: Robert Swain Date: Fri, 26 May 2017 16:49:54 +0200 Subject: [PATCH 01/15] mediaapi: Add database storage and upload handler --- .../cmd/dendrite-media-api-server/main.go | 8 +- .../dendrite/mediaapi/fileutils/fileutils.go | 207 ++++++++++++++++++ .../dendrite/mediaapi/routing/routing.go | 6 +- .../storage/media_repository_table.go | 112 ++++++++++ .../dendrite/mediaapi/storage/prepare.go | 37 ++++ .../dendrite/mediaapi/storage/sql.go | 33 +++ .../dendrite/mediaapi/storage/storage.go | 56 +++++ .../dendrite/mediaapi/writers/upload.go | 109 ++++++++- 8 files changed, 561 insertions(+), 7 deletions(-) create mode 100644 src/github.com/matrix-org/dendrite/mediaapi/fileutils/fileutils.go create mode 100644 src/github.com/matrix-org/dendrite/mediaapi/storage/media_repository_table.go create mode 100644 src/github.com/matrix-org/dendrite/mediaapi/storage/prepare.go create mode 100644 src/github.com/matrix-org/dendrite/mediaapi/storage/sql.go create mode 100644 src/github.com/matrix-org/dendrite/mediaapi/storage/storage.go diff --git a/src/github.com/matrix-org/dendrite/cmd/dendrite-media-api-server/main.go b/src/github.com/matrix-org/dendrite/cmd/dendrite-media-api-server/main.go index bfc1ee0ec..298762482 100644 --- a/src/github.com/matrix-org/dendrite/cmd/dendrite-media-api-server/main.go +++ b/src/github.com/matrix-org/dendrite/cmd/dendrite-media-api-server/main.go @@ -23,6 +23,7 @@ import ( "github.com/matrix-org/dendrite/common" "github.com/matrix-org/dendrite/mediaapi/config" "github.com/matrix-org/dendrite/mediaapi/routing" + "github.com/matrix-org/dendrite/mediaapi/storage" "github.com/matrix-org/dendrite/mediaapi/types" "github.com/matrix-org/gomatrixserverlib" @@ -69,6 +70,11 @@ func main() { DataSource: dataSource, } + db, err := storage.Open(cfg.DataSource) + if err != nil { + log.WithError(err).Panic("Failed to open database") + } + log.WithFields(log.Fields{ "BASE_PATH": absBasePath, "BIND_ADDRESS": bindAddr, @@ -78,6 +84,6 @@ func main() { "SERVER_NAME": serverName, }).Info("Starting mediaapi") - routing.Setup(http.DefaultServeMux, http.DefaultClient, cfg) + routing.Setup(http.DefaultServeMux, http.DefaultClient, cfg, db) log.Fatal(http.ListenAndServe(bindAddr, nil)) } diff --git a/src/github.com/matrix-org/dendrite/mediaapi/fileutils/fileutils.go b/src/github.com/matrix-org/dendrite/mediaapi/fileutils/fileutils.go new file mode 100644 index 000000000..1305f433e --- /dev/null +++ b/src/github.com/matrix-org/dendrite/mediaapi/fileutils/fileutils.go @@ -0,0 +1,207 @@ +// Copyright 2017 Vector Creations Ltd +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package fileutils + +import ( + "bufio" + "crypto/sha256" + "encoding/base64" + "fmt" + "io" + "io/ioutil" + "os" + "path" + "path/filepath" + "strings" + + log "github.com/Sirupsen/logrus" + "github.com/matrix-org/dendrite/mediaapi/types" +) + +// RemoveDir removes a directory and logs a warning in case of errors +func RemoveDir(dir types.Path, logger *log.Entry) { + dirErr := os.RemoveAll(string(dir)) + if dirErr != nil { + logger.WithError(dirErr).WithField("dir", dir).Warn("Failed to remove directory") + } +} + +// createTempDir creates a tmp/ directory within baseDirectory and returns its path +func createTempDir(baseDirectory types.Path) (types.Path, error) { + baseTmpDir := path.Join(string(baseDirectory), "tmp") + if err := os.MkdirAll(baseTmpDir, 0770); err != nil { + return "", fmt.Errorf("Failed to create base temp dir: %v", err) + } + tmpDir, err := ioutil.TempDir(baseTmpDir, "") + if err != nil { + return "", fmt.Errorf("Failed to create temp dir: %v", err) + } + return types.Path(tmpDir), nil +} + +// createFileWriter creates a buffered file writer with a new file at directory/filename +// Returns the file handle as it needs to be closed when writing is complete +func createFileWriter(directory types.Path, filename types.Filename) (*bufio.Writer, *os.File, error) { + filePath := path.Join(string(directory), string(filename)) + file, err := os.Create(filePath) + if err != nil { + return nil, nil, fmt.Errorf("Failed to create file: %v", err) + } + + return bufio.NewWriter(file), file, nil +} + +func createTempFileWriter(absBasePath types.Path) (*bufio.Writer, *os.File, types.Path, error) { + tmpDir, err := createTempDir(absBasePath) + if err != nil { + return nil, nil, "", fmt.Errorf("Failed to create temp dir: %q", err) + } + writer, tmpFile, err := createFileWriter(tmpDir, "content") + if err != nil { + return nil, nil, "", fmt.Errorf("Failed to create file writer: %q", err) + } + return writer, tmpFile, tmpDir, nil +} + +var ( + // ErrFileIsTooLarge indicates that the uploaded file is larger than the configured maximum file size + ErrFileIsTooLarge = fmt.Errorf("file is too large") + errRead = fmt.Errorf("failed to read response from remote server") + errResponse = fmt.Errorf("failed to write file data to response body") + errHash = fmt.Errorf("failed to hash file data") + errWrite = fmt.Errorf("failed to write file to disk") +) + +// WriteTempFile writes to a new temporary file +func WriteTempFile(reqReader io.Reader, maxFileSizeBytes types.FileSizeBytes, absBasePath types.Path) (types.Base64Hash, types.FileSizeBytes, types.Path, error) { + tmpFileWriter, tmpFile, tmpDir, err := createTempFileWriter(absBasePath) + if err != nil { + return "", -1, "", err + } + defer tmpFile.Close() + + limitedReader := io.LimitReader(reqReader, int64(maxFileSizeBytes)) + // Hash the file data. The hash will be returned. The hash is useful as a + // method of deduplicating files to save storage, as well as a way to conduct + // integrity checks on the file data in the repository. + hasher := sha256.New() + teeReader := io.TeeReader(limitedReader, hasher) + bytesWritten, err := io.Copy(tmpFileWriter, teeReader) + if err != nil && err != io.EOF { + return "", -1, "", err + } + + tmpFileWriter.Flush() + + hash := hasher.Sum(nil) + return types.Base64Hash(base64.URLEncoding.EncodeToString(hash[:])), types.FileSizeBytes(bytesWritten), tmpDir, nil +} + +// GetPathFromBase64Hash evaluates the path to a media file from its Base64Hash +// If the Base64Hash is long enough, we split it into pieces, creating up to 2 subdirectories +// for more manageable browsing and use the remainder as the file name. +// For example, if Base64Hash is 'qwerty', the path will be 'q/w/erty'. +func GetPathFromBase64Hash(base64Hash types.Base64Hash, absBasePath types.Path) (string, error) { + var subPath, fileName string + + hashLen := len(base64Hash) + + switch { + case hashLen < 1: + return "", fmt.Errorf("Invalid filePath (Base64Hash too short): %q", base64Hash) + case hashLen > 255: + return "", fmt.Errorf("Invalid filePath (Base64Hash too long - max 255 characters): %q", base64Hash) + case hashLen < 2: + subPath = "" + fileName = string(base64Hash) + case hashLen < 3: + subPath = string(base64Hash[0:1]) + fileName = string(base64Hash[1:]) + default: + subPath = path.Join( + string(base64Hash[0:1]), + string(base64Hash[1:2]), + ) + fileName = string(base64Hash[2:]) + } + + filePath, err := filepath.Abs(path.Join( + string(absBasePath), + subPath, + fileName, + )) + if err != nil { + return "", fmt.Errorf("Unable to construct filePath: %q", err) + } + + // check if the absolute absBasePath is a prefix of the absolute filePath + // if so, no directory escape has occurred and the filePath is valid + // Note: absBasePath is already absolute + if strings.HasPrefix(filePath, string(absBasePath)) == false { + return "", fmt.Errorf("Invalid filePath (not within absBasePath %v): %v", absBasePath, filePath) + } + + return filePath, nil +} + +// moveFile attempts to move the file src to dst +func moveFile(src types.Path, dst types.Path) error { + dstDir := path.Dir(string(dst)) + + err := os.MkdirAll(dstDir, 0770) + if err != nil { + return fmt.Errorf("Failed to make directory: %q", err) + } + err = os.Rename(string(src), string(dst)) + if err != nil { + return fmt.Errorf("Failed to move directory: %q", err) + } + return nil +} + +// MoveFileWithHashCheck checks for hash collisions when moving a temporary file to its destination based on metadata +// Check if destination file exists. As the destination is based on a hash of the file data, +// if it exists and the file size does not match then there is a hash collision for two different files. If +// it exists and the file size matches, it is believable that it is the same file and we can just +// discard the temporary file. +func MoveFileWithHashCheck(tmpDir types.Path, mediaMetadata *types.MediaMetadata, absBasePath types.Path, logger *log.Entry) (string, bool, error) { + duplicate := false + finalPath, err := GetPathFromBase64Hash(mediaMetadata.Base64Hash, absBasePath) + if err != nil { + RemoveDir(tmpDir, logger) + return "", duplicate, fmt.Errorf("failed to get file path from metadata: %q", err) + } + + var stat os.FileInfo + if stat, err = os.Stat(finalPath); os.IsExist(err) { + duplicate = true + if stat.Size() == int64(mediaMetadata.FileSizeBytes) { + RemoveDir(tmpDir, logger) + return finalPath, duplicate, nil + } + // Remove the tmpDir as we anyway cannot cache the file on disk due to the hash collision + RemoveDir(tmpDir, logger) + return "", duplicate, fmt.Errorf("downloaded file with hash collision but different file size (%v)", finalPath) + } + err = moveFile( + types.Path(path.Join(string(tmpDir), "content")), + types.Path(finalPath), + ) + if err != nil { + RemoveDir(tmpDir, logger) + return "", duplicate, fmt.Errorf("failed to move file to final destination (%v): %q", finalPath, err) + } + return finalPath, duplicate, nil +} diff --git a/src/github.com/matrix-org/dendrite/mediaapi/routing/routing.go b/src/github.com/matrix-org/dendrite/mediaapi/routing/routing.go index bc06d3320..666a102ab 100644 --- a/src/github.com/matrix-org/dendrite/mediaapi/routing/routing.go +++ b/src/github.com/matrix-org/dendrite/mediaapi/routing/routing.go @@ -21,6 +21,7 @@ import ( "github.com/gorilla/mux" "github.com/matrix-org/dendrite/common" "github.com/matrix-org/dendrite/mediaapi/config" + "github.com/matrix-org/dendrite/mediaapi/storage" "github.com/matrix-org/dendrite/mediaapi/types" "github.com/matrix-org/dendrite/mediaapi/writers" "github.com/matrix-org/gomatrixserverlib" @@ -32,11 +33,12 @@ const pathPrefixR0 = "/_matrix/media/v1" // Setup registers HTTP handlers with the given ServeMux. It also supplies the given http.Client // to clients which need to make outbound HTTP requests. -func Setup(servMux *http.ServeMux, httpClient *http.Client, cfg *config.MediaAPI) { +func Setup(servMux *http.ServeMux, httpClient *http.Client, cfg *config.MediaAPI, db *storage.Database) { apiMux := mux.NewRouter() r0mux := apiMux.PathPrefix(pathPrefixR0).Subrouter() + // FIXME: /upload should use common.MakeAuthAPI() r0mux.Handle("/upload", common.MakeAPI("upload", func(req *http.Request) util.JSONResponse { - return writers.Upload(req, cfg) + return writers.Upload(req, cfg, db) })) activeRemoteRequests := &types.ActiveRemoteRequests{ diff --git a/src/github.com/matrix-org/dendrite/mediaapi/storage/media_repository_table.go b/src/github.com/matrix-org/dendrite/mediaapi/storage/media_repository_table.go new file mode 100644 index 000000000..a3b1c7594 --- /dev/null +++ b/src/github.com/matrix-org/dendrite/mediaapi/storage/media_repository_table.go @@ -0,0 +1,112 @@ +// Copyright 2017 Vector Creations Ltd +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package storage + +import ( + "database/sql" + "time" + + "github.com/matrix-org/dendrite/mediaapi/types" + "github.com/matrix-org/gomatrixserverlib" +) + +const mediaSchema = ` +-- The media_repository table holds metadata for each media file stored and accessible to the local server, +-- the actual file is stored separately. +CREATE TABLE IF NOT EXISTS media_repository ( + -- The id used to refer to the media. + -- For uploads to this server this is a base64-encoded sha256 hash of the file data + -- For media from remote servers, this can be any unique identifier string + media_id TEXT NOT NULL, + -- The origin of the media as requested by the client. Should be a homeserver domain. + media_origin TEXT NOT NULL, + -- The MIME-type of the media file as specified when uploading. + content_type TEXT NOT NULL, + -- The HTTP Content-Disposition header for the media file as specified when uploading. + content_disposition TEXT NOT NULL, + -- Size of the media file in bytes. + file_size_bytes BIGINT NOT NULL, + -- When the content was uploaded in UNIX epoch ms. + creation_ts BIGINT NOT NULL, + -- The file name with which the media was uploaded. + upload_name TEXT NOT NULL, + -- A golang base64 URLEncoding string representation of a SHA-256 hash sum of the file data. + base64hash TEXT NOT NULL, + -- The user who uploaded the file. Should be a Matrix user ID. + user_id TEXT NOT NULL +); +CREATE UNIQUE INDEX IF NOT EXISTS media_repository_index ON media_repository (media_id, media_origin); +` + +const insertMediaSQL = ` +INSERT INTO media_repository (media_id, media_origin, content_type, content_disposition, file_size_bytes, creation_ts, upload_name, base64hash, user_id) + VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9) +` + +const selectMediaSQL = ` +SELECT content_type, content_disposition, file_size_bytes, creation_ts, upload_name, base64hash, user_id FROM media_repository WHERE media_id = $1 AND media_origin = $2 +` + +type mediaStatements struct { + insertMediaStmt *sql.Stmt + selectMediaStmt *sql.Stmt +} + +func (s *mediaStatements) prepare(db *sql.DB) (err error) { + _, err = db.Exec(mediaSchema) + if err != nil { + return + } + + return statementList{ + {&s.insertMediaStmt, insertMediaSQL}, + {&s.selectMediaStmt, selectMediaSQL}, + }.prepare(db) +} + +func (s *mediaStatements) insertMedia(mediaMetadata *types.MediaMetadata) error { + mediaMetadata.CreationTimestamp = types.UnixMs(time.Now().UnixNano() / 1000000) + _, err := s.insertMediaStmt.Exec( + mediaMetadata.MediaID, + mediaMetadata.Origin, + mediaMetadata.ContentType, + mediaMetadata.ContentDisposition, + mediaMetadata.FileSizeBytes, + mediaMetadata.CreationTimestamp, + mediaMetadata.UploadName, + mediaMetadata.Base64Hash, + mediaMetadata.UserID, + ) + return err +} + +func (s *mediaStatements) selectMedia(mediaID types.MediaID, mediaOrigin gomatrixserverlib.ServerName) (*types.MediaMetadata, error) { + mediaMetadata := types.MediaMetadata{ + MediaID: mediaID, + Origin: mediaOrigin, + } + err := s.selectMediaStmt.QueryRow( + mediaMetadata.MediaID, mediaMetadata.Origin, + ).Scan( + &mediaMetadata.ContentType, + &mediaMetadata.ContentDisposition, + &mediaMetadata.FileSizeBytes, + &mediaMetadata.CreationTimestamp, + &mediaMetadata.UploadName, + &mediaMetadata.Base64Hash, + &mediaMetadata.UserID, + ) + return &mediaMetadata, err +} diff --git a/src/github.com/matrix-org/dendrite/mediaapi/storage/prepare.go b/src/github.com/matrix-org/dendrite/mediaapi/storage/prepare.go new file mode 100644 index 000000000..a30586de4 --- /dev/null +++ b/src/github.com/matrix-org/dendrite/mediaapi/storage/prepare.go @@ -0,0 +1,37 @@ +// Copyright 2017 Vector Creations Ltd +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// FIXME: This should be made common! + +package storage + +import ( + "database/sql" +) + +// a statementList is a list of SQL statements to prepare and a pointer to where to store the resulting prepared statement. +type statementList []struct { + statement **sql.Stmt + sql string +} + +// prepare the SQL for each statement in the list and assign the result to the prepared statement. +func (s statementList) prepare(db *sql.DB) (err error) { + for _, statement := range s { + if *statement.statement, err = db.Prepare(statement.sql); err != nil { + return + } + } + return +} diff --git a/src/github.com/matrix-org/dendrite/mediaapi/storage/sql.go b/src/github.com/matrix-org/dendrite/mediaapi/storage/sql.go new file mode 100644 index 000000000..e992e073e --- /dev/null +++ b/src/github.com/matrix-org/dendrite/mediaapi/storage/sql.go @@ -0,0 +1,33 @@ +// Copyright 2017 Vector Creations Ltd +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package storage + +import ( + "database/sql" +) + +type statements struct { + mediaStatements +} + +func (s *statements) prepare(db *sql.DB) error { + var err error + + if err = s.mediaStatements.prepare(db); err != nil { + return err + } + + return nil +} diff --git a/src/github.com/matrix-org/dendrite/mediaapi/storage/storage.go b/src/github.com/matrix-org/dendrite/mediaapi/storage/storage.go new file mode 100644 index 000000000..630809cbe --- /dev/null +++ b/src/github.com/matrix-org/dendrite/mediaapi/storage/storage.go @@ -0,0 +1,56 @@ +// Copyright 2017 Vector Creations Ltd +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package storage + +import ( + "database/sql" + + // Import the postgres database driver. + _ "github.com/lib/pq" + "github.com/matrix-org/dendrite/mediaapi/types" + "github.com/matrix-org/gomatrixserverlib" +) + +// A Database is used to store metadata about a repository of media files. +type Database struct { + statements statements + db *sql.DB +} + +// Open a postgres database. +func Open(dataSourceName string) (*Database, error) { + var d Database + var err error + if d.db, err = sql.Open("postgres", dataSourceName); err != nil { + return nil, err + } + if err = d.statements.prepare(d.db); err != nil { + return nil, err + } + return &d, nil +} + +// StoreMediaMetadata inserts the metadata about the uploaded media into the database. +// Returns an error if the combination of MediaID and Origin are not unique in the table. +func (d *Database) StoreMediaMetadata(mediaMetadata *types.MediaMetadata) error { + return d.statements.insertMedia(mediaMetadata) +} + +// GetMediaMetadata returns metadata about media stored on this server. The media could +// have been uploaded to this server or fetched from another server and cached here. +// Returns sql.ErrNoRows if there is no metadata associated with this media. +func (d *Database) GetMediaMetadata(mediaID types.MediaID, mediaOrigin gomatrixserverlib.ServerName) (*types.MediaMetadata, error) { + return d.statements.selectMedia(mediaID, mediaOrigin) +} diff --git a/src/github.com/matrix-org/dendrite/mediaapi/writers/upload.go b/src/github.com/matrix-org/dendrite/mediaapi/writers/upload.go index 3b0213577..7117f246d 100644 --- a/src/github.com/matrix-org/dendrite/mediaapi/writers/upload.go +++ b/src/github.com/matrix-org/dendrite/mediaapi/writers/upload.go @@ -15,14 +15,18 @@ package writers import ( + "database/sql" "fmt" "net/http" "net/url" + "path" "strings" log "github.com/Sirupsen/logrus" "github.com/matrix-org/dendrite/clientapi/jsonerror" "github.com/matrix-org/dendrite/mediaapi/config" + "github.com/matrix-org/dendrite/mediaapi/fileutils" + "github.com/matrix-org/dendrite/mediaapi/storage" "github.com/matrix-org/dendrite/mediaapi/types" "github.com/matrix-org/util" ) @@ -46,13 +50,75 @@ type uploadResponse struct { // This implementation supports a configurable maximum file size limit in bytes. If a user tries to upload more than this, they will receive an error that their upload is too large. // Uploaded files are processed piece-wise to avoid DoS attacks which would starve the server of memory. // TODO: We should time out requests if they have not received any data within a configured timeout period. -func Upload(req *http.Request, cfg *config.MediaAPI) util.JSONResponse { +func Upload(req *http.Request, cfg *config.MediaAPI, db *storage.Database) util.JSONResponse { r, resErr := parseAndValidateRequest(req, cfg) if resErr != nil { return *resErr } - // doUpload + r.Logger.WithFields(log.Fields{ + "Origin": r.MediaMetadata.Origin, + "UploadName": r.MediaMetadata.UploadName, + "FileSizeBytes": r.MediaMetadata.FileSizeBytes, + "Content-Type": r.MediaMetadata.ContentType, + "Content-Disposition": r.MediaMetadata.ContentDisposition, + }).Info("Uploading file") + + // The file data is hashed and the hash is used as the MediaID. The hash is useful as a + // method of deduplicating files to save storage, as well as a way to conduct + // integrity checks on the file data in the repository. + hash, bytesWritten, tmpDir, copyError := fileutils.WriteTempFile(req.Body, cfg.MaxFileSizeBytes, cfg.AbsBasePath) + if copyError != nil { + logFields := log.Fields{ + "Origin": r.MediaMetadata.Origin, + "MediaID": r.MediaMetadata.MediaID, + } + if copyError == fileutils.ErrFileIsTooLarge { + logFields["MaxFileSizeBytes"] = cfg.MaxFileSizeBytes + } + r.Logger.WithError(copyError).WithFields(logFields).Warn("Error while transferring file") + fileutils.RemoveDir(tmpDir, r.Logger) + return util.JSONResponse{ + Code: 400, + JSON: jsonerror.Unknown(fmt.Sprintf("Failed to upload")), + } + } + + r.MediaMetadata.FileSizeBytes = bytesWritten + r.MediaMetadata.Base64Hash = hash + r.MediaMetadata.MediaID = types.MediaID(hash) + + r.Logger.WithFields(log.Fields{ + "MediaID": r.MediaMetadata.MediaID, + "Origin": r.MediaMetadata.Origin, + "Base64Hash": r.MediaMetadata.Base64Hash, + "UploadName": r.MediaMetadata.UploadName, + "FileSizeBytes": r.MediaMetadata.FileSizeBytes, + "Content-Type": r.MediaMetadata.ContentType, + "Content-Disposition": r.MediaMetadata.ContentDisposition, + }).Info("File uploaded") + + // check if we already have a record of the media in our database and if so, we can remove the temporary directory + mediaMetadata, err := db.GetMediaMetadata(r.MediaMetadata.MediaID, r.MediaMetadata.Origin) + if err == nil { + r.MediaMetadata = mediaMetadata + fileutils.RemoveDir(tmpDir, r.Logger) + return util.JSONResponse{ + Code: 200, + JSON: uploadResponse{ + ContentURI: fmt.Sprintf("mxc://%s/%s", cfg.ServerName, r.MediaMetadata.MediaID), + }, + } + } else if err != sql.ErrNoRows { + r.Logger.WithError(err).WithField("MediaID", r.MediaMetadata.MediaID).Warn("Failed to query database") + } + + // TODO: generate thumbnails + + resErr = r.storeFileAndMetadata(tmpDir, cfg.AbsBasePath, db) + if resErr != nil { + return *resErr + } return util.JSONResponse{ Code: 200, @@ -73,8 +139,6 @@ func parseAndValidateRequest(req *http.Request, cfg *config.MediaAPI) (*uploadRe } } - // authenticate user - r := &uploadRequest{ MediaMetadata: &types.MediaMetadata{ Origin: cfg.ServerName, @@ -149,3 +213,40 @@ func (r *uploadRequest) Validate(maxFileSizeBytes types.FileSizeBytes) *util.JSO } return nil } + +// storeFileAndMetadata first moves a temporary file named content from tmpDir to its +// final path (see getPathFromMediaMetadata for details.) Once the file is moved, the +// metadata about the file is written into the media repository database. This order +// of operations is important as it avoids metadata entering the database before the file +// is ready and if we fail to move the file, it never gets added to the database. +// In case of any error, appropriate files and directories are cleaned up a +// util.JSONResponse error is returned. +func (r *uploadRequest) storeFileAndMetadata(tmpDir types.Path, absBasePath types.Path, db *storage.Database) *util.JSONResponse { + finalPath, duplicate, err := fileutils.MoveFileWithHashCheck(tmpDir, r.MediaMetadata, absBasePath, r.Logger) + if err != nil { + r.Logger.WithError(err).Error("Failed to move file.") + return &util.JSONResponse{ + Code: 400, + JSON: jsonerror.Unknown(fmt.Sprintf("Failed to upload")), + } + } + if duplicate { + r.Logger.WithField("dst", finalPath).Info("File was stored previously - discarding duplicate") + } + + if err = db.StoreMediaMetadata(r.MediaMetadata); err != nil { + r.Logger.WithError(err).Warn("Failed to store metadata") + // If the file is a duplicate (has the same hash as an existing file) then + // there is valid metadata in the database for that file. As such we only + // remove the file if it is not a duplicate. + if duplicate == false { + fileutils.RemoveDir(types.Path(path.Dir(finalPath)), r.Logger) + } + return &util.JSONResponse{ + Code: 400, + JSON: jsonerror.Unknown(fmt.Sprintf("Failed to upload")), + } + } + + return nil +} From 731c10a41885300305bdd33b7ef0c833dc6731ec Mon Sep 17 00:00:00 2001 From: Robert Swain Date: Fri, 26 May 2017 17:15:54 +0200 Subject: [PATCH 02/15] mediaapi/fileutils: Clean up Reorder functions to have public API functions in alphabetical order at the top, internal package functions at the bottom in call order. Use RawURLEncoding to avoid padding the hash with '='. Use stronger types for paths in public API. Simplify comments. --- .../dendrite/mediaapi/fileutils/fileutils.go | 199 +++++++++--------- .../dendrite/mediaapi/writers/upload.go | 2 +- 2 files changed, 100 insertions(+), 101 deletions(-) diff --git a/src/github.com/matrix-org/dendrite/mediaapi/fileutils/fileutils.go b/src/github.com/matrix-org/dendrite/mediaapi/fileutils/fileutils.go index 1305f433e..b4ad7c9d2 100644 --- a/src/github.com/matrix-org/dendrite/mediaapi/fileutils/fileutils.go +++ b/src/github.com/matrix-org/dendrite/mediaapi/fileutils/fileutils.go @@ -30,51 +30,7 @@ import ( "github.com/matrix-org/dendrite/mediaapi/types" ) -// RemoveDir removes a directory and logs a warning in case of errors -func RemoveDir(dir types.Path, logger *log.Entry) { - dirErr := os.RemoveAll(string(dir)) - if dirErr != nil { - logger.WithError(dirErr).WithField("dir", dir).Warn("Failed to remove directory") - } -} - -// createTempDir creates a tmp/ directory within baseDirectory and returns its path -func createTempDir(baseDirectory types.Path) (types.Path, error) { - baseTmpDir := path.Join(string(baseDirectory), "tmp") - if err := os.MkdirAll(baseTmpDir, 0770); err != nil { - return "", fmt.Errorf("Failed to create base temp dir: %v", err) - } - tmpDir, err := ioutil.TempDir(baseTmpDir, "") - if err != nil { - return "", fmt.Errorf("Failed to create temp dir: %v", err) - } - return types.Path(tmpDir), nil -} - -// createFileWriter creates a buffered file writer with a new file at directory/filename -// Returns the file handle as it needs to be closed when writing is complete -func createFileWriter(directory types.Path, filename types.Filename) (*bufio.Writer, *os.File, error) { - filePath := path.Join(string(directory), string(filename)) - file, err := os.Create(filePath) - if err != nil { - return nil, nil, fmt.Errorf("Failed to create file: %v", err) - } - - return bufio.NewWriter(file), file, nil -} - -func createTempFileWriter(absBasePath types.Path) (*bufio.Writer, *os.File, types.Path, error) { - tmpDir, err := createTempDir(absBasePath) - if err != nil { - return nil, nil, "", fmt.Errorf("Failed to create temp dir: %q", err) - } - writer, tmpFile, err := createFileWriter(tmpDir, "content") - if err != nil { - return nil, nil, "", fmt.Errorf("Failed to create file writer: %q", err) - } - return writer, tmpFile, tmpDir, nil -} - +// FIXME: make into error types var ( // ErrFileIsTooLarge indicates that the uploaded file is larger than the configured maximum file size ErrFileIsTooLarge = fmt.Errorf("file is too large") @@ -84,31 +40,6 @@ var ( errWrite = fmt.Errorf("failed to write file to disk") ) -// WriteTempFile writes to a new temporary file -func WriteTempFile(reqReader io.Reader, maxFileSizeBytes types.FileSizeBytes, absBasePath types.Path) (types.Base64Hash, types.FileSizeBytes, types.Path, error) { - tmpFileWriter, tmpFile, tmpDir, err := createTempFileWriter(absBasePath) - if err != nil { - return "", -1, "", err - } - defer tmpFile.Close() - - limitedReader := io.LimitReader(reqReader, int64(maxFileSizeBytes)) - // Hash the file data. The hash will be returned. The hash is useful as a - // method of deduplicating files to save storage, as well as a way to conduct - // integrity checks on the file data in the repository. - hasher := sha256.New() - teeReader := io.TeeReader(limitedReader, hasher) - bytesWritten, err := io.Copy(tmpFileWriter, teeReader) - if err != nil && err != io.EOF { - return "", -1, "", err - } - - tmpFileWriter.Flush() - - hash := hasher.Sum(nil) - return types.Base64Hash(base64.URLEncoding.EncodeToString(hash[:])), types.FileSizeBytes(bytesWritten), tmpDir, nil -} - // GetPathFromBase64Hash evaluates the path to a media file from its Base64Hash // If the Base64Hash is long enough, we split it into pieces, creating up to 2 subdirectories // for more manageable browsing and use the remainder as the file name. @@ -156,6 +87,71 @@ func GetPathFromBase64Hash(base64Hash types.Base64Hash, absBasePath types.Path) return filePath, nil } +// MoveFileWithHashCheck checks for hash collisions when moving a temporary file to its final path based on metadata +// The final path is based on the hash of the file. +// If the final path exists and the file size matches, the file does not need to be moved. +// In error cases where the file is not a duplicate, the caller may decide to remove the final path. +// Returns the final path of the file, whether it is a duplicate and an error. +func MoveFileWithHashCheck(tmpDir types.Path, mediaMetadata *types.MediaMetadata, absBasePath types.Path, logger *log.Entry) (types.Path, bool, error) { + // Note: in all error and success cases, we need to remove the temporary directory + defer RemoveDir(tmpDir, logger) + duplicate := false + finalPath, err := GetPathFromBase64Hash(mediaMetadata.Base64Hash, absBasePath) + if err != nil { + return "", duplicate, fmt.Errorf("failed to get file path from metadata: %q", err) + } + + var stat os.FileInfo + if stat, err = os.Stat(finalPath); os.IsExist(err) { + duplicate = true + if stat.Size() == int64(mediaMetadata.FileSizeBytes) { + return types.Path(finalPath), duplicate, nil + } + return "", duplicate, fmt.Errorf("downloaded file with hash collision but different file size (%v)", finalPath) + } + err = moveFile( + types.Path(path.Join(string(tmpDir), "content")), + types.Path(finalPath), + ) + if err != nil { + return "", duplicate, fmt.Errorf("failed to move file to final destination (%v): %q", finalPath, err) + } + return types.Path(finalPath), duplicate, nil +} + +// RemoveDir removes a directory and logs a warning in case of errors +func RemoveDir(dir types.Path, logger *log.Entry) { + dirErr := os.RemoveAll(string(dir)) + if dirErr != nil { + logger.WithError(dirErr).WithField("dir", dir).Warn("Failed to remove directory") + } +} + +// WriteTempFile writes to a new temporary file +func WriteTempFile(reqReader io.Reader, maxFileSizeBytes types.FileSizeBytes, absBasePath types.Path) (types.Base64Hash, types.FileSizeBytes, types.Path, error) { + tmpFileWriter, tmpFile, tmpDir, err := createTempFileWriter(absBasePath) + if err != nil { + return "", -1, "", err + } + defer tmpFile.Close() + + limitedReader := io.LimitReader(reqReader, int64(maxFileSizeBytes)) + // Hash the file data. The hash will be returned. The hash is useful as a + // method of deduplicating files to save storage, as well as a way to conduct + // integrity checks on the file data in the repository. + hasher := sha256.New() + teeReader := io.TeeReader(limitedReader, hasher) + bytesWritten, err := io.Copy(tmpFileWriter, teeReader) + if err != nil && err != io.EOF { + return "", -1, "", err + } + + tmpFileWriter.Flush() + + hash := hasher.Sum(nil) + return types.Base64Hash(base64.RawURLEncoding.EncodeToString(hash[:])), types.FileSizeBytes(bytesWritten), tmpDir, nil +} + // moveFile attempts to move the file src to dst func moveFile(src types.Path, dst types.Path) error { dstDir := path.Dir(string(dst)) @@ -171,37 +167,40 @@ func moveFile(src types.Path, dst types.Path) error { return nil } -// MoveFileWithHashCheck checks for hash collisions when moving a temporary file to its destination based on metadata -// Check if destination file exists. As the destination is based on a hash of the file data, -// if it exists and the file size does not match then there is a hash collision for two different files. If -// it exists and the file size matches, it is believable that it is the same file and we can just -// discard the temporary file. -func MoveFileWithHashCheck(tmpDir types.Path, mediaMetadata *types.MediaMetadata, absBasePath types.Path, logger *log.Entry) (string, bool, error) { - duplicate := false - finalPath, err := GetPathFromBase64Hash(mediaMetadata.Base64Hash, absBasePath) +func createTempFileWriter(absBasePath types.Path) (*bufio.Writer, *os.File, types.Path, error) { + tmpDir, err := createTempDir(absBasePath) if err != nil { - RemoveDir(tmpDir, logger) - return "", duplicate, fmt.Errorf("failed to get file path from metadata: %q", err) + return nil, nil, "", fmt.Errorf("Failed to create temp dir: %q", err) + } + writer, tmpFile, err := createFileWriter(tmpDir, "content") + if err != nil { + return nil, nil, "", fmt.Errorf("Failed to create file writer: %q", err) + } + return writer, tmpFile, tmpDir, nil +} + +// createTempDir creates a tmp/ directory within baseDirectory and returns its path +func createTempDir(baseDirectory types.Path) (types.Path, error) { + baseTmpDir := path.Join(string(baseDirectory), "tmp") + if err := os.MkdirAll(baseTmpDir, 0770); err != nil { + return "", fmt.Errorf("Failed to create base temp dir: %v", err) + } + tmpDir, err := ioutil.TempDir(baseTmpDir, "") + if err != nil { + return "", fmt.Errorf("Failed to create temp dir: %v", err) + } + return types.Path(tmpDir), nil +} + +// createFileWriter creates a buffered file writer with a new file at directory/filename +// The caller should flush the writer before closing the file. +// Returns the file handle as it needs to be closed when writing is complete +func createFileWriter(directory types.Path, filename types.Filename) (*bufio.Writer, *os.File, error) { + filePath := path.Join(string(directory), string(filename)) + file, err := os.Create(filePath) + if err != nil { + return nil, nil, fmt.Errorf("Failed to create file: %v", err) } - var stat os.FileInfo - if stat, err = os.Stat(finalPath); os.IsExist(err) { - duplicate = true - if stat.Size() == int64(mediaMetadata.FileSizeBytes) { - RemoveDir(tmpDir, logger) - return finalPath, duplicate, nil - } - // Remove the tmpDir as we anyway cannot cache the file on disk due to the hash collision - RemoveDir(tmpDir, logger) - return "", duplicate, fmt.Errorf("downloaded file with hash collision but different file size (%v)", finalPath) - } - err = moveFile( - types.Path(path.Join(string(tmpDir), "content")), - types.Path(finalPath), - ) - if err != nil { - RemoveDir(tmpDir, logger) - return "", duplicate, fmt.Errorf("failed to move file to final destination (%v): %q", finalPath, err) - } - return finalPath, duplicate, nil + return bufio.NewWriter(file), file, nil } diff --git a/src/github.com/matrix-org/dendrite/mediaapi/writers/upload.go b/src/github.com/matrix-org/dendrite/mediaapi/writers/upload.go index 7117f246d..bcaaa2556 100644 --- a/src/github.com/matrix-org/dendrite/mediaapi/writers/upload.go +++ b/src/github.com/matrix-org/dendrite/mediaapi/writers/upload.go @@ -240,7 +240,7 @@ func (r *uploadRequest) storeFileAndMetadata(tmpDir types.Path, absBasePath type // there is valid metadata in the database for that file. As such we only // remove the file if it is not a duplicate. if duplicate == false { - fileutils.RemoveDir(types.Path(path.Dir(finalPath)), r.Logger) + fileutils.RemoveDir(types.Path(path.Dir(string(finalPath))), r.Logger) } return &util.JSONResponse{ Code: 400, From d83359dd51041cc652408d11ff5b0f2052bcad27 Mon Sep 17 00:00:00 2001 From: Robert Swain Date: Fri, 26 May 2017 17:24:13 +0200 Subject: [PATCH 03/15] mediaapi: Remove unnecessary ContentDisposition Content-Disposition is only used for communicating the filename. It does not need to be stored in the database as we have upload_name anyway. It does not need to be in types.MediaMetadata and does not need to be logged. --- .../storage/media_repository_table.go | 10 ++--- .../dendrite/mediaapi/types/types.go | 20 ++++------ .../dendrite/mediaapi/writers/upload.go | 37 +++++++------------ 3 files changed, 25 insertions(+), 42 deletions(-) diff --git a/src/github.com/matrix-org/dendrite/mediaapi/storage/media_repository_table.go b/src/github.com/matrix-org/dendrite/mediaapi/storage/media_repository_table.go index a3b1c7594..e4a4cbf83 100644 --- a/src/github.com/matrix-org/dendrite/mediaapi/storage/media_repository_table.go +++ b/src/github.com/matrix-org/dendrite/mediaapi/storage/media_repository_table.go @@ -34,8 +34,6 @@ CREATE TABLE IF NOT EXISTS media_repository ( media_origin TEXT NOT NULL, -- The MIME-type of the media file as specified when uploading. content_type TEXT NOT NULL, - -- The HTTP Content-Disposition header for the media file as specified when uploading. - content_disposition TEXT NOT NULL, -- Size of the media file in bytes. file_size_bytes BIGINT NOT NULL, -- When the content was uploaded in UNIX epoch ms. @@ -51,12 +49,12 @@ CREATE UNIQUE INDEX IF NOT EXISTS media_repository_index ON media_repository (me ` const insertMediaSQL = ` -INSERT INTO media_repository (media_id, media_origin, content_type, content_disposition, file_size_bytes, creation_ts, upload_name, base64hash, user_id) - VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9) +INSERT INTO media_repository (media_id, media_origin, content_type, file_size_bytes, creation_ts, upload_name, base64hash, user_id) + VALUES ($1, $2, $3, $4, $5, $6, $7, $8) ` const selectMediaSQL = ` -SELECT content_type, content_disposition, file_size_bytes, creation_ts, upload_name, base64hash, user_id FROM media_repository WHERE media_id = $1 AND media_origin = $2 +SELECT content_type, file_size_bytes, creation_ts, upload_name, base64hash, user_id FROM media_repository WHERE media_id = $1 AND media_origin = $2 ` type mediaStatements struct { @@ -82,7 +80,6 @@ func (s *mediaStatements) insertMedia(mediaMetadata *types.MediaMetadata) error mediaMetadata.MediaID, mediaMetadata.Origin, mediaMetadata.ContentType, - mediaMetadata.ContentDisposition, mediaMetadata.FileSizeBytes, mediaMetadata.CreationTimestamp, mediaMetadata.UploadName, @@ -101,7 +98,6 @@ func (s *mediaStatements) selectMedia(mediaID types.MediaID, mediaOrigin gomatri mediaMetadata.MediaID, mediaMetadata.Origin, ).Scan( &mediaMetadata.ContentType, - &mediaMetadata.ContentDisposition, &mediaMetadata.FileSizeBytes, &mediaMetadata.CreationTimestamp, &mediaMetadata.UploadName, diff --git a/src/github.com/matrix-org/dendrite/mediaapi/types/types.go b/src/github.com/matrix-org/dendrite/mediaapi/types/types.go index a3065bf8d..ac18f5fe2 100644 --- a/src/github.com/matrix-org/dendrite/mediaapi/types/types.go +++ b/src/github.com/matrix-org/dendrite/mediaapi/types/types.go @@ -20,9 +20,6 @@ import ( "github.com/matrix-org/gomatrixserverlib" ) -// ContentDisposition is an HTTP Content-Disposition header string -type ContentDisposition string - // FileSizeBytes is a file size in bytes type FileSizeBytes int64 @@ -52,15 +49,14 @@ type UnixMs int64 // MediaMetadata is metadata associated with a media file type MediaMetadata struct { - MediaID MediaID - Origin gomatrixserverlib.ServerName - ContentType ContentType - ContentDisposition ContentDisposition - FileSizeBytes FileSizeBytes - CreationTimestamp UnixMs - UploadName Filename - Base64Hash Base64Hash - UserID MatrixUserID + MediaID MediaID + Origin gomatrixserverlib.ServerName + ContentType ContentType + FileSizeBytes FileSizeBytes + CreationTimestamp UnixMs + UploadName Filename + Base64Hash Base64Hash + UserID MatrixUserID } // ActiveRemoteRequests is a lockable map of media URIs requested from remote homeservers diff --git a/src/github.com/matrix-org/dendrite/mediaapi/writers/upload.go b/src/github.com/matrix-org/dendrite/mediaapi/writers/upload.go index bcaaa2556..5b2532f0a 100644 --- a/src/github.com/matrix-org/dendrite/mediaapi/writers/upload.go +++ b/src/github.com/matrix-org/dendrite/mediaapi/writers/upload.go @@ -57,11 +57,10 @@ func Upload(req *http.Request, cfg *config.MediaAPI, db *storage.Database) util. } r.Logger.WithFields(log.Fields{ - "Origin": r.MediaMetadata.Origin, - "UploadName": r.MediaMetadata.UploadName, - "FileSizeBytes": r.MediaMetadata.FileSizeBytes, - "Content-Type": r.MediaMetadata.ContentType, - "Content-Disposition": r.MediaMetadata.ContentDisposition, + "Origin": r.MediaMetadata.Origin, + "UploadName": r.MediaMetadata.UploadName, + "FileSizeBytes": r.MediaMetadata.FileSizeBytes, + "Content-Type": r.MediaMetadata.ContentType, }).Info("Uploading file") // The file data is hashed and the hash is used as the MediaID. The hash is useful as a @@ -89,13 +88,12 @@ func Upload(req *http.Request, cfg *config.MediaAPI, db *storage.Database) util. r.MediaMetadata.MediaID = types.MediaID(hash) r.Logger.WithFields(log.Fields{ - "MediaID": r.MediaMetadata.MediaID, - "Origin": r.MediaMetadata.Origin, - "Base64Hash": r.MediaMetadata.Base64Hash, - "UploadName": r.MediaMetadata.UploadName, - "FileSizeBytes": r.MediaMetadata.FileSizeBytes, - "Content-Type": r.MediaMetadata.ContentType, - "Content-Disposition": r.MediaMetadata.ContentDisposition, + "MediaID": r.MediaMetadata.MediaID, + "Origin": r.MediaMetadata.Origin, + "Base64Hash": r.MediaMetadata.Base64Hash, + "UploadName": r.MediaMetadata.UploadName, + "FileSizeBytes": r.MediaMetadata.FileSizeBytes, + "Content-Type": r.MediaMetadata.ContentType, }).Info("File uploaded") // check if we already have a record of the media in our database and if so, we can remove the temporary directory @@ -141,11 +139,10 @@ func parseAndValidateRequest(req *http.Request, cfg *config.MediaAPI) (*uploadRe r := &uploadRequest{ MediaMetadata: &types.MediaMetadata{ - Origin: cfg.ServerName, - ContentDisposition: types.ContentDisposition(req.Header.Get("Content-Disposition")), - FileSizeBytes: types.FileSizeBytes(req.ContentLength), - ContentType: types.ContentType(req.Header.Get("Content-Type")), - UploadName: types.Filename(url.PathEscape(req.FormValue("filename"))), + Origin: cfg.ServerName, + FileSizeBytes: types.FileSizeBytes(req.ContentLength), + ContentType: types.ContentType(req.Header.Get("Content-Type")), + UploadName: types.Filename(url.PathEscape(req.FormValue("filename"))), }, Logger: util.GetLogger(req.Context()), } @@ -154,12 +151,6 @@ func parseAndValidateRequest(req *http.Request, cfg *config.MediaAPI) (*uploadRe return nil, resErr } - if len(r.MediaMetadata.UploadName) > 0 { - r.MediaMetadata.ContentDisposition = types.ContentDisposition( - "inline; filename*=utf-8''" + string(r.MediaMetadata.UploadName), - ) - } - return r, nil } From 9ecf620ad9f97f9726c2fa0bf71e722a923b7892 Mon Sep 17 00:00:00 2001 From: Robert Swain Date: Fri, 26 May 2017 17:34:58 +0200 Subject: [PATCH 04/15] mediaapi/writers/upload: Factor out doUpload from Upload --- .../dendrite/mediaapi/writers/upload.go | 126 ++++++++++-------- 1 file changed, 67 insertions(+), 59 deletions(-) diff --git a/src/github.com/matrix-org/dendrite/mediaapi/writers/upload.go b/src/github.com/matrix-org/dendrite/mediaapi/writers/upload.go index 5b2532f0a..9168f8bd6 100644 --- a/src/github.com/matrix-org/dendrite/mediaapi/writers/upload.go +++ b/src/github.com/matrix-org/dendrite/mediaapi/writers/upload.go @@ -17,6 +17,7 @@ package writers import ( "database/sql" "fmt" + "io" "net/http" "net/url" "path" @@ -56,65 +57,7 @@ func Upload(req *http.Request, cfg *config.MediaAPI, db *storage.Database) util. return *resErr } - r.Logger.WithFields(log.Fields{ - "Origin": r.MediaMetadata.Origin, - "UploadName": r.MediaMetadata.UploadName, - "FileSizeBytes": r.MediaMetadata.FileSizeBytes, - "Content-Type": r.MediaMetadata.ContentType, - }).Info("Uploading file") - - // The file data is hashed and the hash is used as the MediaID. The hash is useful as a - // method of deduplicating files to save storage, as well as a way to conduct - // integrity checks on the file data in the repository. - hash, bytesWritten, tmpDir, copyError := fileutils.WriteTempFile(req.Body, cfg.MaxFileSizeBytes, cfg.AbsBasePath) - if copyError != nil { - logFields := log.Fields{ - "Origin": r.MediaMetadata.Origin, - "MediaID": r.MediaMetadata.MediaID, - } - if copyError == fileutils.ErrFileIsTooLarge { - logFields["MaxFileSizeBytes"] = cfg.MaxFileSizeBytes - } - r.Logger.WithError(copyError).WithFields(logFields).Warn("Error while transferring file") - fileutils.RemoveDir(tmpDir, r.Logger) - return util.JSONResponse{ - Code: 400, - JSON: jsonerror.Unknown(fmt.Sprintf("Failed to upload")), - } - } - - r.MediaMetadata.FileSizeBytes = bytesWritten - r.MediaMetadata.Base64Hash = hash - r.MediaMetadata.MediaID = types.MediaID(hash) - - r.Logger.WithFields(log.Fields{ - "MediaID": r.MediaMetadata.MediaID, - "Origin": r.MediaMetadata.Origin, - "Base64Hash": r.MediaMetadata.Base64Hash, - "UploadName": r.MediaMetadata.UploadName, - "FileSizeBytes": r.MediaMetadata.FileSizeBytes, - "Content-Type": r.MediaMetadata.ContentType, - }).Info("File uploaded") - - // check if we already have a record of the media in our database and if so, we can remove the temporary directory - mediaMetadata, err := db.GetMediaMetadata(r.MediaMetadata.MediaID, r.MediaMetadata.Origin) - if err == nil { - r.MediaMetadata = mediaMetadata - fileutils.RemoveDir(tmpDir, r.Logger) - return util.JSONResponse{ - Code: 200, - JSON: uploadResponse{ - ContentURI: fmt.Sprintf("mxc://%s/%s", cfg.ServerName, r.MediaMetadata.MediaID), - }, - } - } else if err != sql.ErrNoRows { - r.Logger.WithError(err).WithField("MediaID", r.MediaMetadata.MediaID).Warn("Failed to query database") - } - - // TODO: generate thumbnails - - resErr = r.storeFileAndMetadata(tmpDir, cfg.AbsBasePath, db) - if resErr != nil { + if resErr = r.doUpload(req.Body, cfg, db); resErr != nil { return *resErr } @@ -154,6 +97,71 @@ func parseAndValidateRequest(req *http.Request, cfg *config.MediaAPI) (*uploadRe return r, nil } +func (r *uploadRequest) doUpload(reqReader io.Reader, cfg *config.MediaAPI, db *storage.Database) *util.JSONResponse { + r.Logger.WithFields(log.Fields{ + "Origin": r.MediaMetadata.Origin, + "UploadName": r.MediaMetadata.UploadName, + "FileSizeBytes": r.MediaMetadata.FileSizeBytes, + "Content-Type": r.MediaMetadata.ContentType, + }).Info("Uploading file") + + // The file data is hashed and the hash is used as the MediaID. The hash is useful as a + // method of deduplicating files to save storage, as well as a way to conduct + // integrity checks on the file data in the repository. + hash, bytesWritten, tmpDir, copyError := fileutils.WriteTempFile(reqReader, cfg.MaxFileSizeBytes, cfg.AbsBasePath) + if copyError != nil { + logFields := log.Fields{ + "Origin": r.MediaMetadata.Origin, + "MediaID": r.MediaMetadata.MediaID, + } + if copyError == fileutils.ErrFileIsTooLarge { + logFields["MaxFileSizeBytes"] = cfg.MaxFileSizeBytes + } + r.Logger.WithError(copyError).WithFields(logFields).Warn("Error while transferring file") + fileutils.RemoveDir(tmpDir, r.Logger) + return &util.JSONResponse{ + Code: 400, + JSON: jsonerror.Unknown(fmt.Sprintf("Failed to upload")), + } + } + + r.MediaMetadata.FileSizeBytes = bytesWritten + r.MediaMetadata.Base64Hash = hash + r.MediaMetadata.MediaID = types.MediaID(hash) + + r.Logger.WithFields(log.Fields{ + "MediaID": r.MediaMetadata.MediaID, + "Origin": r.MediaMetadata.Origin, + "Base64Hash": r.MediaMetadata.Base64Hash, + "UploadName": r.MediaMetadata.UploadName, + "FileSizeBytes": r.MediaMetadata.FileSizeBytes, + "Content-Type": r.MediaMetadata.ContentType, + }).Info("File uploaded") + + // check if we already have a record of the media in our database and if so, we can remove the temporary directory + mediaMetadata, err := db.GetMediaMetadata(r.MediaMetadata.MediaID, r.MediaMetadata.Origin) + if err == nil { + r.MediaMetadata = mediaMetadata + fileutils.RemoveDir(tmpDir, r.Logger) + return &util.JSONResponse{ + Code: 200, + JSON: uploadResponse{ + ContentURI: fmt.Sprintf("mxc://%s/%s", cfg.ServerName, r.MediaMetadata.MediaID), + }, + } + } else if err != sql.ErrNoRows { + r.Logger.WithError(err).WithField("MediaID", r.MediaMetadata.MediaID).Warn("Failed to query database") + } + + // TODO: generate thumbnails + + if resErr := r.storeFileAndMetadata(tmpDir, cfg.AbsBasePath, db); resErr != nil { + return resErr + } + + return nil +} + // Validate validates the uploadRequest fields func (r *uploadRequest) Validate(maxFileSizeBytes types.FileSizeBytes) *util.JSONResponse { if r.MediaMetadata.FileSizeBytes < 1 { From 9678cb6ea15dd8ef1ce78fd1ab9bbf8865f3edd7 Mon Sep 17 00:00:00 2001 From: Robert Swain Date: Fri, 26 May 2017 17:42:08 +0200 Subject: [PATCH 05/15] mediaapi/writers/upload: Simplify storeFileAndMetadata description --- .../matrix-org/dendrite/mediaapi/writers/upload.go | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) diff --git a/src/github.com/matrix-org/dendrite/mediaapi/writers/upload.go b/src/github.com/matrix-org/dendrite/mediaapi/writers/upload.go index 9168f8bd6..6ef368f0c 100644 --- a/src/github.com/matrix-org/dendrite/mediaapi/writers/upload.go +++ b/src/github.com/matrix-org/dendrite/mediaapi/writers/upload.go @@ -213,13 +213,11 @@ func (r *uploadRequest) Validate(maxFileSizeBytes types.FileSizeBytes) *util.JSO return nil } -// storeFileAndMetadata first moves a temporary file named content from tmpDir to its -// final path (see getPathFromMediaMetadata for details.) Once the file is moved, the -// metadata about the file is written into the media repository database. This order -// of operations is important as it avoids metadata entering the database before the file -// is ready and if we fail to move the file, it never gets added to the database. -// In case of any error, appropriate files and directories are cleaned up a -// util.JSONResponse error is returned. +// storeFileAndMetadata moves the temporary file to its final path based on metadata and stores the metadata in the database +// See getPathFromMediaMetadata in fileutils for details of the final path. +// The order of operations is important as it avoids metadata entering the database before the file +// is ready, and if we fail to move the file, it never gets added to the database. +// Returns a util.JSONResponse error and cleans up directories in case of error. func (r *uploadRequest) storeFileAndMetadata(tmpDir types.Path, absBasePath types.Path, db *storage.Database) *util.JSONResponse { finalPath, duplicate, err := fileutils.MoveFileWithHashCheck(tmpDir, r.MediaMetadata, absBasePath, r.Logger) if err != nil { From 4f2d9a3b695d8355dbfbb9ed7ea6ab4628256986 Mon Sep 17 00:00:00 2001 From: Robert Swain Date: Fri, 26 May 2017 17:44:43 +0200 Subject: [PATCH 06/15] mediaapi/storage: Simplify descriptions --- .../matrix-org/dendrite/mediaapi/storage/storage.go | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/github.com/matrix-org/dendrite/mediaapi/storage/storage.go b/src/github.com/matrix-org/dendrite/mediaapi/storage/storage.go index 630809cbe..4b86967fb 100644 --- a/src/github.com/matrix-org/dendrite/mediaapi/storage/storage.go +++ b/src/github.com/matrix-org/dendrite/mediaapi/storage/storage.go @@ -23,13 +23,13 @@ import ( "github.com/matrix-org/gomatrixserverlib" ) -// A Database is used to store metadata about a repository of media files. +// Database is used to store metadata about a repository of media files. type Database struct { statements statements db *sql.DB } -// Open a postgres database. +// Open opens a postgres database. func Open(dataSourceName string) (*Database, error) { var d Database var err error @@ -48,8 +48,8 @@ func (d *Database) StoreMediaMetadata(mediaMetadata *types.MediaMetadata) error return d.statements.insertMedia(mediaMetadata) } -// GetMediaMetadata returns metadata about media stored on this server. The media could -// have been uploaded to this server or fetched from another server and cached here. +// GetMediaMetadata returns metadata about media stored on this server. +// The media could have been uploaded to this server or fetched from another server and cached here. // Returns sql.ErrNoRows if there is no metadata associated with this media. func (d *Database) GetMediaMetadata(mediaID types.MediaID, mediaOrigin gomatrixserverlib.ServerName) (*types.MediaMetadata, error) { return d.statements.selectMedia(mediaID, mediaOrigin) From 6fc6499848ad70390da048cbd7a6a3a006eb59f8 Mon Sep 17 00:00:00 2001 From: Robert Swain Date: Fri, 26 May 2017 17:50:16 +0200 Subject: [PATCH 07/15] mediaapi/fileutils: Remove obsolete error variables --- .../dendrite/mediaapi/fileutils/fileutils.go | 10 ---------- .../dendrite/mediaapi/writers/upload.go | 17 +++++++---------- 2 files changed, 7 insertions(+), 20 deletions(-) diff --git a/src/github.com/matrix-org/dendrite/mediaapi/fileutils/fileutils.go b/src/github.com/matrix-org/dendrite/mediaapi/fileutils/fileutils.go index b4ad7c9d2..3e4031243 100644 --- a/src/github.com/matrix-org/dendrite/mediaapi/fileutils/fileutils.go +++ b/src/github.com/matrix-org/dendrite/mediaapi/fileutils/fileutils.go @@ -30,16 +30,6 @@ import ( "github.com/matrix-org/dendrite/mediaapi/types" ) -// FIXME: make into error types -var ( - // ErrFileIsTooLarge indicates that the uploaded file is larger than the configured maximum file size - ErrFileIsTooLarge = fmt.Errorf("file is too large") - errRead = fmt.Errorf("failed to read response from remote server") - errResponse = fmt.Errorf("failed to write file data to response body") - errHash = fmt.Errorf("failed to hash file data") - errWrite = fmt.Errorf("failed to write file to disk") -) - // GetPathFromBase64Hash evaluates the path to a media file from its Base64Hash // If the Base64Hash is long enough, we split it into pieces, creating up to 2 subdirectories // for more manageable browsing and use the remainder as the file name. diff --git a/src/github.com/matrix-org/dendrite/mediaapi/writers/upload.go b/src/github.com/matrix-org/dendrite/mediaapi/writers/upload.go index 6ef368f0c..991c509b9 100644 --- a/src/github.com/matrix-org/dendrite/mediaapi/writers/upload.go +++ b/src/github.com/matrix-org/dendrite/mediaapi/writers/upload.go @@ -108,16 +108,13 @@ func (r *uploadRequest) doUpload(reqReader io.Reader, cfg *config.MediaAPI, db * // The file data is hashed and the hash is used as the MediaID. The hash is useful as a // method of deduplicating files to save storage, as well as a way to conduct // integrity checks on the file data in the repository. - hash, bytesWritten, tmpDir, copyError := fileutils.WriteTempFile(reqReader, cfg.MaxFileSizeBytes, cfg.AbsBasePath) - if copyError != nil { - logFields := log.Fields{ - "Origin": r.MediaMetadata.Origin, - "MediaID": r.MediaMetadata.MediaID, - } - if copyError == fileutils.ErrFileIsTooLarge { - logFields["MaxFileSizeBytes"] = cfg.MaxFileSizeBytes - } - r.Logger.WithError(copyError).WithFields(logFields).Warn("Error while transferring file") + hash, bytesWritten, tmpDir, err := fileutils.WriteTempFile(reqReader, cfg.MaxFileSizeBytes, cfg.AbsBasePath) + if err != nil { + r.Logger.WithError(err).WithFields(log.Fields{ + "Origin": r.MediaMetadata.Origin, + "MediaID": r.MediaMetadata.MediaID, + "MaxFileSizeBytes": cfg.MaxFileSizeBytes, + }).Warn("Error while transferring file") fileutils.RemoveDir(tmpDir, r.Logger) return &util.JSONResponse{ Code: 400, From 8c6f30eadc045629232c5cef7d8e38f85a9cad04 Mon Sep 17 00:00:00 2001 From: Robert Swain Date: Wed, 31 May 2017 07:05:07 +0200 Subject: [PATCH 08/15] mediaapi/config: Remove obsolete proxying comment and add default comment --- src/github.com/matrix-org/dendrite/mediaapi/config/config.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/github.com/matrix-org/dendrite/mediaapi/config/config.go b/src/github.com/matrix-org/dendrite/mediaapi/config/config.go index a2d8f43c6..5c514194a 100644 --- a/src/github.com/matrix-org/dendrite/mediaapi/config/config.go +++ b/src/github.com/matrix-org/dendrite/mediaapi/config/config.go @@ -26,8 +26,8 @@ type MediaAPI struct { // The absolute base path to where media files will be stored. AbsBasePath types.Path `yaml:"abs_base_path"` // The maximum file size in bytes that is allowed to be stored on this server. - // Note that remote files larger than this can still be proxied to a client, they will just not be cached. // Note: if MaxFileSizeBytes is set to 0, the size is unlimited. + // Note: if max_file_size_bytes is not set, it will default to 10485760 (10MB) MaxFileSizeBytes types.FileSizeBytes `yaml:"max_file_size_bytes"` // The postgres connection config for connecting to the database e.g a postgres:// URI DataSource string `yaml:"database"` From 0ca2931b6239177508c1c6cabea6aadc287f41bb Mon Sep 17 00:00:00 2001 From: Robert Swain Date: Wed, 31 May 2017 07:06:42 +0200 Subject: [PATCH 09/15] mediaapi/fileutils: Change path schema to put file in subdir of hash --- .../dendrite/mediaapi/fileutils/fileutils.go | 35 ++++++------------- 1 file changed, 10 insertions(+), 25 deletions(-) diff --git a/src/github.com/matrix-org/dendrite/mediaapi/fileutils/fileutils.go b/src/github.com/matrix-org/dendrite/mediaapi/fileutils/fileutils.go index 3e4031243..2594d40a3 100644 --- a/src/github.com/matrix-org/dendrite/mediaapi/fileutils/fileutils.go +++ b/src/github.com/matrix-org/dendrite/mediaapi/fileutils/fileutils.go @@ -31,37 +31,22 @@ import ( ) // GetPathFromBase64Hash evaluates the path to a media file from its Base64Hash -// If the Base64Hash is long enough, we split it into pieces, creating up to 2 subdirectories -// for more manageable browsing and use the remainder as the file name. -// For example, if Base64Hash is 'qwerty', the path will be 'q/w/erty'. +// 3 subdirectories are created for more manageable browsing and use the remainder as the file name. +// For example, if Base64Hash is 'qwerty', the path will be 'q/w/erty/file'. func GetPathFromBase64Hash(base64Hash types.Base64Hash, absBasePath types.Path) (string, error) { - var subPath, fileName string - - hashLen := len(base64Hash) - - switch { - case hashLen < 1: - return "", fmt.Errorf("Invalid filePath (Base64Hash too short): %q", base64Hash) - case hashLen > 255: + if len(base64Hash) < 3 { + return "", fmt.Errorf("Invalid filePath (Base64Hash too short - min 3 characters): %q", base64Hash) + } + if len(base64Hash) > 255 { return "", fmt.Errorf("Invalid filePath (Base64Hash too long - max 255 characters): %q", base64Hash) - case hashLen < 2: - subPath = "" - fileName = string(base64Hash) - case hashLen < 3: - subPath = string(base64Hash[0:1]) - fileName = string(base64Hash[1:]) - default: - subPath = path.Join( - string(base64Hash[0:1]), - string(base64Hash[1:2]), - ) - fileName = string(base64Hash[2:]) } filePath, err := filepath.Abs(path.Join( string(absBasePath), - subPath, - fileName, + string(base64Hash[0:1]), + string(base64Hash[1:2]), + string(base64Hash[2:]), + "file", )) if err != nil { return "", fmt.Errorf("Unable to construct filePath: %q", err) From 63ccd770c6d4e743acfd1e845f37d717ffcf13e4 Mon Sep 17 00:00:00 2001 From: Robert Swain Date: Wed, 31 May 2017 07:07:48 +0200 Subject: [PATCH 10/15] mediaapi/fileutils: Use filepath not path for filesystem paths --- .../dendrite/mediaapi/fileutils/fileutils.go | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/src/github.com/matrix-org/dendrite/mediaapi/fileutils/fileutils.go b/src/github.com/matrix-org/dendrite/mediaapi/fileutils/fileutils.go index 2594d40a3..975dd4308 100644 --- a/src/github.com/matrix-org/dendrite/mediaapi/fileutils/fileutils.go +++ b/src/github.com/matrix-org/dendrite/mediaapi/fileutils/fileutils.go @@ -22,7 +22,6 @@ import ( "io" "io/ioutil" "os" - "path" "path/filepath" "strings" @@ -41,7 +40,7 @@ func GetPathFromBase64Hash(base64Hash types.Base64Hash, absBasePath types.Path) return "", fmt.Errorf("Invalid filePath (Base64Hash too long - max 255 characters): %q", base64Hash) } - filePath, err := filepath.Abs(path.Join( + filePath, err := filepath.Abs(filepath.Join( string(absBasePath), string(base64Hash[0:1]), string(base64Hash[1:2]), @@ -85,7 +84,7 @@ func MoveFileWithHashCheck(tmpDir types.Path, mediaMetadata *types.MediaMetadata return "", duplicate, fmt.Errorf("downloaded file with hash collision but different file size (%v)", finalPath) } err = moveFile( - types.Path(path.Join(string(tmpDir), "content")), + types.Path(filepath.Join(string(tmpDir), "content")), types.Path(finalPath), ) if err != nil { @@ -129,7 +128,7 @@ func WriteTempFile(reqReader io.Reader, maxFileSizeBytes types.FileSizeBytes, ab // moveFile attempts to move the file src to dst func moveFile(src types.Path, dst types.Path) error { - dstDir := path.Dir(string(dst)) + dstDir := filepath.Dir(string(dst)) err := os.MkdirAll(dstDir, 0770) if err != nil { @@ -156,7 +155,7 @@ func createTempFileWriter(absBasePath types.Path) (*bufio.Writer, *os.File, type // createTempDir creates a tmp/ directory within baseDirectory and returns its path func createTempDir(baseDirectory types.Path) (types.Path, error) { - baseTmpDir := path.Join(string(baseDirectory), "tmp") + baseTmpDir := filepath.Join(string(baseDirectory), "tmp") if err := os.MkdirAll(baseTmpDir, 0770); err != nil { return "", fmt.Errorf("Failed to create base temp dir: %v", err) } @@ -171,7 +170,7 @@ func createTempDir(baseDirectory types.Path) (types.Path, error) { // The caller should flush the writer before closing the file. // Returns the file handle as it needs to be closed when writing is complete func createFileWriter(directory types.Path, filename types.Filename) (*bufio.Writer, *os.File, error) { - filePath := path.Join(string(directory), string(filename)) + filePath := filepath.Join(string(directory), string(filename)) file, err := os.Create(filePath) if err != nil { return nil, nil, fmt.Errorf("Failed to create file: %v", err) From a4300eefc4f33ead873bb9dd41437e633ea0491f Mon Sep 17 00:00:00 2001 From: Robert Swain Date: Wed, 31 May 2017 07:08:21 +0200 Subject: [PATCH 11/15] mediaapi/fileutils: Fix and comment os.IsNotExist bug --- .../matrix-org/dendrite/mediaapi/fileutils/fileutils.go | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/github.com/matrix-org/dendrite/mediaapi/fileutils/fileutils.go b/src/github.com/matrix-org/dendrite/mediaapi/fileutils/fileutils.go index 975dd4308..77d88640a 100644 --- a/src/github.com/matrix-org/dendrite/mediaapi/fileutils/fileutils.go +++ b/src/github.com/matrix-org/dendrite/mediaapi/fileutils/fileutils.go @@ -76,7 +76,9 @@ func MoveFileWithHashCheck(tmpDir types.Path, mediaMetadata *types.MediaMetadata } var stat os.FileInfo - if stat, err = os.Stat(finalPath); os.IsExist(err) { + // Note: The double-negative is intentional as os.IsExist(err) != !os.IsNotExist(err). + // The functions are error checkers to be used in different cases. + if stat, err = os.Stat(finalPath); !os.IsNotExist(err) { duplicate = true if stat.Size() == int64(mediaMetadata.FileSizeBytes) { return types.Path(finalPath), duplicate, nil From 61329ee380f9028cdc5e2b867180f28df8a13138 Mon Sep 17 00:00:00 2001 From: Robert Swain Date: Wed, 31 May 2017 07:10:01 +0200 Subject: [PATCH 12/15] mediaapi/fileutils: Comment truncation of data when reading --- .../matrix-org/dendrite/mediaapi/fileutils/fileutils.go | 1 + src/github.com/matrix-org/dendrite/mediaapi/writers/upload.go | 1 + 2 files changed, 2 insertions(+) diff --git a/src/github.com/matrix-org/dendrite/mediaapi/fileutils/fileutils.go b/src/github.com/matrix-org/dendrite/mediaapi/fileutils/fileutils.go index 77d88640a..a166f4765 100644 --- a/src/github.com/matrix-org/dendrite/mediaapi/fileutils/fileutils.go +++ b/src/github.com/matrix-org/dendrite/mediaapi/fileutils/fileutils.go @@ -111,6 +111,7 @@ func WriteTempFile(reqReader io.Reader, maxFileSizeBytes types.FileSizeBytes, ab } defer tmpFile.Close() + // The amount of data read is limited to maxFileSizeBytes. At this point, if there is more data it will be truncated. limitedReader := io.LimitReader(reqReader, int64(maxFileSizeBytes)) // Hash the file data. The hash will be returned. The hash is useful as a // method of deduplicating files to save storage, as well as a way to conduct diff --git a/src/github.com/matrix-org/dendrite/mediaapi/writers/upload.go b/src/github.com/matrix-org/dendrite/mediaapi/writers/upload.go index 991c509b9..dc5e1b6f5 100644 --- a/src/github.com/matrix-org/dendrite/mediaapi/writers/upload.go +++ b/src/github.com/matrix-org/dendrite/mediaapi/writers/upload.go @@ -108,6 +108,7 @@ func (r *uploadRequest) doUpload(reqReader io.Reader, cfg *config.MediaAPI, db * // The file data is hashed and the hash is used as the MediaID. The hash is useful as a // method of deduplicating files to save storage, as well as a way to conduct // integrity checks on the file data in the repository. + // Data is truncated to maxFileSizeBytes. Content-Length was reported as 0 < Content-Length <= maxFileSizeBytes so this is OK. hash, bytesWritten, tmpDir, err := fileutils.WriteTempFile(reqReader, cfg.MaxFileSizeBytes, cfg.AbsBasePath) if err != nil { r.Logger.WithError(err).WithFields(log.Fields{ From 523303277edd16399c3d220da71be83c309b647d Mon Sep 17 00:00:00 2001 From: Robert Swain Date: Wed, 31 May 2017 07:11:00 +0200 Subject: [PATCH 13/15] mediaapi/storage: Refer to RFC instead of golang for base64 format --- .../dendrite/mediaapi/storage/media_repository_table.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/github.com/matrix-org/dendrite/mediaapi/storage/media_repository_table.go b/src/github.com/matrix-org/dendrite/mediaapi/storage/media_repository_table.go index e4a4cbf83..99df713df 100644 --- a/src/github.com/matrix-org/dendrite/mediaapi/storage/media_repository_table.go +++ b/src/github.com/matrix-org/dendrite/mediaapi/storage/media_repository_table.go @@ -40,7 +40,7 @@ CREATE TABLE IF NOT EXISTS media_repository ( creation_ts BIGINT NOT NULL, -- The file name with which the media was uploaded. upload_name TEXT NOT NULL, - -- A golang base64 URLEncoding string representation of a SHA-256 hash sum of the file data. + -- Alternate RFC 4648 unpadded base64 encoding string representation of a SHA-256 hash sum of the file data. base64hash TEXT NOT NULL, -- The user who uploaded the file. Should be a Matrix user ID. user_id TEXT NOT NULL From 08d1eb96699e642ee812c57c6886370ad23998cd Mon Sep 17 00:00:00 2001 From: Robert Swain Date: Wed, 31 May 2017 07:11:21 +0200 Subject: [PATCH 14/15] mediaapi/upload: Improve HTTP status codes for error cases --- .../matrix-org/dendrite/mediaapi/writers/upload.go | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/github.com/matrix-org/dendrite/mediaapi/writers/upload.go b/src/github.com/matrix-org/dendrite/mediaapi/writers/upload.go index dc5e1b6f5..5ae4936e2 100644 --- a/src/github.com/matrix-org/dendrite/mediaapi/writers/upload.go +++ b/src/github.com/matrix-org/dendrite/mediaapi/writers/upload.go @@ -75,7 +75,7 @@ func Upload(req *http.Request, cfg *config.MediaAPI, db *storage.Database) util. func parseAndValidateRequest(req *http.Request, cfg *config.MediaAPI) (*uploadRequest, *util.JSONResponse) { if req.Method != "POST" { return nil, &util.JSONResponse{ - Code: 400, + Code: 405, JSON: jsonerror.Unknown("HTTP request method must be POST."), } } @@ -164,13 +164,13 @@ func (r *uploadRequest) doUpload(reqReader io.Reader, cfg *config.MediaAPI, db * func (r *uploadRequest) Validate(maxFileSizeBytes types.FileSizeBytes) *util.JSONResponse { if r.MediaMetadata.FileSizeBytes < 1 { return &util.JSONResponse{ - Code: 400, + Code: 411, JSON: jsonerror.Unknown("HTTP Content-Length request header must be greater than zero."), } } if maxFileSizeBytes > 0 && r.MediaMetadata.FileSizeBytes > maxFileSizeBytes { return &util.JSONResponse{ - Code: 400, + Code: 413, JSON: jsonerror.Unknown(fmt.Sprintf("HTTP Content-Length is greater than the maximum allowed upload size (%v).", maxFileSizeBytes)), } } From a0eae6922df7ff5587a4c860136a40d258d3cfde Mon Sep 17 00:00:00 2001 From: Robert Swain Date: Wed, 31 May 2017 07:12:22 +0200 Subject: [PATCH 15/15] mediaapi/writers: Remove unnecessary fmt.Sprintf --- .../matrix-org/dendrite/mediaapi/writers/upload.go | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/github.com/matrix-org/dendrite/mediaapi/writers/upload.go b/src/github.com/matrix-org/dendrite/mediaapi/writers/upload.go index 5ae4936e2..dc525ee44 100644 --- a/src/github.com/matrix-org/dendrite/mediaapi/writers/upload.go +++ b/src/github.com/matrix-org/dendrite/mediaapi/writers/upload.go @@ -119,7 +119,7 @@ func (r *uploadRequest) doUpload(reqReader io.Reader, cfg *config.MediaAPI, db * fileutils.RemoveDir(tmpDir, r.Logger) return &util.JSONResponse{ Code: 400, - JSON: jsonerror.Unknown(fmt.Sprintf("Failed to upload")), + JSON: jsonerror.Unknown("Failed to upload"), } } @@ -222,7 +222,7 @@ func (r *uploadRequest) storeFileAndMetadata(tmpDir types.Path, absBasePath type r.Logger.WithError(err).Error("Failed to move file.") return &util.JSONResponse{ Code: 400, - JSON: jsonerror.Unknown(fmt.Sprintf("Failed to upload")), + JSON: jsonerror.Unknown("Failed to upload"), } } if duplicate { @@ -239,7 +239,7 @@ func (r *uploadRequest) storeFileAndMetadata(tmpDir types.Path, absBasePath type } return &util.JSONResponse{ Code: 400, - JSON: jsonerror.Unknown(fmt.Sprintf("Failed to upload")), + JSON: jsonerror.Unknown("Failed to upload"), } }