From 8684f80ebd00ed31e14b7bd57db27f46c02f9b97 Mon Sep 17 00:00:00 2001 From: Robert Swain Date: Mon, 22 May 2017 14:12:11 +0200 Subject: [PATCH] mediaapi/fileutils: Move fileutils to its own package --- .../{writers => fileutils}/fileutils.go | 34 ++++++++++--------- .../dendrite/mediaapi/writers/download.go | 13 +++---- .../dendrite/mediaapi/writers/upload.go | 13 +++---- 3 files changed, 32 insertions(+), 28 deletions(-) rename src/github.com/matrix-org/dendrite/mediaapi/{writers => fileutils}/fileutils.go (91%) diff --git a/src/github.com/matrix-org/dendrite/mediaapi/writers/fileutils.go b/src/github.com/matrix-org/dendrite/mediaapi/fileutils/fileutils.go similarity index 91% rename from src/github.com/matrix-org/dendrite/mediaapi/writers/fileutils.go rename to src/github.com/matrix-org/dendrite/mediaapi/fileutils/fileutils.go index b1123bc4a..99d6dbd2c 100644 --- a/src/github.com/matrix-org/dendrite/mediaapi/writers/fileutils.go +++ b/src/github.com/matrix-org/dendrite/mediaapi/fileutils/fileutils.go @@ -12,7 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. -package writers +package fileutils import ( "bufio" @@ -32,7 +32,8 @@ import ( "github.com/matrix-org/dendrite/mediaapi/types" ) -func removeDir(dir types.Path, logger *log.Entry) { +// RemoveDir removes a directory and logs a warning in case of errors +func RemoveDir(dir types.Path, logger *log.Entry) { dirErr := os.RemoveAll(string(dir)) if dirErr != nil { logger.WithError(dirErr).WithField("dir", dir).Warn("Failed to remove directory") @@ -77,7 +78,8 @@ func createTempFileWriter(absBasePath types.Path) (*bufio.Writer, *os.File, type } var ( - errFileIsTooLarge = fmt.Errorf("file is too large") + // ErrFileIsTooLarge indicates that the uploaded file is larger than the configured maximum file size + ErrFileIsTooLarge = fmt.Errorf("file is too large") errRead = fmt.Errorf("failed to read response from remote server") errResponse = fmt.Errorf("failed to write file data to response body") errHash = fmt.Errorf("failed to hash file data") @@ -104,7 +106,7 @@ func writeToResponse(respWriter http.ResponseWriter, buffer []byte, bytesToWrite func writeToDiskAndHasher(tmpFileWriter *bufio.Writer, hasher hash.Hash, bytesWritten int64, maxFileSizeBytes types.ContentLength, buffer []byte, bytesToWrite int) (int64, error) { // if larger than maxFileSizeBytes then stop writing to disk and discard cached file if bytesWritten+int64(bytesToWrite) > int64(maxFileSizeBytes) { - return 0, errFileIsTooLarge + return 0, ErrFileIsTooLarge } // write to hasher and to disk bytesTemp, writeErr := tmpFileWriter.Write(buffer[:bytesToWrite]) @@ -117,7 +119,7 @@ func writeToDiskAndHasher(tmpFileWriter *bufio.Writer, hasher hash.Hash, bytesWr return int64(bytesTemp), nil } -// readAndHashAndWriteWithLimit works like io.Copy except it copies from the reqReader to the +// ReadAndHashAndWriteWithLimit works like io.Copy except it copies from the reqReader to the // optionally-supplied respWriter and a temporary file named 'content' using a bufio.Writer. // The data written to disk is hashed using the SHA-256 algorithm. // If there is an error with the reqReader or the respWriter, that is considered an error. @@ -127,7 +129,7 @@ func writeToDiskAndHasher(tmpFileWriter *bufio.Writer, hasher hash.Hash, bytesWr // If a respWriter is provided, all the data will be proxied from the reqReader to // the respWriter, regardless of errors or limits on writing to disk. 
// Returns all of the hash sum, bytes written to disk, and temporary directory path, or an error. -func readAndHashAndWriteWithLimit(reqReader io.Reader, maxFileSizeBytes types.ContentLength, absBasePath types.Path, respWriter http.ResponseWriter) (types.Base64Hash, types.ContentLength, types.ContentLength, types.Path, error) { +func ReadAndHashAndWriteWithLimit(reqReader io.Reader, maxFileSizeBytes types.ContentLength, absBasePath types.Path, respWriter http.ResponseWriter) (types.Base64Hash, types.ContentLength, types.ContentLength, types.Path, error) { // create the temporary file writer tmpFileWriter, tmpFile, tmpDir, err := createTempFileWriter(absBasePath) if err != nil { @@ -157,7 +159,7 @@ func readAndHashAndWriteWithLimit(reqReader io.Reader, maxFileSizeBytes types.Co bytesTemp, copyError = writeToResponse(respWriter, buffer, bytesRead) bytesResponded += bytesTemp if copyError == nil { - // Note: if we get here then copyError != errFileIsTooLarge && copyError != errWrite + // Note: if we get here then copyError != ErrFileIsTooLarge && copyError != errWrite // as if copyError == errResponse || copyError == errWrite then we would have broken // out of the loop and there are no other cases bytesTemp, copyError = writeToDiskAndHasher(tmpFileWriter, hasher, bytesWritten, maxFileSizeBytes, buffer, (bytesRead)) @@ -186,7 +188,7 @@ func readAndHashAndWriteWithLimit(reqReader io.Reader, maxFileSizeBytes types.Co return types.Base64Hash(base64.URLEncoding.EncodeToString(hash[:])), types.ContentLength(bytesResponded), types.ContentLength(bytesWritten), tmpDir, nil } -// getPathFromMediaMetadata validates and constructs the on-disk path to the media +// GetPathFromMediaMetadata validates and constructs the on-disk path to the media // based on its origin and mediaID // If a mediaID is too short, which could happen for other homeserver implementations, // place it into a short-id subdirectory of the origin directory @@ -195,7 +197,7 @@ func readAndHashAndWriteWithLimit(reqReader io.Reader, maxFileSizeBytes types.Co // mediaID is 'qwerty', we create subdirectories 'q', 'w' within 'q' and place the file // in 'q/w' calling it 'erty'. If the mediaID is shorter than 3 characters, the last // character is the file name and the preceding character, if any, is a subdirectory name. -func getPathFromMediaMetadata(m *types.MediaMetadata, absBasePath types.Path) (string, error) { +func GetPathFromMediaMetadata(m *types.MediaMetadata, absBasePath types.Path) (string, error) { var subPath, fileName string hashLen := len(m.Base64Hash) @@ -253,16 +255,16 @@ func moveFile(src types.Path, dst types.Path) error { return nil } -// moveFileWithHashCheck attempts to move the file src to dst and checks for hash collisions based on metadata +// MoveFileWithHashCheck attempts to move the file src to dst and checks for hash collisions based on metadata // Check if destination file exists. As the destination is based on a hash of the file data, // if it exists and the content length does not match then there is a hash collision for two different files. If // it exists and the content length matches, it is believable that it is the same file and we can just // discard the temporary file. 
-func moveFileWithHashCheck(tmpDir types.Path, mediaMetadata *types.MediaMetadata, absBasePath types.Path, logger *log.Entry) (string, bool, error) { +func MoveFileWithHashCheck(tmpDir types.Path, mediaMetadata *types.MediaMetadata, absBasePath types.Path, logger *log.Entry) (string, bool, error) { duplicate := false - finalPath, err := getPathFromMediaMetadata(mediaMetadata, absBasePath) + finalPath, err := GetPathFromMediaMetadata(mediaMetadata, absBasePath) if err != nil { - removeDir(tmpDir, logger) + RemoveDir(tmpDir, logger) return "", duplicate, fmt.Errorf("failed to get file path from metadata: %q", err) } @@ -270,11 +272,11 @@ func moveFileWithHashCheck(tmpDir types.Path, mediaMetadata *types.MediaMetadata if stat, err = os.Stat(finalPath); os.IsExist(err) { duplicate = true if stat.Size() == int64(mediaMetadata.ContentLength) { - removeDir(tmpDir, logger) + RemoveDir(tmpDir, logger) return finalPath, duplicate, nil } // Remove the tmpDir as we anyway cannot cache the file on disk due to the hash collision - removeDir(tmpDir, logger) + RemoveDir(tmpDir, logger) return "", duplicate, fmt.Errorf("downloaded file with hash collision but different file size (%v)", finalPath) } err = moveFile( @@ -282,7 +284,7 @@ func moveFileWithHashCheck(tmpDir types.Path, mediaMetadata *types.MediaMetadata types.Path(finalPath), ) if err != nil { - removeDir(tmpDir, logger) + RemoveDir(tmpDir, logger) return "", duplicate, fmt.Errorf("failed to move file to final destination (%v): %q", finalPath, err) } return finalPath, duplicate, nil diff --git a/src/github.com/matrix-org/dendrite/mediaapi/writers/download.go b/src/github.com/matrix-org/dendrite/mediaapi/writers/download.go index ab713fa5f..eed3bb8a7 100644 --- a/src/github.com/matrix-org/dendrite/mediaapi/writers/download.go +++ b/src/github.com/matrix-org/dendrite/mediaapi/writers/download.go @@ -30,6 +30,7 @@ import ( log "github.com/Sirupsen/logrus" "github.com/matrix-org/dendrite/clientapi/jsonerror" "github.com/matrix-org/dendrite/mediaapi/config" + "github.com/matrix-org/dendrite/mediaapi/fileutils" "github.com/matrix-org/dendrite/mediaapi/storage" "github.com/matrix-org/dendrite/mediaapi/types" "github.com/matrix-org/gomatrixserverlib" @@ -183,7 +184,7 @@ func (r *downloadRequest) respondFromLocalFile(w http.ResponseWriter, absBasePat "Content-Disposition": r.MediaMetadata.ContentDisposition, }).Infof("Downloading file") - filePath, err := getPathFromMediaMetadata(r.MediaMetadata, absBasePath) + filePath, err := fileutils.GetPathFromMediaMetadata(r.MediaMetadata, absBasePath) if err != nil { // FIXME: Remove erroneous file from database? r.Logger.WithError(err).Warn("Failed to get file path from metadata") @@ -341,7 +342,7 @@ func (r *downloadRequest) commitFileAndMetadata(tmpDir types.Path, absBasePath t }).Info("Storing file metadata to media repository database") // The database is the source of truth so we need to have moved the file first - finalPath, duplicate, err := moveFileWithHashCheck(tmpDir, r.MediaMetadata, absBasePath, r.Logger) + finalPath, duplicate, err := fileutils.MoveFileWithHashCheck(tmpDir, r.MediaMetadata, absBasePath, r.Logger) if err != nil { r.Logger.WithError(err).Error("Failed to move file.") return updateActiveRemoteRequests @@ -367,7 +368,7 @@ func (r *downloadRequest) commitFileAndMetadata(tmpDir types.Path, absBasePath t // remove the file if it is not a duplicate. 
if duplicate == false { finalDir := path.Dir(finalPath) - removeDir(types.Path(finalDir), r.Logger) + fileutils.RemoveDir(types.Path(finalDir), r.Logger) } completeRemoteRequest(activeRemoteRequests, mxcURL) return updateActiveRemoteRequests @@ -447,18 +448,18 @@ func (r *downloadRequest) respondFromRemoteFile(w http.ResponseWriter, absBasePa // integrity checks on the file data in the repository. // bytesResponded is the total number of bytes written to the response to the client request // bytesWritten is the total number of bytes written to disk - hash, bytesResponded, bytesWritten, tmpDir, copyError := readAndHashAndWriteWithLimit(resp.Body, maxFileSizeBytes, absBasePath, w) + hash, bytesResponded, bytesWritten, tmpDir, copyError := fileutils.ReadAndHashAndWriteWithLimit(resp.Body, maxFileSizeBytes, absBasePath, w) if copyError != nil { logFields := log.Fields{ "MediaID": r.MediaMetadata.MediaID, "Origin": r.MediaMetadata.Origin, } - if copyError == errFileIsTooLarge { + if copyError == fileutils.ErrFileIsTooLarge { logFields["MaxFileSizeBytes"] = maxFileSizeBytes } r.Logger.WithError(copyError).WithFields(logFields).Warn("Error while transferring file") - removeDir(tmpDir, r.Logger) + fileutils.RemoveDir(tmpDir, r.Logger) // Note: if we have responded with any data in the body at all then we have already sent 200 OK and we can only abort at this point if bytesResponded < 1 { r.jsonErrorResponse(w, util.JSONResponse{ diff --git a/src/github.com/matrix-org/dendrite/mediaapi/writers/upload.go b/src/github.com/matrix-org/dendrite/mediaapi/writers/upload.go index 947d7d94b..497cae79e 100644 --- a/src/github.com/matrix-org/dendrite/mediaapi/writers/upload.go +++ b/src/github.com/matrix-org/dendrite/mediaapi/writers/upload.go @@ -26,6 +26,7 @@ import ( "github.com/matrix-org/dendrite/clientapi/auth" "github.com/matrix-org/dendrite/clientapi/jsonerror" "github.com/matrix-org/dendrite/mediaapi/config" + "github.com/matrix-org/dendrite/mediaapi/fileutils" "github.com/matrix-org/dendrite/mediaapi/storage" "github.com/matrix-org/dendrite/mediaapi/types" "github.com/matrix-org/util" @@ -142,7 +143,7 @@ func parseAndValidateRequest(req *http.Request, cfg *config.MediaAPI) (*uploadRe // In case of any error, appropriate files and directories are cleaned up a // util.JSONResponse error is returned. func storeFileAndMetadata(tmpDir types.Path, absBasePath types.Path, mediaMetadata *types.MediaMetadata, db *storage.Database, logger *log.Entry) *util.JSONResponse { - finalPath, duplicate, err := moveFileWithHashCheck(tmpDir, mediaMetadata, absBasePath, logger) + finalPath, duplicate, err := fileutils.MoveFileWithHashCheck(tmpDir, mediaMetadata, absBasePath, logger) if err != nil { logger.WithError(err).Error("Failed to move file.") return &util.JSONResponse{ @@ -160,7 +161,7 @@ func storeFileAndMetadata(tmpDir types.Path, absBasePath types.Path, mediaMetada // there is valid metadata in the database for that file. As such we only // remove the file if it is not a duplicate. if duplicate == false { - removeDir(types.Path(path.Dir(finalPath)), logger) + fileutils.RemoveDir(types.Path(path.Dir(finalPath)), logger) } return &util.JSONResponse{ Code: 400, @@ -197,18 +198,18 @@ func Upload(req *http.Request, cfg *config.MediaAPI, db *storage.Database) util. // method of deduplicating files to save storage, as well as a way to conduct // integrity checks on the file data in the repository. 
// bytesWritten is the total number of bytes written to disk - hash, _, bytesWritten, tmpDir, copyError := readAndHashAndWriteWithLimit(req.Body, cfg.MaxFileSizeBytes, cfg.AbsBasePath, nil) + hash, _, bytesWritten, tmpDir, copyError := fileutils.ReadAndHashAndWriteWithLimit(req.Body, cfg.MaxFileSizeBytes, cfg.AbsBasePath, nil) if copyError != nil { logFields := log.Fields{ "Origin": r.MediaMetadata.Origin, "MediaID": r.MediaMetadata.MediaID, } - if copyError == errFileIsTooLarge { + if copyError == fileutils.ErrFileIsTooLarge { logFields["MaxFileSizeBytes"] = cfg.MaxFileSizeBytes } logger.WithError(copyError).WithFields(logFields).Warn("Error while transferring file") - removeDir(tmpDir, logger) + fileutils.RemoveDir(tmpDir, logger) return util.JSONResponse{ Code: 400, JSON: jsonerror.Unknown(fmt.Sprintf("Failed to upload")), @@ -233,7 +234,7 @@ func Upload(req *http.Request, cfg *config.MediaAPI, db *storage.Database) util. mediaMetadata, err := db.GetMediaMetadata(r.MediaMetadata.MediaID, r.MediaMetadata.Origin) if err == nil { r.MediaMetadata = mediaMetadata - removeDir(tmpDir, logger) + fileutils.RemoveDir(tmpDir, logger) return util.JSONResponse{ Code: 200, JSON: uploadResponse{
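
For reference, below is a minimal sketch (not part of the patch) of how a caller in the writers package can use the newly exported fileutils API. The persistUpload helper and its signature are hypothetical; they only illustrate the call sequence that upload.go follows after this change: stream and hash the body to a temporary file, then move it to its hash-derived final path with duplicate detection.

// Illustrative only: a hypothetical caller of the exported fileutils API,
// mirroring the flow in mediaapi/writers/upload.go after this patch.
package writers

import (
	"fmt"
	"io"

	log "github.com/Sirupsen/logrus"
	"github.com/matrix-org/dendrite/mediaapi/fileutils"
	"github.com/matrix-org/dendrite/mediaapi/types"
)

// persistUpload streams body to a temporary file under absBasePath, records the
// hash and size on mediaMetadata, then moves the file to its final location.
func persistUpload(body io.Reader, maxFileSizeBytes types.ContentLength, absBasePath types.Path, mediaMetadata *types.MediaMetadata, logger *log.Entry) (string, error) {
	// Hash and write the stream to a temporary directory; no http.ResponseWriter
	// is supplied because this is an upload, not a proxied remote download.
	hash, _, bytesWritten, tmpDir, err := fileutils.ReadAndHashAndWriteWithLimit(body, maxFileSizeBytes, absBasePath, nil)
	if err != nil {
		// Clean up the temporary directory on any copy error, as upload.go does.
		fileutils.RemoveDir(tmpDir, logger)
		if err == fileutils.ErrFileIsTooLarge {
			return "", fmt.Errorf("upload exceeds the configured maximum of %d bytes", maxFileSizeBytes)
		}
		return "", err
	}

	mediaMetadata.ContentLength = bytesWritten
	mediaMetadata.Base64Hash = hash

	// Move the temporary file to its hash-derived final path; a duplicate is
	// reported when a file with the same hash and content length already exists.
	finalPath, duplicate, err := fileutils.MoveFileWithHashCheck(tmpDir, mediaMetadata, absBasePath, logger)
	if err != nil {
		return "", err
	}
	if duplicate {
		logger.WithField("path", finalPath).Info("File already exists; reusing stored copy")
	}
	return finalPath, nil
}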