Mirror of https://github.com/matrix-org/dendrite.git

mediaapi/writers: Factor out common moveFileWithHashCheck functionality

Robert Swain 2017-05-22 12:20:07 +02:00
parent 5d4b5ef1bc
commit 73b419a126
3 changed files with 57 additions and 30 deletions
src/github.com/matrix-org/dendrite/mediaapi/writers
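
For orientation before the diff: the download and upload writers previously inlined the same resolve-path-then-move sequence, and this commit replaces both copies with a single moveFileWithHashCheck helper. The following is a minimal, self-contained sketch of that sequence, not the real dendrite code: the contentHashPath/storeOnce names are hypothetical stand-ins for getPathFromMediaMetadata and the mediaapi types. Storing the same bytes twice demonstrates the duplicate short-circuit:

package main

import (
	"crypto/sha256"
	"fmt"
	"os"
	"path"
)

// contentHashPath is a hypothetical stand-in for getPathFromMediaMetadata:
// the destination path is derived from a hash of the file's content.
func contentHashPath(base string, content []byte) string {
	h := fmt.Sprintf("%x", sha256.Sum256(content))
	return path.Join(base, h[:2], h[2:], "file")
}

// storeOnce mirrors the factored-out sequence: write to a temporary dir,
// resolve the hash-based destination, detect an existing duplicate via
// os.Stat, and only move the temporary file when the destination is new.
func storeOnce(base string, content []byte) (string, bool, error) {
	tmpDir, err := os.MkdirTemp("", "media-")
	if err != nil {
		return "", false, err
	}
	tmpFile := path.Join(tmpDir, "content")
	if err = os.WriteFile(tmpFile, content, 0o600); err != nil {
		os.RemoveAll(tmpDir)
		return "", false, err
	}
	finalPath := contentHashPath(base, content)
	if stat, statErr := os.Stat(finalPath); statErr == nil {
		// Destination exists: same length means a believable duplicate,
		// a different length means two files collided on one hash.
		os.RemoveAll(tmpDir)
		if stat.Size() == int64(len(content)) {
			return finalPath, true, nil
		}
		return "", true, fmt.Errorf("hash collision at %s", finalPath)
	}
	if err = os.MkdirAll(path.Dir(finalPath), 0o770); err != nil {
		os.RemoveAll(tmpDir)
		return "", false, err
	}
	if err = os.Rename(tmpFile, finalPath); err != nil {
		os.RemoveAll(tmpDir)
		return "", false, err
	}
	os.RemoveAll(tmpDir)
	return finalPath, false, nil
}

func main() {
	base, err := os.MkdirTemp("", "store-")
	if err != nil {
		panic(err)
	}
	defer os.RemoveAll(base)
	for i := 0; i < 2; i++ {
		finalPath, duplicate, storeErr := storeOnce(base, []byte("hello"))
		fmt.Println(finalPath, duplicate, storeErr) // second run reports duplicate=true
	}
}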


@@ -341,21 +341,14 @@ func (r *downloadRequest) commitFileAndMetadata(tmpDir types.Path, absBasePath t
 	}).Info("Storing file metadata to media repository database")
 	// The database is the source of truth so we need to have moved the file first
-	finalPath, err := getPathFromMediaMetadata(r.MediaMetadata, absBasePath)
+	finalPath, duplicate, err := moveFileWithHashCheck(tmpDir, r.MediaMetadata, absBasePath, r.Logger)
 	if err != nil {
-		r.Logger.WithError(err).Warn("Failed to get file path from metadata")
-		removeDir(tmpDir, r.Logger)
+		r.Logger.WithError(err).Error("Failed to move file")
 		return updateActiveRemoteRequests
 	}
-	err = moveFile(
-		types.Path(path.Join(string(tmpDir), "content")),
-		types.Path(finalPath),
-	)
-	if err != nil {
-		r.Logger.WithError(err).WithField("dst", finalPath).Warn("Failed to move file to final destination")
-		removeDir(tmpDir, r.Logger)
-		return updateActiveRemoteRequests
+	if duplicate {
+		r.Logger.WithField("dst", finalPath).Info("File was stored previously - discarding duplicate")
+		// Continue on to store the metadata in the database
 	}
 	// Writing the metadata to the media repository database and removing the mxcURL from activeRemoteRequests needs to be atomic.
@@ -369,8 +362,13 @@ func (r *downloadRequest) commitFileAndMetadata(tmpDir types.Path, absBasePath t
 	// if written to disk, add to db
 	err = db.StoreMediaMetadata(r.MediaMetadata)
 	if err != nil {
-		finalDir := path.Dir(finalPath)
-		removeDir(types.Path(finalDir), r.Logger)
+		// If the file is a duplicate (has the same hash as an existing file) then
+		// there is valid metadata in the database for that file. As such we only
+		// remove the file if it is not a duplicate.
+		if !duplicate {
+			finalDir := path.Dir(finalPath)
+			removeDir(types.Path(finalDir), r.Logger)
+		}
 		completeRemoteRequest(activeRemoteRequests, mxcURL)
 		return updateActiveRemoteRequests
 	}
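
The hunk above encodes an ownership rule worth calling out: when the metadata write fails, the on-disk file may only be removed if this request created it, because a duplicate's bytes are still referenced by the metadata stored for the earlier upload. A tiny self-contained sketch of that decision, with a hypothetical cleanupOnDBError helper standing in for the inline check:

package main

import (
	"fmt"
	"os"
	"path"
)

// cleanupOnDBError applies the rule from the hunk above: after a failed
// metadata write, remove the file's directory only if this request newly
// created the file; a duplicate is still owned by the earlier upload.
func cleanupOnDBError(finalPath string, duplicate bool) {
	if !duplicate {
		os.RemoveAll(path.Dir(finalPath))
	}
}

func main() {
	dir, err := os.MkdirTemp("", "dup-")
	if err != nil {
		panic(err)
	}
	defer os.RemoveAll(dir)
	file := path.Join(dir, "file")
	if err := os.WriteFile(file, []byte("x"), 0o600); err != nil {
		panic(err)
	}

	cleanupOnDBError(file, true) // duplicate: the earlier upload still owns the file
	_, statErr := os.Stat(file)
	fmt.Println("survives duplicate cleanup:", statErr == nil) // true

	cleanupOnDBError(file, false) // newly created by this request: remove it
	_, statErr = os.Stat(file)
	fmt.Println("removed when not duplicate:", os.IsNotExist(statErr)) // true
}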


@@ -250,3 +250,38 @@ func moveFile(src types.Path, dst types.Path) error {
 	}
 	return nil
 }
+
+// moveFileWithHashCheck attempts to move the file in tmpDir to its final,
+// hash-based destination and checks for hash collisions based on metadata.
+// It first checks whether the destination file exists. As the destination is
+// derived from a hash of the file data, if it exists and the content length
+// does not match then there is a hash collision between two different files.
+// If it exists and the content length matches, it is believable that it is
+// the same file and we can just discard the temporary file.
+func moveFileWithHashCheck(tmpDir types.Path, mediaMetadata *types.MediaMetadata, absBasePath types.Path, logger *log.Entry) (string, bool, error) {
+	duplicate := false
+	finalPath, err := getPathFromMediaMetadata(mediaMetadata, absBasePath)
+	if err != nil {
+		removeDir(tmpDir, logger)
+		return "", duplicate, fmt.Errorf("failed to get file path from metadata: %q", err)
+	}
+	var stat os.FileInfo
+	// os.Stat returns a nil error when the file exists; os.IsExist(err) is
+	// never true for that nil error, so test err == nil to detect the file.
+	if stat, err = os.Stat(finalPath); err == nil {
+		duplicate = true
+		if stat.Size() == int64(mediaMetadata.ContentLength) {
+			removeDir(tmpDir, logger)
+			return finalPath, duplicate, nil
+		}
+		// Remove tmpDir as we cannot cache the file on disk anyway due to the hash collision
+		removeDir(tmpDir, logger)
+		return "", duplicate, fmt.Errorf("downloaded file with hash collision but different file size (%v)", finalPath)
+	}
+	err = moveFile(
+		types.Path(path.Join(string(tmpDir), "content")),
+		types.Path(finalPath),
+	)
+	if err != nil {
+		removeDir(tmpDir, logger)
+		return "", duplicate, fmt.Errorf("failed to move file to final destination (%v): %q", finalPath, err)
+	}
+	return finalPath, duplicate, nil
+}
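
One subtlety in the existence check above: os.Stat returns a nil error when the file exists, so testing err == nil is what detects the duplicate; os.IsExist(err) only recognizes "already exists" errors from operations such as creating a file with O_EXCL, and is never true for a nil error. A small self-contained demonstration of the idiom:

package main

import (
	"fmt"
	"os"
)

func main() {
	f, err := os.CreateTemp("", "exists-demo-*")
	if err != nil {
		panic(err)
	}
	defer os.Remove(f.Name())
	f.Close()

	// For an existing file, os.Stat succeeds: err == nil, os.IsExist(err) == false.
	_, err = os.Stat(f.Name())
	fmt.Println("existing file:", err == nil, os.IsExist(err)) // true false

	// For a missing file, os.Stat fails with a not-exist error.
	_, err = os.Stat(f.Name() + ".missing")
	fmt.Println("missing file:", os.IsNotExist(err)) // true
}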


@@ -142,32 +142,26 @@ func parseAndValidateRequest(req *http.Request, cfg *config.MediaAPI) (*uploadRe
 // In case of any error, appropriate files and directories are cleaned up and a
 // util.JSONResponse error is returned.
 func storeFileAndMetadata(tmpDir types.Path, absBasePath types.Path, mediaMetadata *types.MediaMetadata, db *storage.Database, logger *log.Entry) *util.JSONResponse {
-	finalPath, err := getPathFromMediaMetadata(mediaMetadata, absBasePath)
+	finalPath, duplicate, err := moveFileWithHashCheck(tmpDir, mediaMetadata, absBasePath, logger)
 	if err != nil {
-		logger.WithError(err).Warn("Failed to get file path from metadata")
-		removeDir(tmpDir, logger)
+		logger.WithError(err).Error("Failed to move file")
 		return &util.JSONResponse{
 			Code: 400,
 			JSON: jsonerror.Unknown(fmt.Sprintf("Failed to upload")),
 		}
 	}
-	err = moveFile(
-		types.Path(path.Join(string(tmpDir), "content")),
-		types.Path(finalPath),
-	)
-	if err != nil {
-		logger.WithError(err).WithField("dst", finalPath).Warn("Failed to move file to final destination")
-		removeDir(tmpDir, logger)
-		return &util.JSONResponse{
-			Code: 400,
-			JSON: jsonerror.Unknown(fmt.Sprintf("Failed to upload")),
-		}
+	if duplicate {
+		logger.WithField("dst", finalPath).Info("File was stored previously - discarding duplicate")
 	}
 	if err = db.StoreMediaMetadata(mediaMetadata); err != nil {
 		logger.WithError(err).Warn("Failed to store metadata")
-		removeDir(types.Path(path.Dir(finalPath)), logger)
+		// If the file is a duplicate (has the same hash as an existing file) then
+		// there is valid metadata in the database for that file. As such we only
+		// remove the file if it is not a duplicate.
+		if !duplicate {
+			removeDir(types.Path(path.Dir(finalPath)), logger)
+		}
 		return &util.JSONResponse{
 			Code: 400,
 			JSON: jsonerror.Unknown(fmt.Sprintf("Failed to upload")),