mirror of
https://github.com/matrix-org/dendrite.git
synced 2025-12-08 15:33:09 -06:00
mediaapi/fileutils: Move fileutils to its own package
This commit is contained in:
parent
7bdcece102
commit
8684f80ebd
|
|
@ -12,7 +12,7 @@
|
|||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
package writers
|
||||
package fileutils
|
||||
|
||||
import (
|
||||
"bufio"
|
||||
|
|
@ -32,7 +32,8 @@ import (
|
|||
"github.com/matrix-org/dendrite/mediaapi/types"
|
||||
)
|
||||
|
||||
func removeDir(dir types.Path, logger *log.Entry) {
|
||||
// RemoveDir removes a directory and logs a warning in case of errors
|
||||
func RemoveDir(dir types.Path, logger *log.Entry) {
|
||||
dirErr := os.RemoveAll(string(dir))
|
||||
if dirErr != nil {
|
||||
logger.WithError(dirErr).WithField("dir", dir).Warn("Failed to remove directory")
|
||||
|
|
@ -77,7 +78,8 @@ func createTempFileWriter(absBasePath types.Path) (*bufio.Writer, *os.File, type
|
|||
}
|
||||
|
||||
var (
|
||||
errFileIsTooLarge = fmt.Errorf("file is too large")
|
||||
// ErrFileIsTooLarge indicates that the uploaded file is larger than the configured maximum file size
|
||||
ErrFileIsTooLarge = fmt.Errorf("file is too large")
|
||||
errRead = fmt.Errorf("failed to read response from remote server")
|
||||
errResponse = fmt.Errorf("failed to write file data to response body")
|
||||
errHash = fmt.Errorf("failed to hash file data")
|
||||
|
|
@ -104,7 +106,7 @@ func writeToResponse(respWriter http.ResponseWriter, buffer []byte, bytesToWrite
|
|||
func writeToDiskAndHasher(tmpFileWriter *bufio.Writer, hasher hash.Hash, bytesWritten int64, maxFileSizeBytes types.ContentLength, buffer []byte, bytesToWrite int) (int64, error) {
|
||||
// if larger than maxFileSizeBytes then stop writing to disk and discard cached file
|
||||
if bytesWritten+int64(bytesToWrite) > int64(maxFileSizeBytes) {
|
||||
return 0, errFileIsTooLarge
|
||||
return 0, ErrFileIsTooLarge
|
||||
}
|
||||
// write to hasher and to disk
|
||||
bytesTemp, writeErr := tmpFileWriter.Write(buffer[:bytesToWrite])
|
||||
|
|
@ -117,7 +119,7 @@ func writeToDiskAndHasher(tmpFileWriter *bufio.Writer, hasher hash.Hash, bytesWr
|
|||
return int64(bytesTemp), nil
|
||||
}
|
||||
|
||||
// readAndHashAndWriteWithLimit works like io.Copy except it copies from the reqReader to the
|
||||
// ReadAndHashAndWriteWithLimit works like io.Copy except it copies from the reqReader to the
|
||||
// optionally-supplied respWriter and a temporary file named 'content' using a bufio.Writer.
|
||||
// The data written to disk is hashed using the SHA-256 algorithm.
|
||||
// If there is an error with the reqReader or the respWriter, that is considered an error.
|
||||
|
|
@ -127,7 +129,7 @@ func writeToDiskAndHasher(tmpFileWriter *bufio.Writer, hasher hash.Hash, bytesWr
|
|||
// If a respWriter is provided, all the data will be proxied from the reqReader to
|
||||
// the respWriter, regardless of errors or limits on writing to disk.
|
||||
// Returns all of the hash sum, bytes written to disk, and temporary directory path, or an error.
|
||||
func readAndHashAndWriteWithLimit(reqReader io.Reader, maxFileSizeBytes types.ContentLength, absBasePath types.Path, respWriter http.ResponseWriter) (types.Base64Hash, types.ContentLength, types.ContentLength, types.Path, error) {
|
||||
func ReadAndHashAndWriteWithLimit(reqReader io.Reader, maxFileSizeBytes types.ContentLength, absBasePath types.Path, respWriter http.ResponseWriter) (types.Base64Hash, types.ContentLength, types.ContentLength, types.Path, error) {
|
||||
// create the temporary file writer
|
||||
tmpFileWriter, tmpFile, tmpDir, err := createTempFileWriter(absBasePath)
|
||||
if err != nil {
|
||||
|
|
@ -157,7 +159,7 @@ func readAndHashAndWriteWithLimit(reqReader io.Reader, maxFileSizeBytes types.Co
|
|||
bytesTemp, copyError = writeToResponse(respWriter, buffer, bytesRead)
|
||||
bytesResponded += bytesTemp
|
||||
if copyError == nil {
|
||||
// Note: if we get here then copyError != errFileIsTooLarge && copyError != errWrite
|
||||
// Note: if we get here then copyError != ErrFileIsTooLarge && copyError != errWrite
|
||||
// as if copyError == errResponse || copyError == errWrite then we would have broken
|
||||
// out of the loop and there are no other cases
|
||||
bytesTemp, copyError = writeToDiskAndHasher(tmpFileWriter, hasher, bytesWritten, maxFileSizeBytes, buffer, (bytesRead))
|
||||
|
|
@ -186,7 +188,7 @@ func readAndHashAndWriteWithLimit(reqReader io.Reader, maxFileSizeBytes types.Co
|
|||
return types.Base64Hash(base64.URLEncoding.EncodeToString(hash[:])), types.ContentLength(bytesResponded), types.ContentLength(bytesWritten), tmpDir, nil
|
||||
}
|
||||
|
||||
// getPathFromMediaMetadata validates and constructs the on-disk path to the media
|
||||
// GetPathFromMediaMetadata validates and constructs the on-disk path to the media
|
||||
// based on its origin and mediaID
|
||||
// If a mediaID is too short, which could happen for other homeserver implementations,
|
||||
// place it into a short-id subdirectory of the origin directory
|
||||
|
|
@ -195,7 +197,7 @@ func readAndHashAndWriteWithLimit(reqReader io.Reader, maxFileSizeBytes types.Co
|
|||
// mediaID is 'qwerty', we create subdirectories 'q', 'w' within 'q' and place the file
|
||||
// in 'q/w' calling it 'erty'. If the mediaID is shorter than 3 characters, the last
|
||||
// character is the file name and the preceding character, if any, is a subdirectory name.
|
||||
func getPathFromMediaMetadata(m *types.MediaMetadata, absBasePath types.Path) (string, error) {
|
||||
func GetPathFromMediaMetadata(m *types.MediaMetadata, absBasePath types.Path) (string, error) {
|
||||
var subPath, fileName string
|
||||
|
||||
hashLen := len(m.Base64Hash)
|
||||
|
|
@ -253,16 +255,16 @@ func moveFile(src types.Path, dst types.Path) error {
|
|||
return nil
|
||||
}
|
||||
|
||||
// moveFileWithHashCheck attempts to move the file src to dst and checks for hash collisions based on metadata
|
||||
// MoveFileWithHashCheck attempts to move the file src to dst and checks for hash collisions based on metadata
|
||||
// Check if destination file exists. As the destination is based on a hash of the file data,
|
||||
// if it exists and the content length does not match then there is a hash collision for two different files. If
|
||||
// it exists and the content length matches, it is believable that it is the same file and we can just
|
||||
// discard the temporary file.
|
||||
func moveFileWithHashCheck(tmpDir types.Path, mediaMetadata *types.MediaMetadata, absBasePath types.Path, logger *log.Entry) (string, bool, error) {
|
||||
func MoveFileWithHashCheck(tmpDir types.Path, mediaMetadata *types.MediaMetadata, absBasePath types.Path, logger *log.Entry) (string, bool, error) {
|
||||
duplicate := false
|
||||
finalPath, err := getPathFromMediaMetadata(mediaMetadata, absBasePath)
|
||||
finalPath, err := GetPathFromMediaMetadata(mediaMetadata, absBasePath)
|
||||
if err != nil {
|
||||
removeDir(tmpDir, logger)
|
||||
RemoveDir(tmpDir, logger)
|
||||
return "", duplicate, fmt.Errorf("failed to get file path from metadata: %q", err)
|
||||
}
|
||||
|
||||
|
|
@ -270,11 +272,11 @@ func moveFileWithHashCheck(tmpDir types.Path, mediaMetadata *types.MediaMetadata
|
|||
if stat, err = os.Stat(finalPath); os.IsExist(err) {
|
||||
duplicate = true
|
||||
if stat.Size() == int64(mediaMetadata.ContentLength) {
|
||||
removeDir(tmpDir, logger)
|
||||
RemoveDir(tmpDir, logger)
|
||||
return finalPath, duplicate, nil
|
||||
}
|
||||
// Remove the tmpDir as we anyway cannot cache the file on disk due to the hash collision
|
||||
removeDir(tmpDir, logger)
|
||||
RemoveDir(tmpDir, logger)
|
||||
return "", duplicate, fmt.Errorf("downloaded file with hash collision but different file size (%v)", finalPath)
|
||||
}
|
||||
err = moveFile(
|
||||
|
|
@ -282,7 +284,7 @@ func moveFileWithHashCheck(tmpDir types.Path, mediaMetadata *types.MediaMetadata
|
|||
types.Path(finalPath),
|
||||
)
|
||||
if err != nil {
|
||||
removeDir(tmpDir, logger)
|
||||
RemoveDir(tmpDir, logger)
|
||||
return "", duplicate, fmt.Errorf("failed to move file to final destination (%v): %q", finalPath, err)
|
||||
}
|
||||
return finalPath, duplicate, nil
|
||||
|
|
@ -30,6 +30,7 @@ import (
|
|||
log "github.com/Sirupsen/logrus"
|
||||
"github.com/matrix-org/dendrite/clientapi/jsonerror"
|
||||
"github.com/matrix-org/dendrite/mediaapi/config"
|
||||
"github.com/matrix-org/dendrite/mediaapi/fileutils"
|
||||
"github.com/matrix-org/dendrite/mediaapi/storage"
|
||||
"github.com/matrix-org/dendrite/mediaapi/types"
|
||||
"github.com/matrix-org/gomatrixserverlib"
|
||||
|
|
@ -183,7 +184,7 @@ func (r *downloadRequest) respondFromLocalFile(w http.ResponseWriter, absBasePat
|
|||
"Content-Disposition": r.MediaMetadata.ContentDisposition,
|
||||
}).Infof("Downloading file")
|
||||
|
||||
filePath, err := getPathFromMediaMetadata(r.MediaMetadata, absBasePath)
|
||||
filePath, err := fileutils.GetPathFromMediaMetadata(r.MediaMetadata, absBasePath)
|
||||
if err != nil {
|
||||
// FIXME: Remove erroneous file from database?
|
||||
r.Logger.WithError(err).Warn("Failed to get file path from metadata")
|
||||
|
|
@ -341,7 +342,7 @@ func (r *downloadRequest) commitFileAndMetadata(tmpDir types.Path, absBasePath t
|
|||
}).Info("Storing file metadata to media repository database")
|
||||
|
||||
// The database is the source of truth so we need to have moved the file first
|
||||
finalPath, duplicate, err := moveFileWithHashCheck(tmpDir, r.MediaMetadata, absBasePath, r.Logger)
|
||||
finalPath, duplicate, err := fileutils.MoveFileWithHashCheck(tmpDir, r.MediaMetadata, absBasePath, r.Logger)
|
||||
if err != nil {
|
||||
r.Logger.WithError(err).Error("Failed to move file.")
|
||||
return updateActiveRemoteRequests
|
||||
|
|
@ -367,7 +368,7 @@ func (r *downloadRequest) commitFileAndMetadata(tmpDir types.Path, absBasePath t
|
|||
// remove the file if it is not a duplicate.
|
||||
if duplicate == false {
|
||||
finalDir := path.Dir(finalPath)
|
||||
removeDir(types.Path(finalDir), r.Logger)
|
||||
fileutils.RemoveDir(types.Path(finalDir), r.Logger)
|
||||
}
|
||||
completeRemoteRequest(activeRemoteRequests, mxcURL)
|
||||
return updateActiveRemoteRequests
|
||||
|
|
@ -447,18 +448,18 @@ func (r *downloadRequest) respondFromRemoteFile(w http.ResponseWriter, absBasePa
|
|||
// integrity checks on the file data in the repository.
|
||||
// bytesResponded is the total number of bytes written to the response to the client request
|
||||
// bytesWritten is the total number of bytes written to disk
|
||||
hash, bytesResponded, bytesWritten, tmpDir, copyError := readAndHashAndWriteWithLimit(resp.Body, maxFileSizeBytes, absBasePath, w)
|
||||
hash, bytesResponded, bytesWritten, tmpDir, copyError := fileutils.ReadAndHashAndWriteWithLimit(resp.Body, maxFileSizeBytes, absBasePath, w)
|
||||
|
||||
if copyError != nil {
|
||||
logFields := log.Fields{
|
||||
"MediaID": r.MediaMetadata.MediaID,
|
||||
"Origin": r.MediaMetadata.Origin,
|
||||
}
|
||||
if copyError == errFileIsTooLarge {
|
||||
if copyError == fileutils.ErrFileIsTooLarge {
|
||||
logFields["MaxFileSizeBytes"] = maxFileSizeBytes
|
||||
}
|
||||
r.Logger.WithError(copyError).WithFields(logFields).Warn("Error while transferring file")
|
||||
removeDir(tmpDir, r.Logger)
|
||||
fileutils.RemoveDir(tmpDir, r.Logger)
|
||||
// Note: if we have responded with any data in the body at all then we have already sent 200 OK and we can only abort at this point
|
||||
if bytesResponded < 1 {
|
||||
r.jsonErrorResponse(w, util.JSONResponse{
|
||||
|
|
|
|||
|
|
@ -26,6 +26,7 @@ import (
|
|||
"github.com/matrix-org/dendrite/clientapi/auth"
|
||||
"github.com/matrix-org/dendrite/clientapi/jsonerror"
|
||||
"github.com/matrix-org/dendrite/mediaapi/config"
|
||||
"github.com/matrix-org/dendrite/mediaapi/fileutils"
|
||||
"github.com/matrix-org/dendrite/mediaapi/storage"
|
||||
"github.com/matrix-org/dendrite/mediaapi/types"
|
||||
"github.com/matrix-org/util"
|
||||
|
|
@ -142,7 +143,7 @@ func parseAndValidateRequest(req *http.Request, cfg *config.MediaAPI) (*uploadRe
|
|||
// In case of any error, appropriate files and directories are cleaned up a
|
||||
// util.JSONResponse error is returned.
|
||||
func storeFileAndMetadata(tmpDir types.Path, absBasePath types.Path, mediaMetadata *types.MediaMetadata, db *storage.Database, logger *log.Entry) *util.JSONResponse {
|
||||
finalPath, duplicate, err := moveFileWithHashCheck(tmpDir, mediaMetadata, absBasePath, logger)
|
||||
finalPath, duplicate, err := fileutils.MoveFileWithHashCheck(tmpDir, mediaMetadata, absBasePath, logger)
|
||||
if err != nil {
|
||||
logger.WithError(err).Error("Failed to move file.")
|
||||
return &util.JSONResponse{
|
||||
|
|
@ -160,7 +161,7 @@ func storeFileAndMetadata(tmpDir types.Path, absBasePath types.Path, mediaMetada
|
|||
// there is valid metadata in the database for that file. As such we only
|
||||
// remove the file if it is not a duplicate.
|
||||
if duplicate == false {
|
||||
removeDir(types.Path(path.Dir(finalPath)), logger)
|
||||
fileutils.RemoveDir(types.Path(path.Dir(finalPath)), logger)
|
||||
}
|
||||
return &util.JSONResponse{
|
||||
Code: 400,
|
||||
|
|
@ -197,18 +198,18 @@ func Upload(req *http.Request, cfg *config.MediaAPI, db *storage.Database) util.
|
|||
// method of deduplicating files to save storage, as well as a way to conduct
|
||||
// integrity checks on the file data in the repository.
|
||||
// bytesWritten is the total number of bytes written to disk
|
||||
hash, _, bytesWritten, tmpDir, copyError := readAndHashAndWriteWithLimit(req.Body, cfg.MaxFileSizeBytes, cfg.AbsBasePath, nil)
|
||||
hash, _, bytesWritten, tmpDir, copyError := fileutils.ReadAndHashAndWriteWithLimit(req.Body, cfg.MaxFileSizeBytes, cfg.AbsBasePath, nil)
|
||||
|
||||
if copyError != nil {
|
||||
logFields := log.Fields{
|
||||
"Origin": r.MediaMetadata.Origin,
|
||||
"MediaID": r.MediaMetadata.MediaID,
|
||||
}
|
||||
if copyError == errFileIsTooLarge {
|
||||
if copyError == fileutils.ErrFileIsTooLarge {
|
||||
logFields["MaxFileSizeBytes"] = cfg.MaxFileSizeBytes
|
||||
}
|
||||
logger.WithError(copyError).WithFields(logFields).Warn("Error while transferring file")
|
||||
removeDir(tmpDir, logger)
|
||||
fileutils.RemoveDir(tmpDir, logger)
|
||||
return util.JSONResponse{
|
||||
Code: 400,
|
||||
JSON: jsonerror.Unknown(fmt.Sprintf("Failed to upload")),
|
||||
|
|
@ -233,7 +234,7 @@ func Upload(req *http.Request, cfg *config.MediaAPI, db *storage.Database) util.
|
|||
mediaMetadata, err := db.GetMediaMetadata(r.MediaMetadata.MediaID, r.MediaMetadata.Origin)
|
||||
if err == nil {
|
||||
r.MediaMetadata = mediaMetadata
|
||||
removeDir(tmpDir, logger)
|
||||
fileutils.RemoveDir(tmpDir, logger)
|
||||
return util.JSONResponse{
|
||||
Code: 200,
|
||||
JSON: uploadResponse{
|
||||
|
|
|
|||
Loading…
Reference in a new issue