1
0
Fork 0
mirror of https://github.com/matrix-org/dendrite.git synced 2025-04-06 17:04:31 -05:00

mediaapi/writers: Add base64hash to media_repository table

A SHA-256 hash sum, encoded with Go's base64 URLEncoding (whose output
contains only URL-safe characters), is now calculated and stored for
every file transferred to this server.

Uploads to the server use this hash as the MediaID. Downloads from
remote servers retain their MediaID from the remote server, but can use
the hash for local deduplication and integrity checking purposes.
This commit is contained in:
Robert Swain 2017-05-22 10:24:03 +02:00
parent 370cb74d2d
commit 5536fec902
4 changed files with 15 additions and 3 deletions
src/github.com/matrix-org/dendrite/mediaapi

View file

@ -42,6 +42,8 @@ CREATE TABLE IF NOT EXISTS media_repository (
creation_ts BIGINT NOT NULL,
-- The file name with which the media was uploaded.
upload_name TEXT NOT NULL,
-- A golang base64 URLEncoding string representation of a SHA-256 hash sum of the file data.
base64hash TEXT NOT NULL,
-- The user who uploaded the file. Should be a Matrix user ID.
user_id TEXT NOT NULL
);
@ -49,12 +51,12 @@ CREATE UNIQUE INDEX IF NOT EXISTS media_repository_index ON media_repository (me
`
const insertMediaSQL = `
INSERT INTO media_repository (media_id, media_origin, content_type, content_disposition, content_length, creation_ts, upload_name, user_id)
VALUES ($1, $2, $3, $4, $5, $6, $7, $8)
INSERT INTO media_repository (media_id, media_origin, content_type, content_disposition, content_length, creation_ts, upload_name, base64hash, user_id)
VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9)
`
const selectMediaSQL = `
SELECT content_type, content_disposition, content_length, creation_ts, upload_name, user_id FROM media_repository WHERE media_id = $1 AND media_origin = $2
SELECT content_type, content_disposition, content_length, creation_ts, upload_name, base64hash, user_id FROM media_repository WHERE media_id = $1 AND media_origin = $2
`
type mediaStatements struct {
@ -84,6 +86,7 @@ func (s *mediaStatements) insertMedia(mediaMetadata *types.MediaMetadata) error
mediaMetadata.ContentLength,
mediaMetadata.CreationTimestamp,
mediaMetadata.UploadName,
mediaMetadata.Base64Hash,
mediaMetadata.UserID,
)
return err
@ -102,6 +105,7 @@ func (s *mediaStatements) selectMedia(mediaID types.MediaID, mediaOrigin gomatri
&mediaMetadata.ContentLength,
&mediaMetadata.CreationTimestamp,
&mediaMetadata.UploadName,
&mediaMetadata.Base64Hash,
&mediaMetadata.UserID,
)
return &mediaMetadata, err

View file

@ -32,6 +32,9 @@ type ContentType string
// Filename is a string representing the name of a file, for example the
// name with which a piece of media was uploaded.
type Filename string
// Base64Hash is a base64 URLEncoding string representation of a SHA-256 hash
// sum of a file's data. The URLEncoding alphabet contains only URL-safe
// characters.
type Base64Hash string
// Path is an absolute or relative UNIX filesystem path, such as a temporary
// directory or a base storage path.
type Path string
@ -56,6 +59,7 @@ type MediaMetadata struct {
ContentLength ContentLength
CreationTimestamp UnixMs
UploadName Filename
Base64Hash Base64Hash
UserID MatrixUserID
}

View file

@ -332,6 +332,7 @@ func (r *downloadRequest) commitFileAndMetadata(tmpDir types.Path, absBasePath t
r.Logger.WithFields(log.Fields{
"MediaID": r.MediaMetadata.MediaID,
"Origin": r.MediaMetadata.Origin,
"Base64Hash": r.MediaMetadata.Base64Hash,
"UploadName": r.MediaMetadata.UploadName,
"Content-Length": r.MediaMetadata.ContentLength,
"Content-Type": r.MediaMetadata.ContentType,
@ -483,6 +484,7 @@ func (r *downloadRequest) respondFromRemoteFile(w http.ResponseWriter, absBasePa
// request's response. bytesWritten is therefore used as it is what would be sent to clients when reading from the local
// file.
r.MediaMetadata.ContentLength = types.ContentLength(bytesWritten)
r.MediaMetadata.Base64Hash = hash
r.MediaMetadata.UserID = types.MatrixUserID("@:" + string(r.MediaMetadata.Origin))
updateActiveRemoteRequests = r.commitFileAndMetadata(tmpDir, absBasePath, activeRemoteRequests, db, mxcURL)

View file

@ -220,11 +220,13 @@ func Upload(req *http.Request, cfg *config.MediaAPI, db *storage.Database) util.
}
r.MediaMetadata.ContentLength = bytesWritten
r.MediaMetadata.Base64Hash = hash
r.MediaMetadata.MediaID = types.MediaID(hash)
logger.WithFields(log.Fields{
"MediaID": r.MediaMetadata.MediaID,
"Origin": r.MediaMetadata.Origin,
"Base64Hash": r.MediaMetadata.Base64Hash,
"UploadName": r.MediaMetadata.UploadName,
"Content-Length": r.MediaMetadata.ContentLength,
"Content-Type": r.MediaMetadata.ContentType,