From 4d1bff2f61b48b1209e1d2aadc9b1d0fb15b2bbf Mon Sep 17 00:00:00 2001 From: Robert Swain Date: Mon, 24 Apr 2017 18:31:44 +0200 Subject: [PATCH 001/108] mediaapi: Initial commit for /upload HTTP infra --- .../cmd/dendrite-media-api-server/main.go | 54 +++++++ .../matrix-org/dendrite/mediaapi/README.md | 3 + .../dendrite/mediaapi/config/config.go | 25 ++++ .../dendrite/mediaapi/routing/routing.go | 45 ++++++ .../dendrite/mediaapi/storage/media.go | 47 ++++++ .../dendrite/mediaapi/storage/prepare.go | 37 +++++ .../dendrite/mediaapi/storage/sql.go | 33 +++++ .../dendrite/mediaapi/storage/storage.go | 38 +++++ .../dendrite/mediaapi/writers/upload.go | 139 ++++++++++++++++++ 9 files changed, 421 insertions(+) create mode 100644 src/github.com/matrix-org/dendrite/cmd/dendrite-media-api-server/main.go create mode 100644 src/github.com/matrix-org/dendrite/mediaapi/README.md create mode 100644 src/github.com/matrix-org/dendrite/mediaapi/config/config.go create mode 100644 src/github.com/matrix-org/dendrite/mediaapi/routing/routing.go create mode 100644 src/github.com/matrix-org/dendrite/mediaapi/storage/media.go create mode 100644 src/github.com/matrix-org/dendrite/mediaapi/storage/prepare.go create mode 100644 src/github.com/matrix-org/dendrite/mediaapi/storage/sql.go create mode 100644 src/github.com/matrix-org/dendrite/mediaapi/storage/storage.go create mode 100644 src/github.com/matrix-org/dendrite/mediaapi/writers/upload.go diff --git a/src/github.com/matrix-org/dendrite/cmd/dendrite-media-api-server/main.go b/src/github.com/matrix-org/dendrite/cmd/dendrite-media-api-server/main.go new file mode 100644 index 000000000..884eac80f --- /dev/null +++ b/src/github.com/matrix-org/dendrite/cmd/dendrite-media-api-server/main.go @@ -0,0 +1,54 @@ +// Copyright 2017 Vector Creations Ltd +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
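+
+// dendrite-media-api-server wires up the media API: it reads the BIND_ADDRESS,
+// DATABASE and LOG_DIR environment variables, builds a config.MediaAPI and
+// serves the routes registered by routing.Setup on the default ServeMux.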
+ +package main + +import ( + "net/http" + "os" + + "github.com/matrix-org/dendrite/common" + "github.com/matrix-org/dendrite/mediaapi/config" + "github.com/matrix-org/dendrite/mediaapi/routing" + + log "github.com/Sirupsen/logrus" +) + +var ( + bindAddr = os.Getenv("BIND_ADDRESS") + database = os.Getenv("DATABASE") + logDir = os.Getenv("LOG_DIR") +) + +func main() { + common.SetupLogging(logDir) + if bindAddr == "" { + log.Panic("No BIND_ADDRESS environment variable found.") + } + // db, err := storage.Open(database) + // if err != nil { + // panic(err) + // } + + cfg := config.MediaAPI{ + ServerName: "localhost", + BasePath: "/Users/robertsw/dendrite", + DataSource: database, + } + + log.Info("Starting mediaapi") + + routing.Setup(http.DefaultServeMux, http.DefaultClient, cfg) + log.Fatal(http.ListenAndServe(bindAddr, nil)) +} diff --git a/src/github.com/matrix-org/dendrite/mediaapi/README.md b/src/github.com/matrix-org/dendrite/mediaapi/README.md new file mode 100644 index 000000000..2f51e8fe6 --- /dev/null +++ b/src/github.com/matrix-org/dendrite/mediaapi/README.md @@ -0,0 +1,3 @@ +# Media API + +This server is responsible for serving `/media` requests diff --git a/src/github.com/matrix-org/dendrite/mediaapi/config/config.go b/src/github.com/matrix-org/dendrite/mediaapi/config/config.go new file mode 100644 index 000000000..5900d9d56 --- /dev/null +++ b/src/github.com/matrix-org/dendrite/mediaapi/config/config.go @@ -0,0 +1,25 @@ +// Copyright 2017 Vector Creations Ltd +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package config + +// MediaAPI contains the config information necessary to spin up a mediaapi process. +type MediaAPI struct { + // The name of the server. This is usually the domain name, e.g 'matrix.org', 'localhost'. + ServerName string `yaml:"server_name"` + // The base path to where media files will be stored. + BasePath string `yaml:"base_path"` + // The postgres connection config for connecting to the database e.g a postgres:// URI + DataSource string `yaml:"database"` +} diff --git a/src/github.com/matrix-org/dendrite/mediaapi/routing/routing.go b/src/github.com/matrix-org/dendrite/mediaapi/routing/routing.go new file mode 100644 index 000000000..fd5ff7384 --- /dev/null +++ b/src/github.com/matrix-org/dendrite/mediaapi/routing/routing.go @@ -0,0 +1,45 @@ +// Copyright 2017 Vector Creations Ltd +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
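+
+// Package routing registers the media endpoints on a gorilla/mux router under
+// /_matrix/media/v1, mounts that router on the supplied ServeMux under /api,
+// and exposes Prometheus metrics on /metrics.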
+ +package routing + +import ( + "net/http" + + "github.com/gorilla/mux" + "github.com/matrix-org/dendrite/mediaapi/config" + "github.com/matrix-org/dendrite/mediaapi/writers" + "github.com/matrix-org/util" + "github.com/prometheus/client_golang/prometheus" +) + +const pathPrefixR0 = "/_matrix/media/v1" + +// Setup registers HTTP handlers with the given ServeMux. It also supplies the given http.Client +// to clients which need to make outbound HTTP requests. +func Setup(servMux *http.ServeMux, httpClient *http.Client, cfg config.MediaAPI) { + apiMux := mux.NewRouter() + r0mux := apiMux.PathPrefix(pathPrefixR0).Subrouter() + r0mux.Handle("/upload", make("upload", util.NewJSONRequestHandler(func(req *http.Request) util.JSONResponse { + return writers.Upload(req, cfg) + }))) + + servMux.Handle("/metrics", prometheus.Handler()) + servMux.Handle("/api/", http.StripPrefix("/api", apiMux)) +} + +// make a util.JSONRequestHandler into an http.Handler +func make(metricsName string, h util.JSONRequestHandler) http.Handler { + return prometheus.InstrumentHandler(metricsName, util.MakeJSONAPI(h)) +} diff --git a/src/github.com/matrix-org/dendrite/mediaapi/storage/media.go b/src/github.com/matrix-org/dendrite/mediaapi/storage/media.go new file mode 100644 index 000000000..f87401d73 --- /dev/null +++ b/src/github.com/matrix-org/dendrite/mediaapi/storage/media.go @@ -0,0 +1,47 @@ +// Copyright 2017 Vector Creations Ltd +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package storage + +import ( + "database/sql" +) + +const mediaSchema = ` +` + +const insertMediaSQL = "" + + "INSERT INTO events (room_nid, event_type_nid, event_state_key_nid, event_id, reference_sha256, auth_event_nids)" + + " VALUES ($1, $2, $3, $4, $5, $6)" + + " ON CONFLICT ON CONSTRAINT event_id_unique" + + " DO NOTHING" + + " RETURNING event_nid, state_snapshot_nid" + +type mediaStatements struct { + insertMediaStmt *sql.Stmt +} + +func (s *mediaStatements) prepare(db *sql.DB) (err error) { + _, err = db.Exec(mediaSchema) + if err != nil { + return + } + + return statementList{ + {&s.insertMediaStmt, insertMediaSQL}, + }.prepare(db) +} + +func (s *mediaStatements) insertMedia() { +} diff --git a/src/github.com/matrix-org/dendrite/mediaapi/storage/prepare.go b/src/github.com/matrix-org/dendrite/mediaapi/storage/prepare.go new file mode 100644 index 000000000..a30586de4 --- /dev/null +++ b/src/github.com/matrix-org/dendrite/mediaapi/storage/prepare.go @@ -0,0 +1,37 @@ +// Copyright 2017 Vector Creations Ltd +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+// See the License for the specific language governing permissions and +// limitations under the License. + +// FIXME: This should be made common! + +package storage + +import ( + "database/sql" +) + +// a statementList is a list of SQL statements to prepare and a pointer to where to store the resulting prepared statement. +type statementList []struct { + statement **sql.Stmt + sql string +} + +// prepare the SQL for each statement in the list and assign the result to the prepared statement. +func (s statementList) prepare(db *sql.DB) (err error) { + for _, statement := range s { + if *statement.statement, err = db.Prepare(statement.sql); err != nil { + return + } + } + return +} diff --git a/src/github.com/matrix-org/dendrite/mediaapi/storage/sql.go b/src/github.com/matrix-org/dendrite/mediaapi/storage/sql.go new file mode 100644 index 000000000..e992e073e --- /dev/null +++ b/src/github.com/matrix-org/dendrite/mediaapi/storage/sql.go @@ -0,0 +1,33 @@ +// Copyright 2017 Vector Creations Ltd +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package storage + +import ( + "database/sql" +) + +type statements struct { + mediaStatements +} + +func (s *statements) prepare(db *sql.DB) error { + var err error + + if err = s.mediaStatements.prepare(db); err != nil { + return err + } + + return nil +} diff --git a/src/github.com/matrix-org/dendrite/mediaapi/storage/storage.go b/src/github.com/matrix-org/dendrite/mediaapi/storage/storage.go new file mode 100644 index 000000000..0c881a480 --- /dev/null +++ b/src/github.com/matrix-org/dendrite/mediaapi/storage/storage.go @@ -0,0 +1,38 @@ +// Copyright 2017 Vector Creations Ltd +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package storage + +import ( + "database/sql" +) + +// A Database is used to store room events and stream offsets. +type Database struct { + statements statements + db *sql.DB +} + +// Open a postgres database. 
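+// Note that sql.Open("postgres", …) needs a registered postgres driver to
+// succeed; the blank import of github.com/lib/pq that provides one is only
+// added in a later patch in this series.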
+func Open(dataSourceName string) (*Database, error) { + var d Database + var err error + if d.db, err = sql.Open("postgres", dataSourceName); err != nil { + return nil, err + } + if err = d.statements.prepare(d.db); err != nil { + return nil, err + } + return &d, nil +} diff --git a/src/github.com/matrix-org/dendrite/mediaapi/writers/upload.go b/src/github.com/matrix-org/dendrite/mediaapi/writers/upload.go new file mode 100644 index 000000000..0e0d16c8b --- /dev/null +++ b/src/github.com/matrix-org/dendrite/mediaapi/writers/upload.go @@ -0,0 +1,139 @@ +// Copyright 2017 Vector Creations Ltd +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package writers + +import ( + "net/http" + "strconv" + "strings" + + log "github.com/Sirupsen/logrus" + "github.com/matrix-org/dendrite/clientapi/auth" + "github.com/matrix-org/dendrite/clientapi/jsonerror" + "github.com/matrix-org/dendrite/mediaapi/config" + "github.com/matrix-org/util" +) + +// https://matrix.org/docs/spec/client_server/r0.2.0.html#post-matrix-media-r0-upload +// NOTE: ContentType is an HTTP request header and Filename is passed as a query parameter +type uploadRequest struct { + ContentDisposition string + ContentLength int + ContentType string + Filename string + Method string + UserID string +} + +func (r uploadRequest) Validate() *util.JSONResponse { + // TODO: Any validation to be done on ContentDisposition? + if r.ContentLength < 1 { + return &util.JSONResponse{ + Code: 400, + JSON: jsonerror.BadJSON("HTTP Content-Length request header must be greater than zero."), + } + } + // TODO: Check if the Content-Type is a valid type? + if r.ContentType == "" { + return &util.JSONResponse{ + Code: 400, + JSON: jsonerror.BadJSON("HTTP Content-Type request header must be set."), + } + } + // TODO: Validate filename - what are the valid characters? + if r.Method != "POST" { + return &util.JSONResponse{ + Code: 400, + JSON: jsonerror.BadJSON("HTTP request method must be POST."), + } + } + if r.UserID != "" { + // TODO: We should put user ID parsing code into gomatrixserverlib and use that instead + // (see https://github.com/matrix-org/gomatrixserverlib/blob/3394e7c7003312043208aa73727d2256eea3d1f6/eventcontent.go#L347 ) + // It should be a struct (with pointers into a single string to avoid copying) and + // we should update all refs to use UserID types rather than strings. 
+ // https://github.com/matrix-org/synapse/blob/v0.19.2/synapse/types.py#L92 + if len(r.UserID) == 0 || r.UserID[0] != '@' { + return &util.JSONResponse{ + Code: 400, + JSON: jsonerror.BadJSON("user id must start with '@'"), + } + } + parts := strings.SplitN(r.UserID[1:], ":", 2) + if len(parts) != 2 { + return &util.JSONResponse{ + Code: 400, + JSON: jsonerror.BadJSON("user id must be in the form @localpart:domain"), + } + } + } + return nil +} + +// https://matrix.org/docs/spec/client_server/r0.2.0.html#post-matrix-media-r0-upload +type uploadResponse struct { + ContentURI string `json:"content_uri"` +} + +// Upload implements /upload +func Upload(req *http.Request, cfg config.MediaAPI) util.JSONResponse { + logger := util.GetLogger(req.Context()) + + // FIXME: This will require querying some other component/db but currently + // just accepts a user id for auth + userID, resErr := auth.VerifyAccessToken(req) + if resErr != nil { + return *resErr + } + + // req.Header.Get() returns "" if no header + // strconv.Atoi() returns 0 when parsing "" + contentLength, _ := strconv.Atoi(req.Header.Get("Content-Length")) + + r := uploadRequest{ + ContentDisposition: req.Header.Get("Content-Disposition"), + ContentLength: contentLength, + ContentType: req.Header.Get("Content-Type"), + Filename: req.FormValue("filename"), + Method: req.Method, + UserID: userID, + } + + if resErr = r.Validate(); resErr != nil { + return *resErr + } + + logger.WithFields(log.Fields{ + "ContentType": r.ContentType, + "Filename": r.Filename, + "UserID": r.UserID, + }).Info("Uploading file") + + // TODO: Store file to disk + // - make path to file + // - progressive writing (could support Content-Length 0 and cut off + // after some max upload size is exceeded) + // - generate id (ideally a hash but a random string to start with) + // - generate thumbnails + // TODO: Write metadata to database + // TODO: Respond to request + + return util.JSONResponse{ + Code: 200, + JSON: uploadResponse{ + ContentURI: "mxc://example.com/AQwafuaFswefuhsfAFAgsw", + }, + } +} From d9ee22d04312fbdef021c8030edd44d0fe84ff31 Mon Sep 17 00:00:00 2001 From: Robert Swain Date: Wed, 26 Apr 2017 12:11:22 +0200 Subject: [PATCH 002/108] mediaapi: Hack in SQL db storage and Erik's gotest file upload code After this, upload in a usual case now works but the code surely needs cleanup. 
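For reference, a minimal client-side sketch of an upload against the routes
wired up so far. The bind address/port, the filename and the access_token
query parameter are illustrative assumptions, not part of the patch;
upload.go's auth check is still a stub that treats the token as a user ID.

package main

import (
	"fmt"
	"net/http"
	"os"
)

func main() {
	f, err := os.Open("avatar.png")
	if err != nil {
		panic(err)
	}
	defer f.Close()

	fi, err := f.Stat()
	if err != nil {
		panic(err)
	}

	// routing.Setup mounts the media router under /api, so the full path is
	// /api/_matrix/media/v1/upload. The port and the access_token value are
	// assumptions for illustration.
	url := "http://localhost:7774/api/_matrix/media/v1/upload" +
		"?filename=avatar.png&access_token=%40alice%3Alocalhost"

	req, err := http.NewRequest("POST", url, f)
	if err != nil {
		panic(err)
	}
	req.Header.Set("Content-Type", "image/png")
	// Validate() rejects uploads without a positive Content-Length.
	req.ContentLength = fi.Size()

	resp, err := http.DefaultClient.Do(req)
	if err != nil {
		panic(err)
	}
	defer resp.Body.Close()
	fmt.Println(resp.Status) // expect "200 OK" and a JSON body with content_uri
}

On success the handler replies with 200 and a JSON body whose content_uri has
the form mxc://<server_name>/<media id>.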
--- .../cmd/dendrite-media-api-server/main.go | 18 +- .../dendrite/mediaapi/routing/routing.go | 5 +- .../dendrite/mediaapi/storage/fileio.go | 92 ++++++ .../dendrite/mediaapi/storage/media.go | 41 ++- .../dendrite/mediaapi/storage/repository.go | 283 ++++++++++++++++++ .../dendrite/mediaapi/storage/storage.go | 8 + .../dendrite/mediaapi/writers/upload.go | 67 ++++- 7 files changed, 485 insertions(+), 29 deletions(-) create mode 100644 src/github.com/matrix-org/dendrite/mediaapi/storage/fileio.go create mode 100644 src/github.com/matrix-org/dendrite/mediaapi/storage/repository.go diff --git a/src/github.com/matrix-org/dendrite/cmd/dendrite-media-api-server/main.go b/src/github.com/matrix-org/dendrite/cmd/dendrite-media-api-server/main.go index 884eac80f..31d8bfb62 100644 --- a/src/github.com/matrix-org/dendrite/cmd/dendrite-media-api-server/main.go +++ b/src/github.com/matrix-org/dendrite/cmd/dendrite-media-api-server/main.go @@ -21,6 +21,7 @@ import ( "github.com/matrix-org/dendrite/common" "github.com/matrix-org/dendrite/mediaapi/config" "github.com/matrix-org/dendrite/mediaapi/routing" + "github.com/matrix-org/dendrite/mediaapi/storage" log "github.com/Sirupsen/logrus" ) @@ -33,13 +34,10 @@ var ( func main() { common.SetupLogging(logDir) + if bindAddr == "" { log.Panic("No BIND_ADDRESS environment variable found.") } - // db, err := storage.Open(database) - // if err != nil { - // panic(err) - // } cfg := config.MediaAPI{ ServerName: "localhost", @@ -47,8 +45,18 @@ func main() { DataSource: database, } + db, err := storage.Open(cfg.DataSource) + if err != nil { + log.Panicln("Failed to open database:", err) + } + + repo := &storage.Repository{ + StorePrefix: cfg.BasePath, + MaxBytes: 61440, + } + log.Info("Starting mediaapi") - routing.Setup(http.DefaultServeMux, http.DefaultClient, cfg) + routing.Setup(http.DefaultServeMux, http.DefaultClient, cfg, db, repo) log.Fatal(http.ListenAndServe(bindAddr, nil)) } diff --git a/src/github.com/matrix-org/dendrite/mediaapi/routing/routing.go b/src/github.com/matrix-org/dendrite/mediaapi/routing/routing.go index fd5ff7384..562dc8e84 100644 --- a/src/github.com/matrix-org/dendrite/mediaapi/routing/routing.go +++ b/src/github.com/matrix-org/dendrite/mediaapi/routing/routing.go @@ -19,6 +19,7 @@ import ( "github.com/gorilla/mux" "github.com/matrix-org/dendrite/mediaapi/config" + "github.com/matrix-org/dendrite/mediaapi/storage" "github.com/matrix-org/dendrite/mediaapi/writers" "github.com/matrix-org/util" "github.com/prometheus/client_golang/prometheus" @@ -28,11 +29,11 @@ const pathPrefixR0 = "/_matrix/media/v1" // Setup registers HTTP handlers with the given ServeMux. It also supplies the given http.Client // to clients which need to make outbound HTTP requests. 
-func Setup(servMux *http.ServeMux, httpClient *http.Client, cfg config.MediaAPI) { +func Setup(servMux *http.ServeMux, httpClient *http.Client, cfg config.MediaAPI, db *storage.Database, repo *storage.Repository) { apiMux := mux.NewRouter() r0mux := apiMux.PathPrefix(pathPrefixR0).Subrouter() r0mux.Handle("/upload", make("upload", util.NewJSONRequestHandler(func(req *http.Request) util.JSONResponse { - return writers.Upload(req, cfg) + return writers.Upload(req, cfg, db, repo) }))) servMux.Handle("/metrics", prometheus.Handler()) diff --git a/src/github.com/matrix-org/dendrite/mediaapi/storage/fileio.go b/src/github.com/matrix-org/dendrite/mediaapi/storage/fileio.go new file mode 100644 index 000000000..5bd87ff8c --- /dev/null +++ b/src/github.com/matrix-org/dendrite/mediaapi/storage/fileio.go @@ -0,0 +1,92 @@ +// Copyright 2017 Vector Creations Ltd +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package storage + +import ( + "fmt" + "os" + + log "github.com/Sirupsen/logrus" +) + +// LimitedFileWriter writes only a limited number of bytes to a file. +// +// If the callee attempts to write more bytes the file is deleted and further +// writes are silently discarded. +// +// This isn't thread safe. +type LimitedFileWriter struct { + filePath string + file *os.File + writtenBytes uint64 + maxBytes uint64 +} + +// NewLimitedFileWriter creates a new LimitedFileWriter at the given location. +// +// If a file already exists at the location it is immediately truncated. +// +// A maxBytes of 0 or negative is treated as no limit. +func NewLimitedFileWriter(filePath string, maxBytes uint64) (*LimitedFileWriter, error) { + file, err := os.Create(filePath) + if err != nil { + return nil, err + } + + writer := LimitedFileWriter{ + filePath: filePath, + file: file, + maxBytes: maxBytes, + } + + return &writer, nil +} + +// Close closes the underlying file descriptor, if its open. 
+// +// Any error comes from File.Close +func (writer *LimitedFileWriter) Close() error { + if writer.file != nil { + file := writer.file + writer.file = nil + return file.Close() + } + return nil +} + +func (writer *LimitedFileWriter) Write(p []byte) (n int, err error) { + if writer.maxBytes > 0 && uint64(len(p))+writer.writtenBytes > writer.maxBytes { + if writer.file != nil { + writer.Close() + err = os.Remove(writer.filePath) + if err != nil { + log.Printf("Failed to delete file %v\n", err) + } + } + + return 0, fmt.Errorf("Reached limit") + } + + if writer.file != nil { + n, err = writer.file.Write(p) + writer.writtenBytes += uint64(n) + + if err != nil { + log.Printf("Failed to write to file %v\n", err) + } + } + + return +} diff --git a/src/github.com/matrix-org/dendrite/mediaapi/storage/media.go b/src/github.com/matrix-org/dendrite/mediaapi/storage/media.go index f87401d73..b46ab8181 100644 --- a/src/github.com/matrix-org/dendrite/mediaapi/storage/media.go +++ b/src/github.com/matrix-org/dendrite/mediaapi/storage/media.go @@ -16,17 +16,38 @@ package storage import ( "database/sql" + "time" ) const mediaSchema = ` +-- The events table holds metadata for each media upload to the local server, +-- the actual file is stored separately. +CREATE TABLE IF NOT EXISTS media_repository ( + -- The id used to refer to the media. + -- This is a base64-encoded sha256 hash of the file data + media_id TEXT PRIMARY KEY, + -- The origin of the media as requested by the client. + media_origin TEXT NOT NULL, + -- The MIME-type of the media file. + content_type TEXT NOT NULL, + -- The HTTP Content-Disposition header for the media file. + content_disposition TEXT NOT NULL DEFAULT 'inline', + -- Size of the media file in bytes. + file_size BIGINT NOT NULL, + -- When the content was uploaded in ms. + created_ts BIGINT NOT NULL, + -- The name with which the media was uploaded. + upload_name TEXT NOT NULL, + -- The user who uploaded the file. 
+ user_id TEXT NOT NULL, + UNIQUE(media_id, media_origin) +); ` -const insertMediaSQL = "" + - "INSERT INTO events (room_nid, event_type_nid, event_state_key_nid, event_id, reference_sha256, auth_event_nids)" + - " VALUES ($1, $2, $3, $4, $5, $6)" + - " ON CONFLICT ON CONSTRAINT event_id_unique" + - " DO NOTHING" + - " RETURNING event_nid, state_snapshot_nid" +const insertMediaSQL = ` +INSERT INTO media_repository (media_id, media_origin, content_type, content_disposition, file_size, created_ts, upload_name, user_id) + VALUES ($1, $2, $3, $4, $5, $6, $7, $8) +` type mediaStatements struct { insertMediaStmt *sql.Stmt @@ -43,5 +64,11 @@ func (s *mediaStatements) prepare(db *sql.DB) (err error) { }.prepare(db) } -func (s *mediaStatements) insertMedia() { +func (s *mediaStatements) insertMedia(mediaID string, mediaOrigin string, contentType string, + contentDisposition string, fileSize int64, uploadName string, userID string) error { + _, err := s.insertMediaStmt.Exec( + mediaID, mediaOrigin, contentType, contentDisposition, fileSize, + int64(time.Now().UnixNano()/1000000), uploadName, userID, + ) + return err } diff --git a/src/github.com/matrix-org/dendrite/mediaapi/storage/repository.go b/src/github.com/matrix-org/dendrite/mediaapi/storage/repository.go new file mode 100644 index 000000000..2378646ae --- /dev/null +++ b/src/github.com/matrix-org/dendrite/mediaapi/storage/repository.go @@ -0,0 +1,283 @@ +// Copyright 2017 Vector Creations Ltd +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package storage + +import ( + "crypto/sha256" + "encoding/base64" + "hash" + "io" + "io/ioutil" + "os" + "path" + + log "github.com/Sirupsen/logrus" +) + +// Description contains various attributes for an image. +type Description struct { + Type string + Length int64 +} + +type repositoryPaths struct { + contentPath string + typePath string +} + +// Repository stores locally uploaded media, and caches remote media that has +// been requested. +type Repository struct { + StorePrefix string + MaxBytes uint64 +} + +// ReaderFromRemoteCache returns a io.ReadCloser with the cached remote content, +// if it exists. Use IsNotExist to check if the error was due to it not existing +// in the cache +func (repo Repository) ReaderFromRemoteCache(host, name string) (io.ReadCloser, *Description, error) { + mediaDir := repo.getDirForRemoteMedia(host, name) + repoPaths := getPathsForMedia(mediaDir) + + return repo.readerFromRepository(repoPaths) +} + +// ReaderFromLocalRepo returns a io.ReadCloser with the locally uploaded content, +// if it exists. 
Use IsNotExist to check if the error was due to it not existing +// in the cache +func (repo Repository) ReaderFromLocalRepo(name string) (io.ReadCloser, *Description, error) { + mediaDir := repo.getDirForLocalMedia(name) + repoPaths := getPathsForMedia(mediaDir) + + return repo.readerFromRepository(repoPaths) +} + +func (repo Repository) readerFromRepository(repoPaths repositoryPaths) (io.ReadCloser, *Description, error) { + contentTypeBytes, err := ioutil.ReadFile(repoPaths.typePath) + if err != nil { + return nil, nil, err + } + + contentType := string(contentTypeBytes) + + file, err := os.Open(repoPaths.contentPath) + if err != nil { + return nil, nil, err + } + + stat, err := file.Stat() + if err != nil { + return nil, nil, err + } + + descr := Description{ + Type: contentType, + Length: stat.Size(), + } + + return file, &descr, nil +} + +// WriterToLocalRepository returns a RepositoryWriter for writing newly uploaded +// content into the repository. +// +// The returned RepositoryWriter will fail if more than MaxBytes tries to be +// written. +func (repo Repository) WriterToLocalRepository(descr Description) (RepositoryWriter, error) { + return newLocalRepositoryWriter(repo, descr) +} + +// WriterToRemoteCache returns a RepositoryWriter for caching newly downloaded +// remote content. +// +// The returned RepositoryWriter will silently stop writing if more than MaxBytes +// tries to be written and does *not* return an error. +func (repo Repository) WriterToRemoteCache(host, name string, descr Description) (RepositoryWriter, error) { + return newRemoteRepositoryWriter(repo, host, name, descr) +} + +func (repo *Repository) makeTempDir() (string, error) { + tmpDir := path.Join(repo.StorePrefix, "tmp") + os.MkdirAll(tmpDir, 0770) + return ioutil.TempDir(tmpDir, "") +} + +func (repo *Repository) getDirForLocalMedia(name string) string { + return path.Join(repo.StorePrefix, "local", name[:3], name[3:]) +} + +func (repo *Repository) getDirForRemoteMedia(host, sanitizedName string) string { + return path.Join(repo.StorePrefix, "remote", host, sanitizedName[:3], sanitizedName[3:]) +} + +// Get the actual paths for the data and metadata associated with remote media. +func getPathsForMedia(dir string) repositoryPaths { + contentPath := path.Join(dir, "content") + typePath := path.Join(dir, "type") + return repositoryPaths{ + contentPath: contentPath, + typePath: typePath, + } +} + +// IsNotExists check if error was due to content not existing in cache. +func IsNotExists(err error) bool { return os.IsNotExist(err) } + +// RepositoryWriter is used to either store into the repository newly uploaded +// media or to cache recently fetched remote media. +type RepositoryWriter interface { + io.WriteCloser + + // Finished should be called when successfully finished writing; otherwise + // the written content will not be committed to the repository. 
+ Finished() (string, error) +} + +type remoteRepositoryWriter struct { + tmpDir string + finalDir string + name string + file io.WriteCloser + erred bool +} + +func newRemoteRepositoryWriter(repo Repository, host, name string, descr Description) (*remoteRepositoryWriter, error) { + tmpFile, tmpDir, err := getTempWriter(repo, descr) + if err != nil { + log.Printf("Failed to create writer: %v\n", err) + return nil, err + } + + return &remoteRepositoryWriter{ + tmpDir: tmpDir, + finalDir: repo.getDirForRemoteMedia(host, name), + name: name, + file: tmpFile, + erred: false, + }, nil +} + +func (writer remoteRepositoryWriter) Write(p []byte) (int, error) { + // Its OK to fail when writing to the remote repo. We just hide the error + // from the layers above + if !writer.erred { + if _, err := writer.file.Write(p); err != nil { + writer.erred = true + } + } + return len(p), nil +} + +func (writer remoteRepositoryWriter) Close() error { + os.RemoveAll(writer.tmpDir) + writer.file.Close() + return nil +} + +func (writer remoteRepositoryWriter) Finished() (string, error) { + var err error + if !writer.erred { + os.MkdirAll(path.Dir(writer.finalDir), 0770) + err = os.Rename(writer.tmpDir, writer.finalDir) + if err != nil { + return "", err + } + } + err = writer.Close() + return writer.name, err +} + +type localRepositoryWriter struct { + repo Repository + tmpDir string + hasher hash.Hash + file io.WriteCloser + finished bool +} + +func newLocalRepositoryWriter(repo Repository, descr Description) (*localRepositoryWriter, error) { + tmpFile, tmpDir, err := getTempWriter(repo, descr) + if err != nil { + return nil, err + } + + return &localRepositoryWriter{ + repo: repo, + tmpDir: tmpDir, + hasher: sha256.New(), + file: tmpFile, + finished: false, + }, nil +} + +func (writer localRepositoryWriter) Write(p []byte) (int, error) { + writer.hasher.Write(p) // Never errors. 
+ n, err := writer.file.Write(p) + if err != nil { + writer.Close() + } + return n, err +} + +func (writer localRepositoryWriter) Close() error { + var err error + if !writer.finished { + err = os.RemoveAll(writer.tmpDir) + if err != nil { + return err + } + } + + err = writer.file.Close() + return err +} + +func (writer localRepositoryWriter) Finished() (string, error) { + hash := writer.hasher.Sum(nil) + name := base64.URLEncoding.EncodeToString(hash[:]) + finalDir := writer.repo.getDirForLocalMedia(name) + os.MkdirAll(path.Dir(finalDir), 0770) + err := os.Rename(writer.tmpDir, finalDir) + if err != nil { + log.Println("Failed to move temp directory:", writer.tmpDir, finalDir, err) + return "", err + } + writer.finished = true + writer.Close() + return name, nil +} + +func getTempWriter(repo Repository, descr Description) (io.WriteCloser, string, error) { + tmpDir, err := repo.makeTempDir() + if err != nil { + log.Printf("Failed to create temp dir: %v\n", err) + return nil, "", err + } + + repoPaths := getPathsForMedia(tmpDir) + + if err = ioutil.WriteFile(repoPaths.typePath, []byte(descr.Type), 0660); err != nil { + log.Printf("Failed to create typeFile: %q\n", err) + return nil, "", err + } + + tmpFile, err := NewLimitedFileWriter(repoPaths.contentPath, repo.MaxBytes) + if err != nil { + log.Printf("Failed to create limited file: %v\n", err) + return nil, "", err + } + + return tmpFile, tmpDir, nil +} diff --git a/src/github.com/matrix-org/dendrite/mediaapi/storage/storage.go b/src/github.com/matrix-org/dendrite/mediaapi/storage/storage.go index 0c881a480..dc987e44d 100644 --- a/src/github.com/matrix-org/dendrite/mediaapi/storage/storage.go +++ b/src/github.com/matrix-org/dendrite/mediaapi/storage/storage.go @@ -16,6 +16,9 @@ package storage import ( "database/sql" + + // Import the postgres database driver. + _ "github.com/lib/pq" ) // A Database is used to store room events and stream offsets. @@ -36,3 +39,8 @@ func Open(dataSourceName string) (*Database, error) { } return &d, nil } + +// CreateMedia inserts the metadata about the uploaded media into the database. 
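+// fileSize is expected in bytes; insertMedia also stamps the row's created_ts
+// with the current time in milliseconds.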
+func (d *Database) CreateMedia(mediaID string, mediaOrigin string, contentType string, contentDisposition string, fileSize int64, uploadName string, userID string) error { + return d.statements.insertMedia(mediaID, mediaOrigin, contentType, contentDisposition, fileSize, uploadName, userID) +} diff --git a/src/github.com/matrix-org/dendrite/mediaapi/writers/upload.go b/src/github.com/matrix-org/dendrite/mediaapi/writers/upload.go index 0e0d16c8b..f68168840 100644 --- a/src/github.com/matrix-org/dendrite/mediaapi/writers/upload.go +++ b/src/github.com/matrix-org/dendrite/mediaapi/writers/upload.go @@ -15,29 +15,34 @@ package writers import ( + "fmt" + "io" "net/http" - "strconv" "strings" log "github.com/Sirupsen/logrus" "github.com/matrix-org/dendrite/clientapi/auth" "github.com/matrix-org/dendrite/clientapi/jsonerror" "github.com/matrix-org/dendrite/mediaapi/config" + "github.com/matrix-org/dendrite/mediaapi/storage" "github.com/matrix-org/util" ) +// UploadRequest metadata included in or derivable from an upload request // https://matrix.org/docs/spec/client_server/r0.2.0.html#post-matrix-media-r0-upload // NOTE: ContentType is an HTTP request header and Filename is passed as a query parameter -type uploadRequest struct { +type UploadRequest struct { ContentDisposition string - ContentLength int + ContentLength int64 ContentType string Filename string + Base64FileHash string Method string UserID string } -func (r uploadRequest) Validate() *util.JSONResponse { +// Validate validates the UploadRequest fields +func (r UploadRequest) Validate() *util.JSONResponse { // TODO: Any validation to be done on ContentDisposition? if r.ContentLength < 1 { return &util.JSONResponse{ @@ -88,7 +93,7 @@ type uploadResponse struct { } // Upload implements /upload -func Upload(req *http.Request, cfg config.MediaAPI) util.JSONResponse { +func Upload(req *http.Request, cfg config.MediaAPI, db *storage.Database, repo *storage.Repository) util.JSONResponse { logger := util.GetLogger(req.Context()) // FIXME: This will require querying some other component/db but currently @@ -98,13 +103,9 @@ func Upload(req *http.Request, cfg config.MediaAPI) util.JSONResponse { return *resErr } - // req.Header.Get() returns "" if no header - // strconv.Atoi() returns 0 when parsing "" - contentLength, _ := strconv.Atoi(req.Header.Get("Content-Length")) - - r := uploadRequest{ + r := &UploadRequest{ ContentDisposition: req.Header.Get("Content-Disposition"), - ContentLength: contentLength, + ContentLength: req.ContentLength, ContentType: req.Header.Get("Content-Type"), Filename: req.FormValue("filename"), Method: req.Method, @@ -126,14 +127,50 @@ func Upload(req *http.Request, cfg config.MediaAPI) util.JSONResponse { // - progressive writing (could support Content-Length 0 and cut off // after some max upload size is exceeded) // - generate id (ideally a hash but a random string to start with) - // - generate thumbnails - // TODO: Write metadata to database - // TODO: Respond to request + writer, err := repo.WriterToLocalRepository(storage.Description{ + Type: r.ContentType, + }) + if err != nil { + logger.Infof("Failed to get cache writer %q\n", err) + return util.JSONResponse{ + Code: 400, + JSON: jsonerror.BadJSON(fmt.Sprintf("Failed to upload: %q", err)), + } + } + + defer writer.Close() + + if _, err = io.Copy(writer, req.Body); err != nil { + logger.Infof("Failed to copy %q\n", err) + return util.JSONResponse{ + Code: 400, + JSON: jsonerror.BadJSON(fmt.Sprintf("Failed to upload: %q", err)), + } + } + + r.Base64FileHash, 
err = writer.Finished() + if err != nil { + return util.JSONResponse{ + Code: 400, + JSON: jsonerror.BadJSON(fmt.Sprintf("Failed to upload: %q", err)), + } + } + // TODO: check if file with hash already exists + + // TODO: generate thumbnails + + err = db.CreateMedia(r.Base64FileHash, cfg.ServerName, r.ContentType, r.ContentDisposition, r.ContentLength, r.Filename, r.UserID) + if err != nil { + return util.JSONResponse{ + Code: 400, + JSON: jsonerror.BadJSON(fmt.Sprintf("Failed to upload: %q", err)), + } + } return util.JSONResponse{ Code: 200, JSON: uploadResponse{ - ContentURI: "mxc://example.com/AQwafuaFswefuhsfAFAgsw", + ContentURI: fmt.Sprintf("mxc://%s/%s", cfg.ServerName, r.Base64FileHash), }, } } From c1e5974872b6f9f6c826050c1132fa1fe822fd28 Mon Sep 17 00:00:00 2001 From: Robert Swain Date: Wed, 26 Apr 2017 12:19:47 +0200 Subject: [PATCH 003/108] mediaapi/storage: Remove DEFAULT from content_disposition in schema It isn't very effective at all as we will anyway write an empty string. We can handle what to do about that in the query code paths. --- src/github.com/matrix-org/dendrite/mediaapi/storage/media.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/github.com/matrix-org/dendrite/mediaapi/storage/media.go b/src/github.com/matrix-org/dendrite/mediaapi/storage/media.go index b46ab8181..8dbc5602d 100644 --- a/src/github.com/matrix-org/dendrite/mediaapi/storage/media.go +++ b/src/github.com/matrix-org/dendrite/mediaapi/storage/media.go @@ -31,7 +31,7 @@ CREATE TABLE IF NOT EXISTS media_repository ( -- The MIME-type of the media file. content_type TEXT NOT NULL, -- The HTTP Content-Disposition header for the media file. - content_disposition TEXT NOT NULL DEFAULT 'inline', + content_disposition TEXT NOT NULL, -- Size of the media file in bytes. file_size BIGINT NOT NULL, -- When the content was uploaded in ms. 
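For orientation between these patches, a small sketch of the on-disk layout
that repository.go gives uploaded media at this point. The store prefix is an
assumption and the media ID is just the example value used elsewhere in the
series (real IDs are the URL-safe base64 of the content's SHA-256 hash).

package main

import (
	"fmt"
	"path"
)

func main() {
	storePrefix := "/var/dendrite/media" // assumed Repository.StorePrefix
	mediaID := "AQwafuaFswefuhsfAFAgsw"  // illustrative media ID

	// Mirrors getDirForLocalMedia and getPathsForMedia: the ID is split after
	// its first three characters to fan files out across directories.
	dir := path.Join(storePrefix, "local", mediaID[:3], mediaID[3:])
	fmt.Println(path.Join(dir, "content")) // the raw uploaded bytes
	fmt.Println(path.Join(dir, "type"))    // the stored Content-Type

	// Remote media is cached under <StorePrefix>/remote/<host>/... in the same shape.
}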
From 81706408bdac21a5faec2386021d071c076eb709 Mon Sep 17 00:00:00 2001 From: Robert Swain Date: Thu, 27 Apr 2017 17:40:57 +0200 Subject: [PATCH 004/108] mediaapi: Hack in /download from gotest code --- .../dendrite/mediaapi/routing/routing.go | 59 +++ .../dendrite/mediaapi/storage/media.go | 15 + .../dendrite/mediaapi/storage/storage.go | 5 + .../dendrite/mediaapi/writers/download.go | 361 ++++++++++++++++++ 4 files changed, 440 insertions(+) create mode 100644 src/github.com/matrix-org/dendrite/mediaapi/writers/download.go diff --git a/src/github.com/matrix-org/dendrite/mediaapi/routing/routing.go b/src/github.com/matrix-org/dendrite/mediaapi/routing/routing.go index 562dc8e84..537bb1413 100644 --- a/src/github.com/matrix-org/dendrite/mediaapi/routing/routing.go +++ b/src/github.com/matrix-org/dendrite/mediaapi/routing/routing.go @@ -15,8 +15,10 @@ package routing import ( + "context" "net/http" + log "github.com/Sirupsen/logrus" "github.com/gorilla/mux" "github.com/matrix-org/dendrite/mediaapi/config" "github.com/matrix-org/dendrite/mediaapi/storage" @@ -27,6 +29,48 @@ import ( const pathPrefixR0 = "/_matrix/media/v1" +type contextKeys string + +const ctxValueLogger = contextKeys("logger") +const ctxValueRequestID = contextKeys("requestid") + +type Fudge struct { + Config config.MediaAPI + Database *storage.Database + DownloadServer writers.DownloadServer +} + +func (fudge Fudge) ServeHTTP(w http.ResponseWriter, req *http.Request) { + // NOTE: The code below is from util.Protect and respond but this is the only + // API that needs a different form of it to be able to pass the + // http.ResponseWriter to the handler + reqID := util.RandomString(12) + // Set a Logger and request ID on the context + ctx := context.WithValue(req.Context(), ctxValueLogger, log.WithFields(log.Fields{ + "req.method": req.Method, + "req.path": req.URL.Path, + "req.id": reqID, + })) + ctx = context.WithValue(ctx, ctxValueRequestID, reqID) + req = req.WithContext(ctx) + + logger := util.GetLogger(req.Context()) + logger.Print("Incoming request") + + if req.Method == "OPTIONS" { + util.SetCORSHeaders(w) + w.WriteHeader(200) + return + } + + // Set common headers returned regardless of the outcome of the request + util.SetCORSHeaders(w) + w.Header().Set("Content-Type", "application/json") + + vars := mux.Vars(req) + writers.Download(w, req, vars["serverName"], vars["mediaId"], fudge.Config, fudge.Database, fudge.DownloadServer) +} + // Setup registers HTTP handlers with the given ServeMux. It also supplies the given http.Client // to clients which need to make outbound HTTP requests. 
func Setup(servMux *http.ServeMux, httpClient *http.Client, cfg config.MediaAPI, db *storage.Database, repo *storage.Repository) { @@ -36,6 +80,21 @@ func Setup(servMux *http.ServeMux, httpClient *http.Client, cfg config.MediaAPI, return writers.Upload(req, cfg, db, repo) }))) + downloadServer := writers.DownloadServer{ + Repository: *repo, + LocalServerName: cfg.ServerName, + } + + fudge := Fudge{ + Config: cfg, + Database: db, + DownloadServer: downloadServer, + } + + r0mux.Handle("/download/{serverName}/{mediaId}", + prometheus.InstrumentHandler("download", fudge), + ) + servMux.Handle("/metrics", prometheus.Handler()) servMux.Handle("/api/", http.StripPrefix("/api", apiMux)) } diff --git a/src/github.com/matrix-org/dendrite/mediaapi/storage/media.go b/src/github.com/matrix-org/dendrite/mediaapi/storage/media.go index 8dbc5602d..8aee283d4 100644 --- a/src/github.com/matrix-org/dendrite/mediaapi/storage/media.go +++ b/src/github.com/matrix-org/dendrite/mediaapi/storage/media.go @@ -49,8 +49,13 @@ INSERT INTO media_repository (media_id, media_origin, content_type, content_disp VALUES ($1, $2, $3, $4, $5, $6, $7, $8) ` +const selectMediaSQL = ` +SELECT content_type, content_disposition, file_size, upload_name FROM media_repository WHERE media_id = $1 AND media_origin = $2 +` + type mediaStatements struct { insertMediaStmt *sql.Stmt + selectMediaStmt *sql.Stmt } func (s *mediaStatements) prepare(db *sql.DB) (err error) { @@ -61,6 +66,7 @@ func (s *mediaStatements) prepare(db *sql.DB) (err error) { return statementList{ {&s.insertMediaStmt, insertMediaSQL}, + {&s.selectMediaStmt, selectMediaSQL}, }.prepare(db) } @@ -72,3 +78,12 @@ func (s *mediaStatements) insertMedia(mediaID string, mediaOrigin string, conten ) return err } + +func (s *mediaStatements) selectMedia(mediaID string, mediaOrigin string) (string, string, int64, string, error) { + var contentType string + var contentDisposition string + var fileSize int64 + var uploadName string + err := s.selectMediaStmt.QueryRow(mediaID, mediaOrigin).Scan(&contentType, &contentDisposition, &fileSize, &uploadName) + return string(contentType), string(contentDisposition), int64(fileSize), string(uploadName), err +} diff --git a/src/github.com/matrix-org/dendrite/mediaapi/storage/storage.go b/src/github.com/matrix-org/dendrite/mediaapi/storage/storage.go index dc987e44d..72dc0a62f 100644 --- a/src/github.com/matrix-org/dendrite/mediaapi/storage/storage.go +++ b/src/github.com/matrix-org/dendrite/mediaapi/storage/storage.go @@ -44,3 +44,8 @@ func Open(dataSourceName string) (*Database, error) { func (d *Database) CreateMedia(mediaID string, mediaOrigin string, contentType string, contentDisposition string, fileSize int64, uploadName string, userID string) error { return d.statements.insertMedia(mediaID, mediaOrigin, contentType, contentDisposition, fileSize, uploadName, userID) } + +// GetMedia possibly selects the metadata about previously uploaded media from the database. 
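+// The values returned are, in order: content type, content disposition, file
+// size in bytes and upload name. The error is sql.ErrNoRows when the media is
+// not known to this server.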
+func (d *Database) GetMedia(mediaID string, mediaOrigin string) (string, string, int64, string, error) { + return d.statements.selectMedia(mediaID, mediaOrigin) +} diff --git a/src/github.com/matrix-org/dendrite/mediaapi/writers/download.go b/src/github.com/matrix-org/dendrite/mediaapi/writers/download.go new file mode 100644 index 000000000..775779a91 --- /dev/null +++ b/src/github.com/matrix-org/dendrite/mediaapi/writers/download.go @@ -0,0 +1,361 @@ +// Copyright 2017 Vector Creations Ltd +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package writers + +import ( + "encoding/json" + "fmt" + "io" + "net" + "net/http" + "strconv" + "strings" + + log "github.com/Sirupsen/logrus" + "github.com/matrix-org/dendrite/clientapi/jsonerror" + "github.com/matrix-org/dendrite/mediaapi/config" + "github.com/matrix-org/dendrite/mediaapi/storage" + "github.com/matrix-org/util" +) + +// DownloadRequest metadata included in or derivable from an upload request +// https://matrix.org/docs/spec/client_server/r0.2.0.html#post-matrix-media-r0-download +type DownloadRequest struct { + MediaID string + ServerName string +} + +// Validate validates the DownloadRequest fields +func (r DownloadRequest) Validate() *util.JSONResponse { + // FIXME: the following errors aren't bad JSON, rather just a bad request path + // maybe give the URL pattern in the routing, these are not even possible as the handler would not be hit...? 
+ if r.MediaID == "" { + return &util.JSONResponse{ + Code: 400, + JSON: jsonerror.BadJSON("mediaId must be a non-empty string"), + } + } + if r.ServerName == "" { + return &util.JSONResponse{ + Code: 400, + JSON: jsonerror.BadJSON("serverName must be a non-empty string"), + } + } + return nil +} + +func jsonErrorResponse(w http.ResponseWriter, res util.JSONResponse, logger *log.Entry) { + // Marshal JSON response into raw bytes to send as the HTTP body + resBytes, err := json.Marshal(res.JSON) + if err != nil { + logger.WithError(err).Error("Failed to marshal JSONResponse") + // this should never fail to be marshalled so drop err to the floor + res = util.MessageResponse(500, "Internal Server Error") + resBytes, _ = json.Marshal(res.JSON) + } + + // Set status code and write the body + w.WriteHeader(res.Code) + logger.WithField("code", res.Code).Infof("Responding (%d bytes)", len(resBytes)) + w.Write(resBytes) +} + +// Download implements /upload +func Download(w http.ResponseWriter, req *http.Request, serverName string, mediaID string, cfg config.MediaAPI, db *storage.Database, downloadServer DownloadServer) { + logger := util.GetLogger(req.Context()) + + r := &DownloadRequest{ + MediaID: mediaID, + ServerName: serverName, + } + + if resErr := r.Validate(); resErr != nil { + jsonErrorResponse(w, *resErr, logger) + return + } + + // TODO: + // - query db to look up content type and disposition and whether we have the file + logger.Warnln(r.MediaID, r.ServerName, cfg.ServerName) + contentType, contentDisposition, fileSize, filename, err := db.GetMedia(r.MediaID, r.ServerName) + if err != nil { + if strings.Compare(r.ServerName, cfg.ServerName) != 0 { + // TODO: get remote file from remote server + jsonErrorResponse(w, util.JSONResponse{ + Code: 404, + JSON: jsonerror.NotFound(fmt.Sprintf("NOT YET IMPLEMENTED")), + }, logger) + return + } + jsonErrorResponse(w, util.JSONResponse{ + Code: 404, + JSON: jsonerror.NotFound(fmt.Sprintf("File %q does not exist", r.MediaID)), + }, logger) + return + } + + // - read file and respond + logger.WithFields(log.Fields{ + "MediaID": r.MediaID, + "ServerName": r.ServerName, + "Filename": filename, + "Content-Type": contentType, + "Content-Disposition": contentDisposition, + }).Infof("Downloading file") + + logger.WithField("code", 200).Infof("Responding (%d bytes)", fileSize) + + respWriter := httpResponseWriter{resp: w} + if err = downloadServer.getImage(respWriter, r.ServerName, r.MediaID); err != nil { + if respWriter.haveWritten() { + closeConnection(w) + return + } + + errStatus := 500 + switch err { + case errNotFound: + errStatus = 404 + case errProxy: + errStatus = 502 + } + http.Error(w, err.Error(), errStatus) + return + } + + return +} + +// DownloadServer serves and caches remote media. 
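+// Client is used for outbound requests to remote servers, Repository holds the
+// local store and remote cache, and LocalServerName decides whether a requested
+// origin is served locally or fetched and cached.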
+type DownloadServer struct { + Client http.Client + Repository storage.Repository + LocalServerName string +} + +func (handler *DownloadServer) getImage(w responseWriter, host, name string) error { + var file io.ReadCloser + var descr *storage.Description + var err error + if host == handler.LocalServerName { + file, descr, err = handler.Repository.ReaderFromLocalRepo(name) + } else { + file, descr, err = handler.Repository.ReaderFromRemoteCache(host, name) + } + + if err == nil { + log.Println("Found in Cache") + w.setContentType(descr.Type) + + size := strconv.FormatInt(descr.Length, 10) + w.setContentLength(size) + w.setContentSecurityPolicy() + if _, err = io.Copy(w, file); err != nil { + log.Printf("Failed to copy from cache %v\n", err) + return err + } + w.Close() + return nil + } else if !storage.IsNotExists(err) { + log.Printf("Error looking in cache: %v\n", err) + return err + } + + if host == handler.LocalServerName { + // Its fatal if we can't find local files in our cache. + return errNotFound + } + + respBody, desc, err := handler.fetchRemoteMedia(host, name) + if err != nil { + return err + } + + defer respBody.Close() + + w.setContentType(desc.Type) + if desc.Length > 0 { + w.setContentLength(strconv.FormatInt(desc.Length, 10)) + } + + writer, err := handler.Repository.WriterToRemoteCache(host, name, *desc) + if err != nil { + log.Printf("Failed to get cache writer %q\n", err) + return err + } + + defer writer.Close() + + reader := io.TeeReader(respBody, w) + if _, err := io.Copy(writer, reader); err != nil { + log.Printf("Failed to copy %q\n", err) + return err + } + + writer.Finished() + + log.Println("Finished conn") + + return nil +} + +func (handler *DownloadServer) fetchRemoteMedia(host, name string) (io.ReadCloser, *storage.Description, error) { + urls := getMatrixUrls(host) + + log.Printf("Connecting to remote %q\n", urls[0]) + + remoteReq, err := http.NewRequest("GET", urls[0]+"/_matrix/media/v1/download/"+host+"/"+name, nil) + if err != nil { + log.Printf("Failed to connect to remote: %q\n", err) + return nil, nil, err + } + + remoteReq.Header.Set("Host", host) + + resp, err := handler.Client.Do(remoteReq) + if err != nil { + log.Printf("Failed to connect to remote: %q\n", err) + return nil, nil, errProxy + } + + if resp.StatusCode != 200 { + resp.Body.Close() + log.Printf("Server responded with %d\n", resp.StatusCode) + if resp.StatusCode == 404 { + return nil, nil, errNotFound + } + return nil, nil, errProxy + } + + desc := storage.Description{ + Type: resp.Header.Get("Content-Type"), + Length: -1, + } + + length, err := strconv.ParseInt(resp.Header.Get("Content-Length"), 10, 64) + if err == nil { + desc.Length = length + } + + return resp.Body, &desc, nil +} + +// Given a http.ResponseWriter, attempt to force close the connection. +// +// This is useful if you get a fatal error after sending the initial 200 OK +// response. +func closeConnection(w http.ResponseWriter) { + log.Println("Attempting to close connection") + + // We attempt to bluntly close the connection because that is the + // best thing we can do after we've sent a 200 OK + hijack, ok := w.(http.Hijacker) + if ok { + conn, _, err := hijack.Hijack() + if err != nil { + fmt.Printf("Err trying to hijack: %v", err) + return + } + log.Println("Closing") + conn.Close() + return + } + log.Println("Not hijacker") +} + +// Given a matrix server name, attempt to discover URLs to contact the server +// on. 
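+// A _matrix._tcp SRV lookup is tried first; if that fails the fallback is
+// https://<host>:8448.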
+func getMatrixUrls(host string) []string { + _, srvs, err := net.LookupSRV("matrix", "tcp", host) + if err != nil { + return []string{"https://" + host + ":8448"} + } + + results := make([]string, 0, len(srvs)) + for _, srv := range srvs { + if srv == nil { + continue + } + + url := []string{"https://", strings.Trim(srv.Target, "."), ":", strconv.Itoa(int(srv.Port))} + results = append(results, strings.Join(url, "")) + } + + // TODO: Order based on priority and weight. + + return results +} + +// Given a path of the form '//' extract the host and name. +func getMediaIDFromPath(path string) (host, name string, err error) { + parts := strings.Split(path, "/") + if len(parts) != 3 { + err = fmt.Errorf("Invalid path %q", path) + return + } + + host, name = parts[1], parts[2] + + if host == "" || name == "" { + err = fmt.Errorf("Invalid path %q", path) + return + } + + return +} + +type responseWriter interface { + io.WriteCloser + setContentLength(string) + setContentSecurityPolicy() + setContentType(string) + haveWritten() bool +} + +type httpResponseWriter struct { + resp http.ResponseWriter + written bool +} + +func (writer httpResponseWriter) haveWritten() bool { + return writer.written +} + +func (writer httpResponseWriter) Write(p []byte) (n int, err error) { + writer.written = true + return writer.resp.Write(p) +} + +func (writer httpResponseWriter) Close() error { return nil } + +func (writer httpResponseWriter) setContentType(contentType string) { + writer.resp.Header().Set("Content-Type", contentType) +} + +func (writer httpResponseWriter) setContentLength(length string) { + writer.resp.Header().Set("Content-Length", length) +} + +func (writer httpResponseWriter) setContentSecurityPolicy() { + contentSecurityPolicy := "default-src 'none';" + + " script-src 'none';" + + " plugin-types application/pdf;" + + " style-src 'unsafe-inline';" + + " object-src 'self';" + writer.resp.Header().Set("Content-Security-Policy", contentSecurityPolicy) +} + +var errProxy = fmt.Errorf("Failed to contact remote") +var errNotFound = fmt.Errorf("Image not found") From 0a24e406afdf8af837d45c15295f7405a7eda278 Mon Sep 17 00:00:00 2001 From: Robert Swain Date: Fri, 28 Apr 2017 14:44:54 +0200 Subject: [PATCH 005/108] mediaapi/routing: Rename Fudge to downloadRequestHandler --- .../matrix-org/dendrite/mediaapi/routing/routing.go | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/src/github.com/matrix-org/dendrite/mediaapi/routing/routing.go b/src/github.com/matrix-org/dendrite/mediaapi/routing/routing.go index 537bb1413..d79f59955 100644 --- a/src/github.com/matrix-org/dendrite/mediaapi/routing/routing.go +++ b/src/github.com/matrix-org/dendrite/mediaapi/routing/routing.go @@ -34,13 +34,13 @@ type contextKeys string const ctxValueLogger = contextKeys("logger") const ctxValueRequestID = contextKeys("requestid") -type Fudge struct { +type downloadRequestHandler struct { Config config.MediaAPI Database *storage.Database DownloadServer writers.DownloadServer } -func (fudge Fudge) ServeHTTP(w http.ResponseWriter, req *http.Request) { +func (handler downloadRequestHandler) ServeHTTP(w http.ResponseWriter, req *http.Request) { // NOTE: The code below is from util.Protect and respond but this is the only // API that needs a different form of it to be able to pass the // http.ResponseWriter to the handler @@ -68,7 +68,7 @@ func (fudge Fudge) ServeHTTP(w http.ResponseWriter, req *http.Request) { w.Header().Set("Content-Type", "application/json") vars := mux.Vars(req) - writers.Download(w, 
req, vars["serverName"], vars["mediaId"], fudge.Config, fudge.Database, fudge.DownloadServer) + writers.Download(w, req, vars["serverName"], vars["mediaId"], handler.Config, handler.Database, handler.DownloadServer) } // Setup registers HTTP handlers with the given ServeMux. It also supplies the given http.Client @@ -85,14 +85,14 @@ func Setup(servMux *http.ServeMux, httpClient *http.Client, cfg config.MediaAPI, LocalServerName: cfg.ServerName, } - fudge := Fudge{ + handler := downloadRequestHandler{ Config: cfg, Database: db, DownloadServer: downloadServer, } r0mux.Handle("/download/{serverName}/{mediaId}", - prometheus.InstrumentHandler("download", fudge), + prometheus.InstrumentHandler("download", handler), ) servMux.Handle("/metrics", prometheus.Handler()) From f2437be52b251c04c03782c562627ec572609dbd Mon Sep 17 00:00:00 2001 From: Robert Swain Date: Fri, 28 Apr 2017 14:45:18 +0200 Subject: [PATCH 006/108] mediaapi/routing: Correct comment on origin of duplicated code --- .../matrix-org/dendrite/mediaapi/routing/routing.go | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/github.com/matrix-org/dendrite/mediaapi/routing/routing.go b/src/github.com/matrix-org/dendrite/mediaapi/routing/routing.go index d79f59955..4b41920e6 100644 --- a/src/github.com/matrix-org/dendrite/mediaapi/routing/routing.go +++ b/src/github.com/matrix-org/dendrite/mediaapi/routing/routing.go @@ -41,8 +41,8 @@ type downloadRequestHandler struct { } func (handler downloadRequestHandler) ServeHTTP(w http.ResponseWriter, req *http.Request) { - // NOTE: The code below is from util.Protect and respond but this is the only - // API that needs a different form of it to be able to pass the + // NOTE: The code below is from util.MakeJSONAPI and respond but this is the + // only API that needs a different form of it to be able to pass the // http.ResponseWriter to the handler reqID := util.RandomString(12) // Set a Logger and request ID on the context From 5d4432218b1fe73e09bcc981ddbfca5b01052766 Mon Sep 17 00:00:00 2001 From: Robert Swain Date: Fri, 28 Apr 2017 16:24:07 +0200 Subject: [PATCH 007/108] vendor: Update github.com/matrix-org/util --- vendor/manifest | 2 +- vendor/src/github.com/matrix-org/util/json.go | 31 ++++++++++++------- 2 files changed, 20 insertions(+), 13 deletions(-) diff --git a/vendor/manifest b/vendor/manifest index 2cc75e575..1a43889e6 100644 --- a/vendor/manifest +++ b/vendor/manifest @@ -98,7 +98,7 @@ { "importpath": "github.com/matrix-org/util", "repository": "https://github.com/matrix-org/util", - "revision": "bc9d5e2d2f68a2ca279fce0fa2f28a91ecf301ed", + "revision": "8b11d9882e131d58ff40525c8f7b9a7f4b811a43", "branch": "master" }, { diff --git a/vendor/src/github.com/matrix-org/util/json.go b/vendor/src/github.com/matrix-org/util/json.go index a30d73f8e..a1b67656f 100644 --- a/vendor/src/github.com/matrix-org/util/json.go +++ b/vendor/src/github.com/matrix-org/util/json.go @@ -93,23 +93,30 @@ func Protect(handler http.HandlerFunc) http.HandlerFunc { } } +// SetupRequestLogging sets up standard logging for http.Requests. +// http.Requests will have a logger (with a request ID/method/path logged) attached to the Context. +// This can be accessed via GetLogger(Context). 
+func SetupRequestLogging(req *http.Request) { + reqID := RandomString(12) + // Set a Logger and request ID on the context + ctx := context.WithValue(req.Context(), ctxValueLogger, log.WithFields(log.Fields{ + "req.method": req.Method, + "req.path": req.URL.Path, + "req.id": reqID, + })) + ctx = context.WithValue(ctx, ctxValueRequestID, reqID) + req = req.WithContext(ctx) + + logger := GetLogger(req.Context()) + logger.Print("Incoming request") +} + // MakeJSONAPI creates an HTTP handler which always responds to incoming requests with JSON responses. // Incoming http.Requests will have a logger (with a request ID/method/path logged) attached to the Context. // This can be accessed via GetLogger(Context). func MakeJSONAPI(handler JSONRequestHandler) http.HandlerFunc { return Protect(func(w http.ResponseWriter, req *http.Request) { - reqID := RandomString(12) - // Set a Logger and request ID on the context - ctx := context.WithValue(req.Context(), ctxValueLogger, log.WithFields(log.Fields{ - "req.method": req.Method, - "req.path": req.URL.Path, - "req.id": reqID, - })) - ctx = context.WithValue(ctx, ctxValueRequestID, reqID) - req = req.WithContext(ctx) - - logger := GetLogger(req.Context()) - logger.Print("Incoming request") + SetupRequestLogging(req) if req.Method == "OPTIONS" { SetCORSHeaders(w) From 42a390f8fea4dbe5bad262b190bc4c90f3d5b72a Mon Sep 17 00:00:00 2001 From: Robert Swain Date: Fri, 28 Apr 2017 16:24:30 +0200 Subject: [PATCH 008/108] mediaapi/routing: Make use of refactored request logging in util package --- .../dendrite/mediaapi/routing/routing.go | 23 +------------------ 1 file changed, 1 insertion(+), 22 deletions(-) diff --git a/src/github.com/matrix-org/dendrite/mediaapi/routing/routing.go b/src/github.com/matrix-org/dendrite/mediaapi/routing/routing.go index 4b41920e6..5e40b140d 100644 --- a/src/github.com/matrix-org/dendrite/mediaapi/routing/routing.go +++ b/src/github.com/matrix-org/dendrite/mediaapi/routing/routing.go @@ -15,10 +15,8 @@ package routing import ( - "context" "net/http" - log "github.com/Sirupsen/logrus" "github.com/gorilla/mux" "github.com/matrix-org/dendrite/mediaapi/config" "github.com/matrix-org/dendrite/mediaapi/storage" @@ -29,11 +27,6 @@ import ( const pathPrefixR0 = "/_matrix/media/v1" -type contextKeys string - -const ctxValueLogger = contextKeys("logger") -const ctxValueRequestID = contextKeys("requestid") - type downloadRequestHandler struct { Config config.MediaAPI Database *storage.Database @@ -41,21 +34,7 @@ type downloadRequestHandler struct { } func (handler downloadRequestHandler) ServeHTTP(w http.ResponseWriter, req *http.Request) { - // NOTE: The code below is from util.MakeJSONAPI and respond but this is the - // only API that needs a different form of it to be able to pass the - // http.ResponseWriter to the handler - reqID := util.RandomString(12) - // Set a Logger and request ID on the context - ctx := context.WithValue(req.Context(), ctxValueLogger, log.WithFields(log.Fields{ - "req.method": req.Method, - "req.path": req.URL.Path, - "req.id": reqID, - })) - ctx = context.WithValue(ctx, ctxValueRequestID, reqID) - req = req.WithContext(ctx) - - logger := util.GetLogger(req.Context()) - logger.Print("Incoming request") + util.SetupRequestLogging(req) if req.Method == "OPTIONS" { util.SetCORSHeaders(w) From 10e843da584282d0c393c2d7be3c772f362fe69a Mon Sep 17 00:00:00 2001 From: Robert Swain Date: Fri, 28 Apr 2017 17:13:36 +0200 Subject: [PATCH 009/108] mediaapi/routing: Remove OPTIONS handling from GET endpoint --- 
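Note on the SetupRequestLogging helper introduced in PATCH 007/108 above: the line req = req.WithContext(ctx) only rebinds the helper's local copy of the request pointer, so the context carrying the logger and request ID never reaches the caller; a handler that calls util.SetupRequestLogging(req) and then util.GetLogger(req.Context()) still sees the original context. A minimal sketch of a variant that returns the updated request is below. The name WithRequestLogging is illustrative only and not part of this series; the sketch reuses ctxValueLogger, ctxValueRequestID, RandomString and GetLogger, which already exist in the util package.

package util

import (
	"context"
	"net/http"

	log "github.com/Sirupsen/logrus"
)

// WithRequestLogging attaches a request-scoped logger and request ID to the
// request context and returns the shallow-copied request, so callers can keep
// using the logger further down the handler chain.
func WithRequestLogging(req *http.Request) *http.Request {
	reqID := RandomString(12)
	// Set a Logger and request ID on the context, as SetupRequestLogging does.
	ctx := context.WithValue(req.Context(), ctxValueLogger, log.WithFields(log.Fields{
		"req.method": req.Method,
		"req.path":   req.URL.Path,
		"req.id":     reqID,
	}))
	ctx = context.WithValue(ctx, ctxValueRequestID, reqID)
	req = req.WithContext(ctx)

	GetLogger(req.Context()).Print("Incoming request")
	return req
}

Callers would then write req = util.WithRequestLogging(req) before handing the request on to writers.Download, so the request-scoped logger is available there.
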
.../matrix-org/dendrite/mediaapi/routing/routing.go | 6 ------ 1 file changed, 6 deletions(-) diff --git a/src/github.com/matrix-org/dendrite/mediaapi/routing/routing.go b/src/github.com/matrix-org/dendrite/mediaapi/routing/routing.go index 5e40b140d..5319d9caa 100644 --- a/src/github.com/matrix-org/dendrite/mediaapi/routing/routing.go +++ b/src/github.com/matrix-org/dendrite/mediaapi/routing/routing.go @@ -36,12 +36,6 @@ type downloadRequestHandler struct { func (handler downloadRequestHandler) ServeHTTP(w http.ResponseWriter, req *http.Request) { util.SetupRequestLogging(req) - if req.Method == "OPTIONS" { - util.SetCORSHeaders(w) - w.WriteHeader(200) - return - } - // Set common headers returned regardless of the outcome of the request util.SetCORSHeaders(w) w.Header().Set("Content-Type", "application/json") From 90bac42edbfece6c53fce4fe4d4cdf4e911672ff Mon Sep 17 00:00:00 2001 From: Robert Swain Date: Fri, 28 Apr 2017 17:14:16 +0200 Subject: [PATCH 010/108] mediaapi/writers/download: Only accept GET method --- .../matrix-org/dendrite/mediaapi/writers/download.go | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/src/github.com/matrix-org/dendrite/mediaapi/writers/download.go b/src/github.com/matrix-org/dendrite/mediaapi/writers/download.go index 775779a91..239eb294d 100644 --- a/src/github.com/matrix-org/dendrite/mediaapi/writers/download.go +++ b/src/github.com/matrix-org/dendrite/mediaapi/writers/download.go @@ -72,10 +72,18 @@ func jsonErrorResponse(w http.ResponseWriter, res util.JSONResponse, logger *log w.Write(resBytes) } -// Download implements /upload +// Download implements /download func Download(w http.ResponseWriter, req *http.Request, serverName string, mediaID string, cfg config.MediaAPI, db *storage.Database, downloadServer DownloadServer) { logger := util.GetLogger(req.Context()) + if req.Method != "GET" { + jsonErrorResponse(w, util.JSONResponse{ + Code: 405, + JSON: jsonerror.Unknown("request method must be GET"), + }, logger) + return + } + r := &DownloadRequest{ MediaID: mediaID, ServerName: serverName, From 52b8b9b8fe325ffd77e84308c232074c0acadde0 Mon Sep 17 00:00:00 2001 From: Robert Swain Date: Fri, 28 Apr 2017 17:15:00 +0200 Subject: [PATCH 011/108] mediaapi/writers/download: Remove cruft --- .../matrix-org/dendrite/mediaapi/writers/download.go | 3 --- 1 file changed, 3 deletions(-) diff --git a/src/github.com/matrix-org/dendrite/mediaapi/writers/download.go b/src/github.com/matrix-org/dendrite/mediaapi/writers/download.go index 239eb294d..6dbc00228 100644 --- a/src/github.com/matrix-org/dendrite/mediaapi/writers/download.go +++ b/src/github.com/matrix-org/dendrite/mediaapi/writers/download.go @@ -94,9 +94,6 @@ func Download(w http.ResponseWriter, req *http.Request, serverName string, media return } - // TODO: - // - query db to look up content type and disposition and whether we have the file - logger.Warnln(r.MediaID, r.ServerName, cfg.ServerName) contentType, contentDisposition, fileSize, filename, err := db.GetMedia(r.MediaID, r.ServerName) if err != nil { if strings.Compare(r.ServerName, cfg.ServerName) != 0 { From a24b3e7810aecfae4336827a920ae44cd3c571e2 Mon Sep 17 00:00:00 2001 From: Robert Swain Date: Fri, 28 Apr 2017 17:15:26 +0200 Subject: [PATCH 012/108] mediaapi/writers/download: Obtaining a file from a remote server is supported --- .../matrix-org/dendrite/mediaapi/writers/download.go | 10 +--------- 1 file changed, 1 insertion(+), 9 deletions(-) diff --git a/src/github.com/matrix-org/dendrite/mediaapi/writers/download.go 
b/src/github.com/matrix-org/dendrite/mediaapi/writers/download.go index 6dbc00228..730746603 100644 --- a/src/github.com/matrix-org/dendrite/mediaapi/writers/download.go +++ b/src/github.com/matrix-org/dendrite/mediaapi/writers/download.go @@ -95,15 +95,7 @@ func Download(w http.ResponseWriter, req *http.Request, serverName string, media } contentType, contentDisposition, fileSize, filename, err := db.GetMedia(r.MediaID, r.ServerName) - if err != nil { - if strings.Compare(r.ServerName, cfg.ServerName) != 0 { - // TODO: get remote file from remote server - jsonErrorResponse(w, util.JSONResponse{ - Code: 404, - JSON: jsonerror.NotFound(fmt.Sprintf("NOT YET IMPLEMENTED")), - }, logger) - return - } + if err != nil && strings.Compare(r.ServerName, cfg.ServerName) == 0 { jsonErrorResponse(w, util.JSONResponse{ Code: 404, JSON: jsonerror.NotFound(fmt.Sprintf("File %q does not exist", r.MediaID)), From 7cf34af30bc5a97b4045ab239e3aa83fd8f8ac77 Mon Sep 17 00:00:00 2001 From: Robert Swain Date: Tue, 9 May 2017 19:49:39 +0200 Subject: [PATCH 013/108] WIP: Refactoring --- .../cmd/dendrite-media-api-server/main.go | 20 +- .../dendrite/mediaapi/config/config.go | 9 +- .../dendrite/mediaapi/routing/routing.go | 45 +- .../dendrite/mediaapi/storage/fileio.go | 92 --- .../dendrite/mediaapi/storage/media.go | 89 --- .../storage/media_repository_table.go | 107 ++++ .../dendrite/mediaapi/storage/repository.go | 283 --------- .../dendrite/mediaapi/storage/storage.go | 20 +- .../dendrite/mediaapi/types/types.go | 57 ++ .../dendrite/mediaapi/writers/download.go | 555 ++++++++++-------- .../dendrite/mediaapi/writers/upload.go | 190 ++++-- .../dendrite/mediaapi/writers/utils.go | 80 +++ 12 files changed, 718 insertions(+), 829 deletions(-) delete mode 100644 src/github.com/matrix-org/dendrite/mediaapi/storage/fileio.go delete mode 100644 src/github.com/matrix-org/dendrite/mediaapi/storage/media.go create mode 100644 src/github.com/matrix-org/dendrite/mediaapi/storage/media_repository_table.go delete mode 100644 src/github.com/matrix-org/dendrite/mediaapi/storage/repository.go create mode 100644 src/github.com/matrix-org/dendrite/mediaapi/types/types.go create mode 100644 src/github.com/matrix-org/dendrite/mediaapi/writers/utils.go diff --git a/src/github.com/matrix-org/dendrite/cmd/dendrite-media-api-server/main.go b/src/github.com/matrix-org/dendrite/cmd/dendrite-media-api-server/main.go index 31d8bfb62..aa6ffef81 100644 --- a/src/github.com/matrix-org/dendrite/cmd/dendrite-media-api-server/main.go +++ b/src/github.com/matrix-org/dendrite/cmd/dendrite-media-api-server/main.go @@ -27,9 +27,9 @@ import ( ) var ( - bindAddr = os.Getenv("BIND_ADDRESS") - database = os.Getenv("DATABASE") - logDir = os.Getenv("LOG_DIR") + bindAddr = os.Getenv("BIND_ADDRESS") + dataSource = os.Getenv("DATABASE") + logDir = os.Getenv("LOG_DIR") ) func main() { @@ -40,9 +40,10 @@ func main() { } cfg := config.MediaAPI{ - ServerName: "localhost", - BasePath: "/Users/robertsw/dendrite", - DataSource: database, + ServerName: "localhost", + BasePath: "/Users/robertsw/dendrite", + MaxFileSize: 61440, + DataSource: dataSource, } db, err := storage.Open(cfg.DataSource) @@ -50,13 +51,8 @@ func main() { log.Panicln("Failed to open database:", err) } - repo := &storage.Repository{ - StorePrefix: cfg.BasePath, - MaxBytes: 61440, - } - log.Info("Starting mediaapi") - routing.Setup(http.DefaultServeMux, http.DefaultClient, cfg, db, repo) + routing.Setup(http.DefaultServeMux, http.DefaultClient, cfg, db) log.Fatal(http.ListenAndServe(bindAddr, nil)) } 
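The main.go hunk above still hard-codes the server name, base path, and maximum file size while this refactoring is in progress. For reference, a minimal sketch of wiring the same pieces onto a private ServeMux (rather than http.DefaultServeMux) and smoke-testing the download route via net/http/httptest is shown below; the DSN environment variable, base path, and media ID used here are placeholder assumptions, not values taken from this series.

package main

import (
	"net/http"
	"net/http/httptest"
	"os"

	"github.com/matrix-org/dendrite/mediaapi/config"
	"github.com/matrix-org/dendrite/mediaapi/routing"
	"github.com/matrix-org/dendrite/mediaapi/storage"

	log "github.com/Sirupsen/logrus"
)

func main() {
	// Assumes DATABASE points at a reachable postgres instance, as in main.go above.
	dataSource := os.Getenv("DATABASE")
	db, err := storage.Open(dataSource)
	if err != nil {
		log.Fatalln("Failed to open database:", err)
	}

	cfg := config.MediaAPI{
		ServerName:  "localhost",
		BasePath:    "/tmp/dendrite-media", // placeholder path
		MaxFileSize: 61440,
		DataSource:  dataSource,
	}

	// Register the media API routes on a private mux so nothing else in the
	// process can interfere with them, then serve it from an ephemeral
	// httptest server.
	mux := http.NewServeMux()
	routing.Setup(mux, http.DefaultClient, cfg, db)
	srv := httptest.NewServer(mux)
	defer srv.Close()

	// Smoke-test the download route; "some-media-id" is just a placeholder.
	resp, err := http.Get(srv.URL + "/_matrix/media/v1/download/localhost/some-media-id")
	if err != nil {
		log.Fatalln("Request failed:", err)
	}
	defer resp.Body.Close()
	log.Println("GET /download responded with status", resp.StatusCode)
}
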
diff --git a/src/github.com/matrix-org/dendrite/mediaapi/config/config.go b/src/github.com/matrix-org/dendrite/mediaapi/config/config.go index 5900d9d56..2002cd86a 100644 --- a/src/github.com/matrix-org/dendrite/mediaapi/config/config.go +++ b/src/github.com/matrix-org/dendrite/mediaapi/config/config.go @@ -14,12 +14,17 @@ package config +import "github.com/matrix-org/dendrite/mediaapi/types" + // MediaAPI contains the config information necessary to spin up a mediaapi process. type MediaAPI struct { // The name of the server. This is usually the domain name, e.g 'matrix.org', 'localhost'. - ServerName string `yaml:"server_name"` + ServerName types.ServerName `yaml:"server_name"` // The base path to where media files will be stored. - BasePath string `yaml:"base_path"` + BasePath types.Path `yaml:"base_path"` + // The maximum file size in bytes that is allowed to be stored on this server. + // Note that remote files larger than this can still be proxied to a client, they will just not be cached. + MaxFileSize types.ContentLength `yaml:"base_path"` // The postgres connection config for connecting to the database e.g a postgres:// URI DataSource string `yaml:"database"` } diff --git a/src/github.com/matrix-org/dendrite/mediaapi/routing/routing.go b/src/github.com/matrix-org/dendrite/mediaapi/routing/routing.go index 5319d9caa..daa598628 100644 --- a/src/github.com/matrix-org/dendrite/mediaapi/routing/routing.go +++ b/src/github.com/matrix-org/dendrite/mediaapi/routing/routing.go @@ -20,6 +20,7 @@ import ( "github.com/gorilla/mux" "github.com/matrix-org/dendrite/mediaapi/config" "github.com/matrix-org/dendrite/mediaapi/storage" + "github.com/matrix-org/dendrite/mediaapi/types" "github.com/matrix-org/dendrite/mediaapi/writers" "github.com/matrix-org/util" "github.com/prometheus/client_golang/prometheus" @@ -27,45 +28,27 @@ import ( const pathPrefixR0 = "/_matrix/media/v1" -type downloadRequestHandler struct { - Config config.MediaAPI - Database *storage.Database - DownloadServer writers.DownloadServer -} - -func (handler downloadRequestHandler) ServeHTTP(w http.ResponseWriter, req *http.Request) { - util.SetupRequestLogging(req) - - // Set common headers returned regardless of the outcome of the request - util.SetCORSHeaders(w) - w.Header().Set("Content-Type", "application/json") - - vars := mux.Vars(req) - writers.Download(w, req, vars["serverName"], vars["mediaId"], handler.Config, handler.Database, handler.DownloadServer) -} - // Setup registers HTTP handlers with the given ServeMux. It also supplies the given http.Client // to clients which need to make outbound HTTP requests. 
-func Setup(servMux *http.ServeMux, httpClient *http.Client, cfg config.MediaAPI, db *storage.Database, repo *storage.Repository) { +func Setup(servMux *http.ServeMux, httpClient *http.Client, cfg config.MediaAPI, db *storage.Database) { apiMux := mux.NewRouter() r0mux := apiMux.PathPrefix(pathPrefixR0).Subrouter() r0mux.Handle("/upload", make("upload", util.NewJSONRequestHandler(func(req *http.Request) util.JSONResponse { - return writers.Upload(req, cfg, db, repo) + return writers.Upload(req, cfg, db) }))) - downloadServer := writers.DownloadServer{ - Repository: *repo, - LocalServerName: cfg.ServerName, - } - - handler := downloadRequestHandler{ - Config: cfg, - Database: db, - DownloadServer: downloadServer, - } - r0mux.Handle("/download/{serverName}/{mediaId}", - prometheus.InstrumentHandler("download", handler), + prometheus.InstrumentHandler("download", http.HandlerFunc(func(w http.ResponseWriter, req *http.Request) { + util.SetupRequestLogging(req) + + // Set common headers returned regardless of the outcome of the request + util.SetCORSHeaders(w) + // TODO: fix comment + w.Header().Set("Content-Type", "application/json") + + vars := mux.Vars(req) + writers.Download(w, req, types.ServerName(vars["serverName"]), types.MediaID(vars["mediaId"]), cfg, db) + })), ) servMux.Handle("/metrics", prometheus.Handler()) diff --git a/src/github.com/matrix-org/dendrite/mediaapi/storage/fileio.go b/src/github.com/matrix-org/dendrite/mediaapi/storage/fileio.go deleted file mode 100644 index 5bd87ff8c..000000000 --- a/src/github.com/matrix-org/dendrite/mediaapi/storage/fileio.go +++ /dev/null @@ -1,92 +0,0 @@ -// Copyright 2017 Vector Creations Ltd -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package storage - -import ( - "fmt" - "os" - - log "github.com/Sirupsen/logrus" -) - -// LimitedFileWriter writes only a limited number of bytes to a file. -// -// If the callee attempts to write more bytes the file is deleted and further -// writes are silently discarded. -// -// This isn't thread safe. -type LimitedFileWriter struct { - filePath string - file *os.File - writtenBytes uint64 - maxBytes uint64 -} - -// NewLimitedFileWriter creates a new LimitedFileWriter at the given location. -// -// If a file already exists at the location it is immediately truncated. -// -// A maxBytes of 0 or negative is treated as no limit. -func NewLimitedFileWriter(filePath string, maxBytes uint64) (*LimitedFileWriter, error) { - file, err := os.Create(filePath) - if err != nil { - return nil, err - } - - writer := LimitedFileWriter{ - filePath: filePath, - file: file, - maxBytes: maxBytes, - } - - return &writer, nil -} - -// Close closes the underlying file descriptor, if its open. 
-// -// Any error comes from File.Close -func (writer *LimitedFileWriter) Close() error { - if writer.file != nil { - file := writer.file - writer.file = nil - return file.Close() - } - return nil -} - -func (writer *LimitedFileWriter) Write(p []byte) (n int, err error) { - if writer.maxBytes > 0 && uint64(len(p))+writer.writtenBytes > writer.maxBytes { - if writer.file != nil { - writer.Close() - err = os.Remove(writer.filePath) - if err != nil { - log.Printf("Failed to delete file %v\n", err) - } - } - - return 0, fmt.Errorf("Reached limit") - } - - if writer.file != nil { - n, err = writer.file.Write(p) - writer.writtenBytes += uint64(n) - - if err != nil { - log.Printf("Failed to write to file %v\n", err) - } - } - - return -} diff --git a/src/github.com/matrix-org/dendrite/mediaapi/storage/media.go b/src/github.com/matrix-org/dendrite/mediaapi/storage/media.go deleted file mode 100644 index 8aee283d4..000000000 --- a/src/github.com/matrix-org/dendrite/mediaapi/storage/media.go +++ /dev/null @@ -1,89 +0,0 @@ -// Copyright 2017 Vector Creations Ltd -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package storage - -import ( - "database/sql" - "time" -) - -const mediaSchema = ` --- The events table holds metadata for each media upload to the local server, --- the actual file is stored separately. -CREATE TABLE IF NOT EXISTS media_repository ( - -- The id used to refer to the media. - -- This is a base64-encoded sha256 hash of the file data - media_id TEXT PRIMARY KEY, - -- The origin of the media as requested by the client. - media_origin TEXT NOT NULL, - -- The MIME-type of the media file. - content_type TEXT NOT NULL, - -- The HTTP Content-Disposition header for the media file. - content_disposition TEXT NOT NULL, - -- Size of the media file in bytes. - file_size BIGINT NOT NULL, - -- When the content was uploaded in ms. - created_ts BIGINT NOT NULL, - -- The name with which the media was uploaded. - upload_name TEXT NOT NULL, - -- The user who uploaded the file. 
- user_id TEXT NOT NULL, - UNIQUE(media_id, media_origin) -); -` - -const insertMediaSQL = ` -INSERT INTO media_repository (media_id, media_origin, content_type, content_disposition, file_size, created_ts, upload_name, user_id) - VALUES ($1, $2, $3, $4, $5, $6, $7, $8) -` - -const selectMediaSQL = ` -SELECT content_type, content_disposition, file_size, upload_name FROM media_repository WHERE media_id = $1 AND media_origin = $2 -` - -type mediaStatements struct { - insertMediaStmt *sql.Stmt - selectMediaStmt *sql.Stmt -} - -func (s *mediaStatements) prepare(db *sql.DB) (err error) { - _, err = db.Exec(mediaSchema) - if err != nil { - return - } - - return statementList{ - {&s.insertMediaStmt, insertMediaSQL}, - {&s.selectMediaStmt, selectMediaSQL}, - }.prepare(db) -} - -func (s *mediaStatements) insertMedia(mediaID string, mediaOrigin string, contentType string, - contentDisposition string, fileSize int64, uploadName string, userID string) error { - _, err := s.insertMediaStmt.Exec( - mediaID, mediaOrigin, contentType, contentDisposition, fileSize, - int64(time.Now().UnixNano()/1000000), uploadName, userID, - ) - return err -} - -func (s *mediaStatements) selectMedia(mediaID string, mediaOrigin string) (string, string, int64, string, error) { - var contentType string - var contentDisposition string - var fileSize int64 - var uploadName string - err := s.selectMediaStmt.QueryRow(mediaID, mediaOrigin).Scan(&contentType, &contentDisposition, &fileSize, &uploadName) - return string(contentType), string(contentDisposition), int64(fileSize), string(uploadName), err -} diff --git a/src/github.com/matrix-org/dendrite/mediaapi/storage/media_repository_table.go b/src/github.com/matrix-org/dendrite/mediaapi/storage/media_repository_table.go new file mode 100644 index 000000000..11b9064f1 --- /dev/null +++ b/src/github.com/matrix-org/dendrite/mediaapi/storage/media_repository_table.go @@ -0,0 +1,107 @@ +// Copyright 2017 Vector Creations Ltd +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package storage + +import ( + "database/sql" + "time" + + "github.com/matrix-org/dendrite/mediaapi/types" +) + +const mediaSchema = ` +-- The media_repository table holds metadata for each media file stored and accessible to the local server, +-- the actual file is stored separately. +CREATE TABLE IF NOT EXISTS media_repository ( + -- The id used to refer to the media. + -- For uploads to this server this is a base64-encoded sha256 hash of the file data + -- For media from remote servers, this can be any unique identifier string + media_id TEXT NOT NULL, + -- The origin of the media as requested by the client. Should be a homeserver domain. + media_origin TEXT NOT NULL, + -- The MIME-type of the media file as specified when uploading. + content_type TEXT NOT NULL, + -- The HTTP Content-Disposition header for the media file as specified when uploading. + content_disposition TEXT NOT NULL, + -- Size of the media file in bytes. 
+ content_length BIGINT NOT NULL, + -- When the content was uploaded in UNIX epoch ms. + creation_ts BIGINT NOT NULL, + -- The file name with which the media was uploaded. + upload_name TEXT NOT NULL, + -- The user who uploaded the file. Should be a Matrix user ID. + user_id TEXT NOT NULL +); +CREATE UNIQUE INDEX IF NOT EXISTS media_repository_index ON media_repository (media_id, media_origin); +` + +const insertMediaSQL = ` +INSERT INTO media_repository (media_id, media_origin, content_type, content_disposition, content_length, creation_ts, upload_name, user_id) + VALUES ($1, $2, $3, $4, $5, $6, $7, $8) +` + +const selectMediaSQL = ` +SELECT content_type, content_disposition, content_length, creation_ts, upload_name, user_id FROM media_repository WHERE media_id = $1 AND media_origin = $2 +` + +type mediaStatements struct { + insertMediaStmt *sql.Stmt + selectMediaStmt *sql.Stmt +} + +func (s *mediaStatements) prepare(db *sql.DB) (err error) { + _, err = db.Exec(mediaSchema) + if err != nil { + return + } + + return statementList{ + {&s.insertMediaStmt, insertMediaSQL}, + {&s.selectMediaStmt, selectMediaSQL}, + }.prepare(db) +} + +func (s *mediaStatements) insertMedia(mediaMetadata *types.MediaMetadata) error { + mediaMetadata.CreationTimestamp = types.UnixMs(time.Now().UnixNano() / 1000000) + _, err := s.insertMediaStmt.Exec( + mediaMetadata.MediaID, + mediaMetadata.Origin, + mediaMetadata.ContentType, + mediaMetadata.ContentDisposition, + mediaMetadata.ContentLength, + mediaMetadata.CreationTimestamp, + mediaMetadata.UploadName, + mediaMetadata.UserID, + ) + return err +} + +func (s *mediaStatements) selectMedia(mediaID types.MediaID, mediaOrigin types.ServerName) (*types.MediaMetadata, error) { + mediaMetadata := types.MediaMetadata{ + MediaID: mediaID, + Origin: mediaOrigin, + } + err := s.selectMediaStmt.QueryRow( + mediaMetadata.MediaID, mediaMetadata.Origin, + ).Scan( + &mediaMetadata.ContentType, + &mediaMetadata.ContentDisposition, + &mediaMetadata.ContentLength, + &mediaMetadata.CreationTimestamp, + &mediaMetadata.UploadName, + &mediaMetadata.UserID, + ) + return &mediaMetadata, err +} diff --git a/src/github.com/matrix-org/dendrite/mediaapi/storage/repository.go b/src/github.com/matrix-org/dendrite/mediaapi/storage/repository.go deleted file mode 100644 index 2378646ae..000000000 --- a/src/github.com/matrix-org/dendrite/mediaapi/storage/repository.go +++ /dev/null @@ -1,283 +0,0 @@ -// Copyright 2017 Vector Creations Ltd -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package storage - -import ( - "crypto/sha256" - "encoding/base64" - "hash" - "io" - "io/ioutil" - "os" - "path" - - log "github.com/Sirupsen/logrus" -) - -// Description contains various attributes for an image. -type Description struct { - Type string - Length int64 -} - -type repositoryPaths struct { - contentPath string - typePath string -} - -// Repository stores locally uploaded media, and caches remote media that has -// been requested. 
-type Repository struct { - StorePrefix string - MaxBytes uint64 -} - -// ReaderFromRemoteCache returns a io.ReadCloser with the cached remote content, -// if it exists. Use IsNotExist to check if the error was due to it not existing -// in the cache -func (repo Repository) ReaderFromRemoteCache(host, name string) (io.ReadCloser, *Description, error) { - mediaDir := repo.getDirForRemoteMedia(host, name) - repoPaths := getPathsForMedia(mediaDir) - - return repo.readerFromRepository(repoPaths) -} - -// ReaderFromLocalRepo returns a io.ReadCloser with the locally uploaded content, -// if it exists. Use IsNotExist to check if the error was due to it not existing -// in the cache -func (repo Repository) ReaderFromLocalRepo(name string) (io.ReadCloser, *Description, error) { - mediaDir := repo.getDirForLocalMedia(name) - repoPaths := getPathsForMedia(mediaDir) - - return repo.readerFromRepository(repoPaths) -} - -func (repo Repository) readerFromRepository(repoPaths repositoryPaths) (io.ReadCloser, *Description, error) { - contentTypeBytes, err := ioutil.ReadFile(repoPaths.typePath) - if err != nil { - return nil, nil, err - } - - contentType := string(contentTypeBytes) - - file, err := os.Open(repoPaths.contentPath) - if err != nil { - return nil, nil, err - } - - stat, err := file.Stat() - if err != nil { - return nil, nil, err - } - - descr := Description{ - Type: contentType, - Length: stat.Size(), - } - - return file, &descr, nil -} - -// WriterToLocalRepository returns a RepositoryWriter for writing newly uploaded -// content into the repository. -// -// The returned RepositoryWriter will fail if more than MaxBytes tries to be -// written. -func (repo Repository) WriterToLocalRepository(descr Description) (RepositoryWriter, error) { - return newLocalRepositoryWriter(repo, descr) -} - -// WriterToRemoteCache returns a RepositoryWriter for caching newly downloaded -// remote content. -// -// The returned RepositoryWriter will silently stop writing if more than MaxBytes -// tries to be written and does *not* return an error. -func (repo Repository) WriterToRemoteCache(host, name string, descr Description) (RepositoryWriter, error) { - return newRemoteRepositoryWriter(repo, host, name, descr) -} - -func (repo *Repository) makeTempDir() (string, error) { - tmpDir := path.Join(repo.StorePrefix, "tmp") - os.MkdirAll(tmpDir, 0770) - return ioutil.TempDir(tmpDir, "") -} - -func (repo *Repository) getDirForLocalMedia(name string) string { - return path.Join(repo.StorePrefix, "local", name[:3], name[3:]) -} - -func (repo *Repository) getDirForRemoteMedia(host, sanitizedName string) string { - return path.Join(repo.StorePrefix, "remote", host, sanitizedName[:3], sanitizedName[3:]) -} - -// Get the actual paths for the data and metadata associated with remote media. -func getPathsForMedia(dir string) repositoryPaths { - contentPath := path.Join(dir, "content") - typePath := path.Join(dir, "type") - return repositoryPaths{ - contentPath: contentPath, - typePath: typePath, - } -} - -// IsNotExists check if error was due to content not existing in cache. -func IsNotExists(err error) bool { return os.IsNotExist(err) } - -// RepositoryWriter is used to either store into the repository newly uploaded -// media or to cache recently fetched remote media. -type RepositoryWriter interface { - io.WriteCloser - - // Finished should be called when successfully finished writing; otherwise - // the written content will not be committed to the repository. 
- Finished() (string, error) -} - -type remoteRepositoryWriter struct { - tmpDir string - finalDir string - name string - file io.WriteCloser - erred bool -} - -func newRemoteRepositoryWriter(repo Repository, host, name string, descr Description) (*remoteRepositoryWriter, error) { - tmpFile, tmpDir, err := getTempWriter(repo, descr) - if err != nil { - log.Printf("Failed to create writer: %v\n", err) - return nil, err - } - - return &remoteRepositoryWriter{ - tmpDir: tmpDir, - finalDir: repo.getDirForRemoteMedia(host, name), - name: name, - file: tmpFile, - erred: false, - }, nil -} - -func (writer remoteRepositoryWriter) Write(p []byte) (int, error) { - // Its OK to fail when writing to the remote repo. We just hide the error - // from the layers above - if !writer.erred { - if _, err := writer.file.Write(p); err != nil { - writer.erred = true - } - } - return len(p), nil -} - -func (writer remoteRepositoryWriter) Close() error { - os.RemoveAll(writer.tmpDir) - writer.file.Close() - return nil -} - -func (writer remoteRepositoryWriter) Finished() (string, error) { - var err error - if !writer.erred { - os.MkdirAll(path.Dir(writer.finalDir), 0770) - err = os.Rename(writer.tmpDir, writer.finalDir) - if err != nil { - return "", err - } - } - err = writer.Close() - return writer.name, err -} - -type localRepositoryWriter struct { - repo Repository - tmpDir string - hasher hash.Hash - file io.WriteCloser - finished bool -} - -func newLocalRepositoryWriter(repo Repository, descr Description) (*localRepositoryWriter, error) { - tmpFile, tmpDir, err := getTempWriter(repo, descr) - if err != nil { - return nil, err - } - - return &localRepositoryWriter{ - repo: repo, - tmpDir: tmpDir, - hasher: sha256.New(), - file: tmpFile, - finished: false, - }, nil -} - -func (writer localRepositoryWriter) Write(p []byte) (int, error) { - writer.hasher.Write(p) // Never errors. 
- n, err := writer.file.Write(p) - if err != nil { - writer.Close() - } - return n, err -} - -func (writer localRepositoryWriter) Close() error { - var err error - if !writer.finished { - err = os.RemoveAll(writer.tmpDir) - if err != nil { - return err - } - } - - err = writer.file.Close() - return err -} - -func (writer localRepositoryWriter) Finished() (string, error) { - hash := writer.hasher.Sum(nil) - name := base64.URLEncoding.EncodeToString(hash[:]) - finalDir := writer.repo.getDirForLocalMedia(name) - os.MkdirAll(path.Dir(finalDir), 0770) - err := os.Rename(writer.tmpDir, finalDir) - if err != nil { - log.Println("Failed to move temp directory:", writer.tmpDir, finalDir, err) - return "", err - } - writer.finished = true - writer.Close() - return name, nil -} - -func getTempWriter(repo Repository, descr Description) (io.WriteCloser, string, error) { - tmpDir, err := repo.makeTempDir() - if err != nil { - log.Printf("Failed to create temp dir: %v\n", err) - return nil, "", err - } - - repoPaths := getPathsForMedia(tmpDir) - - if err = ioutil.WriteFile(repoPaths.typePath, []byte(descr.Type), 0660); err != nil { - log.Printf("Failed to create typeFile: %q\n", err) - return nil, "", err - } - - tmpFile, err := NewLimitedFileWriter(repoPaths.contentPath, repo.MaxBytes) - if err != nil { - log.Printf("Failed to create limited file: %v\n", err) - return nil, "", err - } - - return tmpFile, tmpDir, nil -} diff --git a/src/github.com/matrix-org/dendrite/mediaapi/storage/storage.go b/src/github.com/matrix-org/dendrite/mediaapi/storage/storage.go index 72dc0a62f..121a06354 100644 --- a/src/github.com/matrix-org/dendrite/mediaapi/storage/storage.go +++ b/src/github.com/matrix-org/dendrite/mediaapi/storage/storage.go @@ -19,6 +19,7 @@ import ( // Import the postgres database driver. _ "github.com/lib/pq" + "github.com/matrix-org/dendrite/mediaapi/types" ) // A Database is used to store room events and stream offsets. @@ -40,12 +41,19 @@ func Open(dataSourceName string) (*Database, error) { return &d, nil } -// CreateMedia inserts the metadata about the uploaded media into the database. -func (d *Database) CreateMedia(mediaID string, mediaOrigin string, contentType string, contentDisposition string, fileSize int64, uploadName string, userID string) error { - return d.statements.insertMedia(mediaID, mediaOrigin, contentType, contentDisposition, fileSize, uploadName, userID) +// StoreMediaMetadata inserts the metadata about the uploaded media into the database. +func (d *Database) StoreMediaMetadata(mediaMetadata *types.MediaMetadata) error { + return d.statements.insertMedia(mediaMetadata) } -// GetMedia possibly selects the metadata about previously uploaded media from the database. -func (d *Database) GetMedia(mediaID string, mediaOrigin string) (string, string, int64, string, error) { - return d.statements.selectMedia(mediaID, mediaOrigin) +// GetMediaMetadata possibly selects the metadata about previously uploaded media from the database. 
+func (d *Database) GetMediaMetadata(mediaID types.MediaID, mediaOrigin types.ServerName, mediaMetadata *types.MediaMetadata) error { + metadata, err := d.statements.selectMedia(mediaID, mediaOrigin) + mediaMetadata.ContentType = metadata.ContentType + mediaMetadata.ContentDisposition = metadata.ContentDisposition + mediaMetadata.ContentLength = metadata.ContentLength + mediaMetadata.CreationTimestamp = metadata.CreationTimestamp + mediaMetadata.UploadName = metadata.UploadName + mediaMetadata.UserID = metadata.UserID + return err } diff --git a/src/github.com/matrix-org/dendrite/mediaapi/types/types.go b/src/github.com/matrix-org/dendrite/mediaapi/types/types.go new file mode 100644 index 000000000..e1e1a3a44 --- /dev/null +++ b/src/github.com/matrix-org/dendrite/mediaapi/types/types.go @@ -0,0 +1,57 @@ +// Copyright 2017 Vector Creations Ltd +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package types + +// ContentDisposition is an HTTP Content-Disposition header string +type ContentDisposition string + +// ContentLength is an HTTP Content-Length header which is a number of bytes to be expected in a request body +type ContentLength int64 + +// ContentType is an HTTP Content-Type header string representing the MIME type of a request body +type ContentType string + +// Filename is a string representing the name of a file +type Filename string + +// Path is an absolute or relative UNIX filesystem path +type Path string + +// MediaID is a string representing the unique identifier for a file (could be a hash but does not have to be) +type MediaID string + +// ServerName is the host of a matrix homeserver, e.g. matrix.org +type ServerName string + +// RequestMethod is an HTTP request method i.e. GET, POST, etc +type RequestMethod string + +// MatrixUserID is a Matrix user ID string in the form @user:domain e.g. 
@alice:matrix.org +type MatrixUserID string + +// UnixMs is the milliseconds since the Unix epoch +type UnixMs int64 + +// MediaMetadata is metadata associated with a media file +type MediaMetadata struct { + MediaID MediaID + Origin ServerName + ContentType ContentType + ContentDisposition ContentDisposition + ContentLength ContentLength + CreationTimestamp UnixMs + UploadName Filename + UserID MatrixUserID +} diff --git a/src/github.com/matrix-org/dendrite/mediaapi/writers/download.go b/src/github.com/matrix-org/dendrite/mediaapi/writers/download.go index 730746603..ec270e914 100644 --- a/src/github.com/matrix-org/dendrite/mediaapi/writers/download.go +++ b/src/github.com/matrix-org/dendrite/mediaapi/writers/download.go @@ -15,11 +15,14 @@ package writers import ( + "database/sql" "encoding/json" "fmt" "io" "net" "net/http" + "os" + "path" "strconv" "strings" @@ -27,30 +30,30 @@ import ( "github.com/matrix-org/dendrite/clientapi/jsonerror" "github.com/matrix-org/dendrite/mediaapi/config" "github.com/matrix-org/dendrite/mediaapi/storage" + "github.com/matrix-org/dendrite/mediaapi/types" "github.com/matrix-org/util" ) -// DownloadRequest metadata included in or derivable from an upload request +// downloadRequest metadata included in or derivable from an download request // https://matrix.org/docs/spec/client_server/r0.2.0.html#post-matrix-media-r0-download -type DownloadRequest struct { - MediaID string - ServerName string +type downloadRequest struct { + MediaMetadata *types.MediaMetadata } -// Validate validates the DownloadRequest fields -func (r DownloadRequest) Validate() *util.JSONResponse { +// Validate validates the downloadRequest fields +func (r downloadRequest) Validate() *util.JSONResponse { // FIXME: the following errors aren't bad JSON, rather just a bad request path // maybe give the URL pattern in the routing, these are not even possible as the handler would not be hit...? - if r.MediaID == "" { + if r.MediaMetadata.MediaID == "" { return &util.JSONResponse{ - Code: 400, - JSON: jsonerror.BadJSON("mediaId must be a non-empty string"), + Code: 404, + JSON: jsonerror.NotFound("mediaId must be a non-empty string"), } } - if r.ServerName == "" { + if r.MediaMetadata.Origin == "" { return &util.JSONResponse{ - Code: 400, - JSON: jsonerror.BadJSON("serverName must be a non-empty string"), + Code: 404, + JSON: jsonerror.NotFound("serverName must be a non-empty string"), } } return nil @@ -72,10 +75,21 @@ func jsonErrorResponse(w http.ResponseWriter, res util.JSONResponse, logger *log w.Write(resBytes) } +var errFileIsTooLarge = fmt.Errorf("file is too large") +var errRead = fmt.Errorf("failed to read response from remote server") +var errResponse = fmt.Errorf("failed to write file data to response body") +var errWrite = fmt.Errorf("failed to write file to disk") + // Download implements /download -func Download(w http.ResponseWriter, req *http.Request, serverName string, mediaID string, cfg config.MediaAPI, db *storage.Database, downloadServer DownloadServer) { +// Files from this server (i.e. origin == cfg.ServerName) are served directly +// Files from remote servers (i.e. origin != cfg.ServerName) are cached locally. +// If they are present in the cache, they are served directly. +// If they are not present in the cache, they are obtained from the remote server and +// simultaneously served back to the client and written into the cache. 
+func Download(w http.ResponseWriter, req *http.Request, origin types.ServerName, mediaID types.MediaID, cfg config.MediaAPI, db *storage.Database) { logger := util.GetLogger(req.Context()) + // request validation if req.Method != "GET" { jsonErrorResponse(w, util.JSONResponse{ Code: 405, @@ -84,9 +98,11 @@ func Download(w http.ResponseWriter, req *http.Request, serverName string, media return } - r := &DownloadRequest{ - MediaID: mediaID, - ServerName: serverName, + r := &downloadRequest{ + MediaMetadata: &types.MediaMetadata{ + MediaID: mediaID, + Origin: origin, + }, } if resErr := r.Validate(); resErr != nil { @@ -94,190 +110,285 @@ func Download(w http.ResponseWriter, req *http.Request, serverName string, media return } - contentType, contentDisposition, fileSize, filename, err := db.GetMedia(r.MediaID, r.ServerName) - if err != nil && strings.Compare(r.ServerName, cfg.ServerName) == 0 { + // check if we have a record of the media in our database + err := db.GetMediaMetadata(r.MediaMetadata.MediaID, r.MediaMetadata.Origin, r.MediaMetadata) + + if err == nil { + // If we have a record, we can respond from the local file + logger.WithFields(log.Fields{ + "MediaID": r.MediaMetadata.MediaID, + "Origin": r.MediaMetadata.Origin, + "UploadName": r.MediaMetadata.UploadName, + "Content-Length": r.MediaMetadata.ContentLength, + "Content-Type": r.MediaMetadata.ContentType, + "Content-Disposition": r.MediaMetadata.ContentDisposition, + }).Infof("Downloading file") + + filePath := getPathFromMediaMetadata(r.MediaMetadata, cfg.BasePath) + file, err := os.Open(filePath) + if err != nil { + // FIXME: Remove erroneous file from database? + jsonErrorResponse(w, util.JSONResponse{ + Code: 404, + JSON: jsonerror.NotFound(fmt.Sprintf("File with media ID %q does not exist", r.MediaMetadata.MediaID)), + }, logger) + return + } + + stat, err := file.Stat() + if err != nil { + // FIXME: Remove erroneous file from database? + jsonErrorResponse(w, util.JSONResponse{ + Code: 404, + JSON: jsonerror.NotFound(fmt.Sprintf("File with media ID %q does not exist", r.MediaMetadata.MediaID)), + }, logger) + return + } + + if r.MediaMetadata.ContentLength > 0 && int64(r.MediaMetadata.ContentLength) != stat.Size() { + logger.Warnf("File size in database (%v) and on disk (%v) differ.", r.MediaMetadata.ContentLength, stat.Size()) + // FIXME: Remove erroneous file from database? 
+ } + + w.Header().Set("Content-Type", string(r.MediaMetadata.ContentType)) + w.Header().Set("Content-Length", strconv.FormatInt(stat.Size(), 10)) + contentSecurityPolicy := "default-src 'none';" + + " script-src 'none';" + + " plugin-types application/pdf;" + + " style-src 'unsafe-inline';" + + " object-src 'self';" + w.Header().Set("Content-Security-Policy", contentSecurityPolicy) + + if bytesResponded, err := io.Copy(w, file); err != nil { + logger.Warnf("Failed to copy from cache %v\n", err) + if bytesResponded == 0 { + jsonErrorResponse(w, util.JSONResponse{ + Code: 500, + JSON: jsonerror.NotFound(fmt.Sprintf("Failed to respond with file with media ID %q", r.MediaMetadata.MediaID)), + }, logger) + } + // If we have written any data then we have already responded with 200 OK and all we can do is close the connection + return + } + } else if err == sql.ErrNoRows && r.MediaMetadata.Origin != cfg.ServerName { + // If we do not have a record and the origin is remote, we need to fetch it and respond with that file + logger.WithFields(log.Fields{ + "MediaID": r.MediaMetadata.MediaID, + "Origin": r.MediaMetadata.Origin, + "UploadName": r.MediaMetadata.UploadName, + "Content-Length": r.MediaMetadata.ContentLength, + "Content-Type": r.MediaMetadata.ContentType, + "Content-Disposition": r.MediaMetadata.ContentDisposition, + }).Infof("Fetching remote file") + + // TODO: lock request in hash set + + // FIXME: Only request once (would race if multiple requests for the same remote file) + // Use a hash set based on the origin and media ID (the request URL should be fine...) and synchronise adding / removing members + urls := getMatrixUrls(r.MediaMetadata.Origin) + + logger.Printf("Connecting to remote %q\n", urls[0]) + + remoteReqAddr := urls[0] + "/_matrix/media/v1/download/" + string(r.MediaMetadata.Origin) + "/" + string(r.MediaMetadata.MediaID) + remoteReq, err := http.NewRequest("GET", remoteReqAddr, nil) + if err != nil { + jsonErrorResponse(w, util.JSONResponse{ + Code: 500, + JSON: jsonerror.Unknown(fmt.Sprintf("File with media ID %q could not be downloaded from %q", r.MediaMetadata.MediaID, r.MediaMetadata.Origin)), + }, logger) + return + } + + remoteReq.Header.Set("Host", string(r.MediaMetadata.Origin)) + + client := http.Client{} + resp, err := client.Do(remoteReq) + if err != nil { + jsonErrorResponse(w, util.JSONResponse{ + Code: 502, + JSON: jsonerror.Unknown(fmt.Sprintf("File with media ID %q could not be downloaded from %q", r.MediaMetadata.MediaID, r.MediaMetadata.Origin)), + }, logger) + return + } + defer resp.Body.Close() + + if resp.StatusCode != 200 { + logger.Printf("Server responded with %d\n", resp.StatusCode) + if resp.StatusCode == 404 { + jsonErrorResponse(w, util.JSONResponse{ + Code: 404, + JSON: jsonerror.NotFound(fmt.Sprintf("File with media ID %q does not exist", r.MediaMetadata.MediaID)), + }, logger) + return + } + jsonErrorResponse(w, util.JSONResponse{ + Code: 502, + JSON: jsonerror.Unknown(fmt.Sprintf("File with media ID %q could not be downloaded from %q", r.MediaMetadata.MediaID, r.MediaMetadata.Origin)), + }, logger) + return + } + + contentLength, err := strconv.ParseInt(resp.Header.Get("Content-Length"), 10, 64) + if err != nil { + logger.Warn("Failed to parse content length") + } + r.MediaMetadata.ContentLength = types.ContentLength(contentLength) + + w.Header().Set("Content-Type", string(r.MediaMetadata.ContentType)) + w.Header().Set("Content-Length", strconv.FormatInt(int64(r.MediaMetadata.ContentLength), 10)) + contentSecurityPolicy := "default-src 
'none';" + + " script-src 'none';" + + " plugin-types application/pdf;" + + " style-src 'unsafe-inline';" + + " object-src 'self';" + w.Header().Set("Content-Security-Policy", contentSecurityPolicy) + + tmpDir, err := createTempDir(cfg.BasePath) + if err != nil { + logger.Infof("Failed to create temp dir %q\n", err) + jsonErrorResponse(w, util.JSONResponse{ + Code: 400, + JSON: jsonerror.Unknown(fmt.Sprintf("Failed to upload: %q", err)), + }, logger) + return + } + tmpFile, writer, err := createFileWriter(tmpDir, types.Filename(r.MediaMetadata.MediaID[3:])) + if err != nil { + logger.Infof("Failed to create file writer %q\n", err) + jsonErrorResponse(w, util.JSONResponse{ + Code: 400, + JSON: jsonerror.Unknown(fmt.Sprintf("Failed to upload: %q", err)), + }, logger) + return + } + defer tmpFile.Close() + + // bytesResponded is the total number of bytes written to the response to the client request + // bytesWritten is the total number of bytes written to disk + var bytesResponded, bytesWritten int64 = 0, 0 + var fetchError error + // Note: the buffer size is the same as is used in io.Copy() + buffer := make([]byte, 32*1024) + for { + // read from remote request's response body + bytesRead, readErr := resp.Body.Read(buffer) + if bytesRead > 0 { + // write to client request's response body + bytesTemp, respErr := w.Write(buffer[:bytesRead]) + if bytesTemp != bytesRead || (respErr != nil && respErr != io.EOF) { + // TODO: BORKEN + logger.Errorf("bytesTemp %v != bytesRead %v : %v", bytesTemp, bytesRead, respErr) + fetchError = errResponse + break + } + bytesResponded += int64(bytesTemp) + if fetchError == nil || (fetchError != errFileIsTooLarge && fetchError != errWrite) { + // if larger than cfg.MaxFileSize then stop writing to disk and discard cached file + if bytesWritten+int64(len(buffer)) > int64(cfg.MaxFileSize) { + // TODO: WAAAAHNING and clean up temp files + fetchError = errFileIsTooLarge + } else { + // write to disk + bytesTemp, writeErr := writer.Write(buffer) + if writeErr != nil && writeErr != io.EOF { + // TODO: WAAAAHNING and clean up temp files + fetchError = errWrite + } else { + bytesWritten += int64(bytesTemp) + } + } + } + } + if readErr != nil { + if readErr != io.EOF { + fetchError = errRead + break + } + } + } + + writer.Flush() + + if fetchError != nil { + logFields := log.Fields{ + "MediaID": r.MediaMetadata.MediaID, + "Origin": r.MediaMetadata.Origin, + } + if fetchError == errFileIsTooLarge { + logFields["MaxFileSize"] = cfg.MaxFileSize + } + logger.WithFields(logFields).Warnln(fetchError) + tmpDirErr := os.RemoveAll(string(tmpDir)) + if tmpDirErr != nil { + logger.Warnf("Failed to remove tmpDir (%v): %q\n", tmpDir, tmpDirErr) + } + // Note: if we have responded with any data in the body at all then we have already sent 200 OK and we can only abort at this point + if bytesResponded < 1 { + jsonErrorResponse(w, util.JSONResponse{ + Code: 502, + JSON: jsonerror.Unknown(fmt.Sprintf("File with media ID %q could not be downloaded from %q", r.MediaMetadata.MediaID, r.MediaMetadata.Origin)), + }, logger) + } else { + // We attempt to bluntly close the connection because that is the + // best thing we can do after we've sent a 200 OK + logger.Println("Attempting to close the connection.") + hijacker, ok := w.(http.Hijacker) + if ok { + connection, _, hijackErr := hijacker.Hijack() + if hijackErr == nil { + logger.Println("Closing") + connection.Close() + } else { + logger.Printf("Error trying to hijack: %v", hijackErr) + } + } + } + return + } + + // Note: After this point 
we have responded to the client's request and are just dealing with local caching. + // As we have responded with 200 OK, any errors are ineffectual to the client request and so we just log and return. + + // if written to disk, add to db + err = db.StoreMediaMetadata(r.MediaMetadata) + if err != nil { + tmpDirErr := os.RemoveAll(string(tmpDir)) + if tmpDirErr != nil { + logger.Warnf("Failed to remove tmpDir (%v): %q\n", tmpDir, tmpDirErr) + } + return + } + + // TODO: unlock request in hash set + + // TODO: generate thumbnails + + err = moveFile( + types.Path(path.Join(string(tmpDir), "content")), + types.Path(getPathFromMediaMetadata(r.MediaMetadata, cfg.BasePath)), + ) + if err != nil { + tmpDirErr := os.RemoveAll(string(tmpDir)) + if tmpDirErr != nil { + logger.Warnf("Failed to remove tmpDir (%v): %q\n", tmpDir, tmpDirErr) + } + return + } + } else { + // TODO: If we do not have a record and the origin is local, or if we have another error from the database, the file is not found jsonErrorResponse(w, util.JSONResponse{ Code: 404, - JSON: jsonerror.NotFound(fmt.Sprintf("File %q does not exist", r.MediaID)), + JSON: jsonerror.NotFound(fmt.Sprintf("File with media ID %q does not exist", r.MediaMetadata.MediaID)), }, logger) - return } - - // - read file and respond - logger.WithFields(log.Fields{ - "MediaID": r.MediaID, - "ServerName": r.ServerName, - "Filename": filename, - "Content-Type": contentType, - "Content-Disposition": contentDisposition, - }).Infof("Downloading file") - - logger.WithField("code", 200).Infof("Responding (%d bytes)", fileSize) - - respWriter := httpResponseWriter{resp: w} - if err = downloadServer.getImage(respWriter, r.ServerName, r.MediaID); err != nil { - if respWriter.haveWritten() { - closeConnection(w) - return - } - - errStatus := 500 - switch err { - case errNotFound: - errStatus = 404 - case errProxy: - errStatus = 502 - } - http.Error(w, err.Error(), errStatus) - return - } - - return -} - -// DownloadServer serves and caches remote media. -type DownloadServer struct { - Client http.Client - Repository storage.Repository - LocalServerName string -} - -func (handler *DownloadServer) getImage(w responseWriter, host, name string) error { - var file io.ReadCloser - var descr *storage.Description - var err error - if host == handler.LocalServerName { - file, descr, err = handler.Repository.ReaderFromLocalRepo(name) - } else { - file, descr, err = handler.Repository.ReaderFromRemoteCache(host, name) - } - - if err == nil { - log.Println("Found in Cache") - w.setContentType(descr.Type) - - size := strconv.FormatInt(descr.Length, 10) - w.setContentLength(size) - w.setContentSecurityPolicy() - if _, err = io.Copy(w, file); err != nil { - log.Printf("Failed to copy from cache %v\n", err) - return err - } - w.Close() - return nil - } else if !storage.IsNotExists(err) { - log.Printf("Error looking in cache: %v\n", err) - return err - } - - if host == handler.LocalServerName { - // Its fatal if we can't find local files in our cache. 
- return errNotFound - } - - respBody, desc, err := handler.fetchRemoteMedia(host, name) - if err != nil { - return err - } - - defer respBody.Close() - - w.setContentType(desc.Type) - if desc.Length > 0 { - w.setContentLength(strconv.FormatInt(desc.Length, 10)) - } - - writer, err := handler.Repository.WriterToRemoteCache(host, name, *desc) - if err != nil { - log.Printf("Failed to get cache writer %q\n", err) - return err - } - - defer writer.Close() - - reader := io.TeeReader(respBody, w) - if _, err := io.Copy(writer, reader); err != nil { - log.Printf("Failed to copy %q\n", err) - return err - } - - writer.Finished() - - log.Println("Finished conn") - - return nil -} - -func (handler *DownloadServer) fetchRemoteMedia(host, name string) (io.ReadCloser, *storage.Description, error) { - urls := getMatrixUrls(host) - - log.Printf("Connecting to remote %q\n", urls[0]) - - remoteReq, err := http.NewRequest("GET", urls[0]+"/_matrix/media/v1/download/"+host+"/"+name, nil) - if err != nil { - log.Printf("Failed to connect to remote: %q\n", err) - return nil, nil, err - } - - remoteReq.Header.Set("Host", host) - - resp, err := handler.Client.Do(remoteReq) - if err != nil { - log.Printf("Failed to connect to remote: %q\n", err) - return nil, nil, errProxy - } - - if resp.StatusCode != 200 { - resp.Body.Close() - log.Printf("Server responded with %d\n", resp.StatusCode) - if resp.StatusCode == 404 { - return nil, nil, errNotFound - } - return nil, nil, errProxy - } - - desc := storage.Description{ - Type: resp.Header.Get("Content-Type"), - Length: -1, - } - - length, err := strconv.ParseInt(resp.Header.Get("Content-Length"), 10, 64) - if err == nil { - desc.Length = length - } - - return resp.Body, &desc, nil -} - -// Given a http.ResponseWriter, attempt to force close the connection. -// -// This is useful if you get a fatal error after sending the initial 200 OK -// response. -func closeConnection(w http.ResponseWriter) { - log.Println("Attempting to close connection") - - // We attempt to bluntly close the connection because that is the - // best thing we can do after we've sent a 200 OK - hijack, ok := w.(http.Hijacker) - if ok { - conn, _, err := hijack.Hijack() - if err != nil { - fmt.Printf("Err trying to hijack: %v", err) - return - } - log.Println("Closing") - conn.Close() - return - } - log.Println("Not hijacker") } // Given a matrix server name, attempt to discover URLs to contact the server // on. -func getMatrixUrls(host string) []string { - _, srvs, err := net.LookupSRV("matrix", "tcp", host) +func getMatrixUrls(serverName types.ServerName) []string { + _, srvs, err := net.LookupSRV("matrix", "tcp", string(serverName)) if err != nil { - return []string{"https://" + host + ":8448"} + return []string{"https://" + string(serverName) + ":8448"} } results := make([]string, 0, len(srvs)) @@ -294,65 +405,3 @@ func getMatrixUrls(host string) []string { return results } - -// Given a path of the form '//' extract the host and name. 
-func getMediaIDFromPath(path string) (host, name string, err error) { - parts := strings.Split(path, "/") - if len(parts) != 3 { - err = fmt.Errorf("Invalid path %q", path) - return - } - - host, name = parts[1], parts[2] - - if host == "" || name == "" { - err = fmt.Errorf("Invalid path %q", path) - return - } - - return -} - -type responseWriter interface { - io.WriteCloser - setContentLength(string) - setContentSecurityPolicy() - setContentType(string) - haveWritten() bool -} - -type httpResponseWriter struct { - resp http.ResponseWriter - written bool -} - -func (writer httpResponseWriter) haveWritten() bool { - return writer.written -} - -func (writer httpResponseWriter) Write(p []byte) (n int, err error) { - writer.written = true - return writer.resp.Write(p) -} - -func (writer httpResponseWriter) Close() error { return nil } - -func (writer httpResponseWriter) setContentType(contentType string) { - writer.resp.Header().Set("Content-Type", contentType) -} - -func (writer httpResponseWriter) setContentLength(length string) { - writer.resp.Header().Set("Content-Length", length) -} - -func (writer httpResponseWriter) setContentSecurityPolicy() { - contentSecurityPolicy := "default-src 'none';" + - " script-src 'none';" + - " plugin-types application/pdf;" + - " style-src 'unsafe-inline';" + - " object-src 'self';" - writer.resp.Header().Set("Content-Security-Policy", contentSecurityPolicy) -} - -var errProxy = fmt.Errorf("Failed to contact remote") -var errNotFound = fmt.Errorf("Image not found") diff --git a/src/github.com/matrix-org/dendrite/mediaapi/writers/upload.go b/src/github.com/matrix-org/dendrite/mediaapi/writers/upload.go index f68168840..7b6b4876d 100644 --- a/src/github.com/matrix-org/dendrite/mediaapi/writers/upload.go +++ b/src/github.com/matrix-org/dendrite/mediaapi/writers/upload.go @@ -15,9 +15,14 @@ package writers import ( + "crypto/sha256" + "database/sql" + "encoding/base64" "fmt" "io" "net/http" + "os" + "path" "strings" log "github.com/Sirupsen/logrus" @@ -25,58 +30,54 @@ import ( "github.com/matrix-org/dendrite/clientapi/jsonerror" "github.com/matrix-org/dendrite/mediaapi/config" "github.com/matrix-org/dendrite/mediaapi/storage" + "github.com/matrix-org/dendrite/mediaapi/types" "github.com/matrix-org/util" ) -// UploadRequest metadata included in or derivable from an upload request +// uploadRequest metadata included in or derivable from an upload request // https://matrix.org/docs/spec/client_server/r0.2.0.html#post-matrix-media-r0-upload -// NOTE: ContentType is an HTTP request header and Filename is passed as a query parameter -type UploadRequest struct { - ContentDisposition string - ContentLength int64 - ContentType string - Filename string - Base64FileHash string - Method string - UserID string +// NOTE: The members come from HTTP request metadata such as headers, query parameters or can be derived from such +type uploadRequest struct { + MediaMetadata *types.MediaMetadata } -// Validate validates the UploadRequest fields -func (r UploadRequest) Validate() *util.JSONResponse { +// Validate validates the uploadRequest fields +func (r uploadRequest) Validate(maxFileSize types.ContentLength) *util.JSONResponse { // TODO: Any validation to be done on ContentDisposition? 
- if r.ContentLength < 1 { + + if r.MediaMetadata.ContentLength < 1 { return &util.JSONResponse{ Code: 400, - JSON: jsonerror.BadJSON("HTTP Content-Length request header must be greater than zero."), + JSON: jsonerror.Unknown("HTTP Content-Length request header must be greater than zero."), + } + } + if maxFileSize > 0 && r.MediaMetadata.ContentLength > maxFileSize { + return &util.JSONResponse{ + Code: 400, + JSON: jsonerror.Unknown(fmt.Sprintf("HTTP Content-Length is greater than the maximum allowed upload size (%v).", maxFileSize)), } } // TODO: Check if the Content-Type is a valid type? - if r.ContentType == "" { + if r.MediaMetadata.ContentType == "" { return &util.JSONResponse{ Code: 400, - JSON: jsonerror.BadJSON("HTTP Content-Type request header must be set."), + JSON: jsonerror.Unknown("HTTP Content-Type request header must be set."), } } // TODO: Validate filename - what are the valid characters? - if r.Method != "POST" { - return &util.JSONResponse{ - Code: 400, - JSON: jsonerror.BadJSON("HTTP request method must be POST."), - } - } - if r.UserID != "" { + if r.MediaMetadata.UserID != "" { // TODO: We should put user ID parsing code into gomatrixserverlib and use that instead // (see https://github.com/matrix-org/gomatrixserverlib/blob/3394e7c7003312043208aa73727d2256eea3d1f6/eventcontent.go#L347 ) // It should be a struct (with pointers into a single string to avoid copying) and // we should update all refs to use UserID types rather than strings. // https://github.com/matrix-org/synapse/blob/v0.19.2/synapse/types.py#L92 - if len(r.UserID) == 0 || r.UserID[0] != '@' { + if len(r.MediaMetadata.UserID) == 0 || r.MediaMetadata.UserID[0] != '@' { return &util.JSONResponse{ Code: 400, - JSON: jsonerror.BadJSON("user id must start with '@'"), + JSON: jsonerror.Unknown("user id must start with '@'"), } } - parts := strings.SplitN(r.UserID[1:], ":", 2) + parts := strings.SplitN(string(r.MediaMetadata.UserID[1:]), ":", 2) if len(parts) != 2 { return &util.JSONResponse{ Code: 400, @@ -93,9 +94,21 @@ type uploadResponse struct { } // Upload implements /upload -func Upload(req *http.Request, cfg config.MediaAPI, db *storage.Database, repo *storage.Repository) util.JSONResponse { +// +// This endpoint involves uploading potentially significant amounts of data to the homeserver. +// This implementation supports a configurable maximum file size limit in bytes. If a user tries to upload more than this, they will receive an error that their upload is too large. +// Uploaded files are processed piece-wise to avoid DoS attacks which would starve the server of memory. +// TODO: Requests time out if they have not received any data within the configured timeout period. 
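// The piece-wise handling described in the comment above comes down to wiring
// the request body through a size limiter and a hash before anything reaches
// disk, so a client can never make the server buffer more than the configured
// maximum. A self-contained sketch of that wiring (hypothetical names, a
// 10 MB cap as in the later config bump, most error handling omitted; the
// real handler also validates headers and records metadata):

package main

import (
    "crypto/sha256"
    "encoding/base64"
    "fmt"
    "io"
    "io/ioutil"
    "net/http"
)

const maxFileSize = 10 * 1024 * 1024 // bytes

func handleUpload(w http.ResponseWriter, req *http.Request) {
    // Never read more than maxFileSize bytes from the client.
    limitedBody := io.LimitReader(req.Body, maxFileSize)

    // Hash the bytes as they stream past; the digest later becomes the media ID.
    hasher := sha256.New()
    reader := io.TeeReader(limitedBody, hasher)

    tmpFile, err := ioutil.TempFile("", "content")
    if err != nil {
        http.Error(w, err.Error(), http.StatusInternalServerError)
        return
    }
    defer tmpFile.Close()

    // Stream to disk through io.Copy's small internal buffer; the whole file
    // is never held in memory.
    bytesWritten, err := io.Copy(tmpFile, reader)
    if err != nil {
        http.Error(w, err.Error(), http.StatusInternalServerError)
        return
    }

    mediaID := base64.URLEncoding.EncodeToString(hasher.Sum(nil))
    fmt.Fprintf(w, "wrote %d bytes as media ID %s\n", bytesWritten, mediaID)
}

func main() {
    http.HandleFunc("/upload", handleUpload)
    http.ListenAndServe(":8080", nil)
}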
+func Upload(req *http.Request, cfg config.MediaAPI, db *storage.Database) util.JSONResponse { logger := util.GetLogger(req.Context()) + if req.Method != "POST" { + return util.JSONResponse{ + Code: 400, + JSON: jsonerror.Unknown("HTTP request method must be POST."), + } + } + // FIXME: This will require querying some other component/db but currently // just accepts a user id for auth userID, resErr := auth.VerifyAccessToken(req) @@ -103,74 +116,129 @@ func Upload(req *http.Request, cfg config.MediaAPI, db *storage.Database, repo * return *resErr } - r := &UploadRequest{ - ContentDisposition: req.Header.Get("Content-Disposition"), - ContentLength: req.ContentLength, - ContentType: req.Header.Get("Content-Type"), - Filename: req.FormValue("filename"), - Method: req.Method, - UserID: userID, + r := &uploadRequest{ + MediaMetadata: &types.MediaMetadata{ + Origin: cfg.ServerName, + ContentDisposition: types.ContentDisposition(req.Header.Get("Content-Disposition")), + ContentLength: types.ContentLength(req.ContentLength), + ContentType: types.ContentType(req.Header.Get("Content-Type")), + UploadName: types.Filename(req.FormValue("filename")), + UserID: types.MatrixUserID(userID), + }, } - if resErr = r.Validate(); resErr != nil { + // FIXME: if no Content-Disposition then set + + if resErr = r.Validate(cfg.MaxFileSize); resErr != nil { return *resErr } logger.WithFields(log.Fields{ - "ContentType": r.ContentType, - "Filename": r.Filename, - "UserID": r.UserID, + "Origin": r.MediaMetadata.Origin, + "UploadName": r.MediaMetadata.UploadName, + "Content-Length": r.MediaMetadata.ContentLength, + "Content-Type": r.MediaMetadata.ContentType, + "Content-Disposition": r.MediaMetadata.ContentDisposition, }).Info("Uploading file") - // TODO: Store file to disk - // - make path to file - // - progressive writing (could support Content-Length 0 and cut off - // after some max upload size is exceeded) - // - generate id (ideally a hash but a random string to start with) - writer, err := repo.WriterToLocalRepository(storage.Description{ - Type: r.ContentType, - }) + tmpDir, err := createTempDir(cfg.BasePath) if err != nil { - logger.Infof("Failed to get cache writer %q\n", err) + logger.Infof("Failed to create temp dir %q\n", err) return util.JSONResponse{ Code: 400, - JSON: jsonerror.BadJSON(fmt.Sprintf("Failed to upload: %q", err)), + JSON: jsonerror.Unknown(fmt.Sprintf("Failed to upload: %q", err)), } } + file, writer, err := createFileWriter(tmpDir, "content") + defer file.Close() - defer writer.Close() + // The limited reader restricts how many bytes are read from the body to the specified maximum bytes + // Note: the golang HTTP server closes the request body + limitedBody := io.LimitReader(req.Body, int64(cfg.MaxFileSize)) + hasher := sha256.New() + reader := io.TeeReader(limitedBody, hasher) - if _, err = io.Copy(writer, req.Body); err != nil { + bytesWritten, err := io.Copy(writer, reader) + if err != nil { logger.Infof("Failed to copy %q\n", err) + tmpDirErr := os.RemoveAll(string(tmpDir)) + if tmpDirErr != nil { + logger.Warnf("Failed to remove tmpDir (%v): %q\n", tmpDir, tmpDirErr) + } return util.JSONResponse{ Code: 400, - JSON: jsonerror.BadJSON(fmt.Sprintf("Failed to upload: %q", err)), + JSON: jsonerror.Unknown(fmt.Sprintf("Failed to upload: %q", err)), } } - r.Base64FileHash, err = writer.Finished() - if err != nil { - return util.JSONResponse{ - Code: 400, - JSON: jsonerror.BadJSON(fmt.Sprintf("Failed to upload: %q", err)), - } + writer.Flush() + + if bytesWritten != 
int64(r.MediaMetadata.ContentLength) { + logger.Warnf("Bytes uploaded (%v) != claimed Content-Length (%v)", bytesWritten, r.MediaMetadata.ContentLength) + } + + hash := hasher.Sum(nil) + r.MediaMetadata.MediaID = types.MediaID(base64.URLEncoding.EncodeToString(hash[:])) + + logger.WithFields(log.Fields{ + "MediaID": r.MediaMetadata.MediaID, + "Origin": r.MediaMetadata.Origin, + "UploadName": r.MediaMetadata.UploadName, + "Content-Length": r.MediaMetadata.ContentLength, + "Content-Type": r.MediaMetadata.ContentType, + "Content-Disposition": r.MediaMetadata.ContentDisposition, + }).Info("File uploaded") + + // check if we already have a record of the media in our database and if so, we can remove the temporary directory + err = db.GetMediaMetadata(r.MediaMetadata.MediaID, r.MediaMetadata.Origin, r.MediaMetadata) + if err == nil { + tmpDirErr := os.RemoveAll(string(tmpDir)) + if tmpDirErr != nil { + logger.Warnf("Failed to remove tmpDir (%v): %q\n", tmpDir, tmpDirErr) + } + return util.JSONResponse{ + Code: 200, + JSON: uploadResponse{ + ContentURI: fmt.Sprintf("mxc://%s/%s", cfg.ServerName, r.MediaMetadata.MediaID), + }, + } + } else if err != nil && err != sql.ErrNoRows { + logger.Warnf("Failed to query database for %v: %q", r.MediaMetadata.MediaID, err) } - // TODO: check if file with hash already exists // TODO: generate thumbnails - err = db.CreateMedia(r.Base64FileHash, cfg.ServerName, r.ContentType, r.ContentDisposition, r.ContentLength, r.Filename, r.UserID) + err = db.StoreMediaMetadata(r.MediaMetadata) if err != nil { + tmpDirErr := os.RemoveAll(string(tmpDir)) + if tmpDirErr != nil { + logger.Warnf("Failed to remove tmpDir (%v): %q\n", tmpDir, tmpDirErr) + } return util.JSONResponse{ Code: 400, - JSON: jsonerror.BadJSON(fmt.Sprintf("Failed to upload: %q", err)), + JSON: jsonerror.Unknown(fmt.Sprintf("Failed to upload: %q", err)), + } + } + + err = moveFile( + types.Path(path.Join(string(tmpDir), "content")), + types.Path(getPathFromMediaMetadata(r.MediaMetadata, cfg.BasePath)), + ) + if err != nil { + tmpDirErr := os.RemoveAll(string(tmpDir)) + if tmpDirErr != nil { + logger.Warnf("Failed to remove tmpDir (%v): %q\n", tmpDir, tmpDirErr) + } + return util.JSONResponse{ + Code: 400, + JSON: jsonerror.Unknown(fmt.Sprintf("Failed to upload: %q", err)), } } return util.JSONResponse{ Code: 200, JSON: uploadResponse{ - ContentURI: fmt.Sprintf("mxc://%s/%s", cfg.ServerName, r.Base64FileHash), + ContentURI: fmt.Sprintf("mxc://%s/%s", cfg.ServerName, r.MediaMetadata.MediaID), }, } } diff --git a/src/github.com/matrix-org/dendrite/mediaapi/writers/utils.go b/src/github.com/matrix-org/dendrite/mediaapi/writers/utils.go new file mode 100644 index 000000000..f38717389 --- /dev/null +++ b/src/github.com/matrix-org/dendrite/mediaapi/writers/utils.go @@ -0,0 +1,80 @@ +// Copyright 2017 Vector Creations Ltd +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
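The hashing above and the path helper below combine into a simple
content-addressed layout: the URL-safe base64 of the SHA-256 digest serves
both as the media ID and, split after its first three characters, as the
file's location under the base path. A short sketch (the base path and origin
here are hypothetical):

package main

import (
    "crypto/sha256"
    "encoding/base64"
    "fmt"
    "path"
)

func main() {
    content := []byte("example file contents")

    // Identical uploads hash to the same digest, so they share one media ID,
    // one database row and one file on disk.
    sum := sha256.Sum256(content)
    mediaID := base64.URLEncoding.EncodeToString(sum[:])

    // Shard by the first three characters of the ID, presumably to keep any
    // single directory from growing too large.
    basePath := "/var/dendrite/media" // hypothetical
    origin := "localhost"
    fmt.Println(path.Join(basePath, origin, mediaID[:3], mediaID[3:]))
}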
+ +package writers + +import ( + "bufio" + "io/ioutil" + "os" + "path" + + log "github.com/Sirupsen/logrus" + "github.com/matrix-org/dendrite/mediaapi/types" +) + +// createTempDir creates a tmp/ directory within baseDirectory and returns its path +func createTempDir(baseDirectory types.Path) (types.Path, error) { + baseTmpDir := path.Join(string(baseDirectory), "tmp") + err := os.MkdirAll(baseTmpDir, 0770) + if err != nil { + log.Printf("Failed to create base temp dir: %v\n", err) + return "", err + } + tmpDir, err := ioutil.TempDir(baseTmpDir, "") + if err != nil { + log.Printf("Failed to create temp dir: %v\n", err) + return "", err + } + return types.Path(tmpDir), nil +} + +// createFileWriter creates a buffered file writer with a new file at directory/filename +// Returns the file handle as it needs to be closed when writing is complete +func createFileWriter(directory types.Path, filename types.Filename) (*os.File, *bufio.Writer, error) { + filePath := path.Join(string(directory), string(filename)) + file, err := os.Create(filePath) + if err != nil { + log.Printf("Failed to create file: %v\n", err) + return nil, nil, err + } + + return file, bufio.NewWriter(file), nil +} + +func getPathFromMediaMetadata(m *types.MediaMetadata, basePath types.Path) string { + return path.Join( + string(basePath), + string(m.Origin), + string(m.MediaID[:3]), + string(m.MediaID[3:]), + ) +} + +// moveFile attempts to move the file src to dst +func moveFile(src types.Path, dst types.Path) error { + dstDir := path.Dir(string(dst)) + + err := os.MkdirAll(dstDir, 0770) + if err != nil { + log.Printf("Failed to make directory: %v\n", dstDir) + return err + } + err = os.Rename(string(src), string(dst)) + if err != nil { + log.Printf("Failed to move directory: %v to %v\n", src, dst) + return err + } + return nil +} From 911b5dc1704b372e3287cea4494f4f7df50a6c0c Mon Sep 17 00:00:00 2001 From: Robert Swain Date: Tue, 9 May 2017 20:10:27 +0200 Subject: [PATCH 014/108] mediaapi/writers/upload: Generate Content-Disposition if not set --- .../matrix-org/dendrite/mediaapi/writers/upload.go | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/src/github.com/matrix-org/dendrite/mediaapi/writers/upload.go b/src/github.com/matrix-org/dendrite/mediaapi/writers/upload.go index 7b6b4876d..363722e8a 100644 --- a/src/github.com/matrix-org/dendrite/mediaapi/writers/upload.go +++ b/src/github.com/matrix-org/dendrite/mediaapi/writers/upload.go @@ -21,6 +21,7 @@ import ( "fmt" "io" "net/http" + "net/url" "os" "path" "strings" @@ -127,12 +128,16 @@ func Upload(req *http.Request, cfg config.MediaAPI, db *storage.Database) util.J }, } - // FIXME: if no Content-Disposition then set - if resErr = r.Validate(cfg.MaxFileSize); resErr != nil { return *resErr } + if len(r.MediaMetadata.UploadName) > 0 { + r.MediaMetadata.ContentDisposition = types.ContentDisposition( + "inline; filename*=utf-8''" + url.PathEscape(string(r.MediaMetadata.UploadName)), + ) + } + logger.WithFields(log.Fields{ "Origin": r.MediaMetadata.Origin, "UploadName": r.MediaMetadata.UploadName, From 8f9eb13f69a46cec513a0fb0761a82f7d80653ea Mon Sep 17 00:00:00 2001 From: Robert Swain Date: Tue, 9 May 2017 20:10:59 +0200 Subject: [PATCH 015/108] mediaapi/writers/upload: Do not overwrite fields from database If the entry does not exist, this would set all but the origin and id to nil. 
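The Content-Disposition value assembled in the previous patch uses the
extended "filename*" syntax from RFC 6266/5987: the upload name is
percent-encoded as UTF-8 and tagged with its charset so non-ASCII filenames
survive the round trip. A minimal sketch of that encoding (helper name
hypothetical; the escaping relies on net/url exactly as the patch does):

package main

import (
    "fmt"
    "net/url"
)

// buildContentDisposition builds an RFC 6266 extended filename* parameter,
// percent-encoded as UTF-8.
func buildContentDisposition(uploadName string) string {
    return "inline; filename*=utf-8''" + url.PathEscape(uploadName)
}

func main() {
    fmt.Println(buildContentDisposition("spring report.pdf"))
    // prints: inline; filename*=utf-8''spring%20report.pdf
}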
--- src/github.com/matrix-org/dendrite/mediaapi/writers/upload.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/github.com/matrix-org/dendrite/mediaapi/writers/upload.go b/src/github.com/matrix-org/dendrite/mediaapi/writers/upload.go index 363722e8a..4b6fb152c 100644 --- a/src/github.com/matrix-org/dendrite/mediaapi/writers/upload.go +++ b/src/github.com/matrix-org/dendrite/mediaapi/writers/upload.go @@ -195,7 +195,7 @@ func Upload(req *http.Request, cfg config.MediaAPI, db *storage.Database) util.J }).Info("File uploaded") // check if we already have a record of the media in our database and if so, we can remove the temporary directory - err = db.GetMediaMetadata(r.MediaMetadata.MediaID, r.MediaMetadata.Origin, r.MediaMetadata) + err = db.GetMediaMetadata(r.MediaMetadata.MediaID, r.MediaMetadata.Origin, &types.MediaMetadata{}) if err == nil { tmpDirErr := os.RemoveAll(string(tmpDir)) if tmpDirErr != nil { From 391a1be69f00d717b4e12e417d5c9e2a591f5747 Mon Sep 17 00:00:00 2001 From: Robert Swain Date: Tue, 9 May 2017 20:56:20 +0200 Subject: [PATCH 016/108] mediaapi/writers/download: Fix infinite loop reading response body --- src/github.com/matrix-org/dendrite/mediaapi/writers/download.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/github.com/matrix-org/dendrite/mediaapi/writers/download.go b/src/github.com/matrix-org/dendrite/mediaapi/writers/download.go index ec270e914..f2beef791 100644 --- a/src/github.com/matrix-org/dendrite/mediaapi/writers/download.go +++ b/src/github.com/matrix-org/dendrite/mediaapi/writers/download.go @@ -302,8 +302,8 @@ func Download(w http.ResponseWriter, req *http.Request, origin types.ServerName, if readErr != nil { if readErr != io.EOF { fetchError = errRead - break } + break } } From 619a77e1871f865f105c3a3e9468a41a468d2709 Mon Sep 17 00:00:00 2001 From: Robert Swain Date: Tue, 9 May 2017 20:57:34 +0200 Subject: [PATCH 017/108] mediaapi/writers/download: Improve logging --- .../dendrite/mediaapi/writers/download.go | 36 +++++++++++++++---- 1 file changed, 30 insertions(+), 6 deletions(-) diff --git a/src/github.com/matrix-org/dendrite/mediaapi/writers/download.go b/src/github.com/matrix-org/dendrite/mediaapi/writers/download.go index f2beef791..d0bd22927 100644 --- a/src/github.com/matrix-org/dendrite/mediaapi/writers/download.go +++ b/src/github.com/matrix-org/dendrite/mediaapi/writers/download.go @@ -173,12 +173,8 @@ func Download(w http.ResponseWriter, req *http.Request, origin types.ServerName, } else if err == sql.ErrNoRows && r.MediaMetadata.Origin != cfg.ServerName { // If we do not have a record and the origin is remote, we need to fetch it and respond with that file logger.WithFields(log.Fields{ - "MediaID": r.MediaMetadata.MediaID, - "Origin": r.MediaMetadata.Origin, - "UploadName": r.MediaMetadata.UploadName, - "Content-Length": r.MediaMetadata.ContentLength, - "Content-Type": r.MediaMetadata.ContentType, - "Content-Disposition": r.MediaMetadata.ContentDisposition, + "MediaID": r.MediaMetadata.MediaID, + "Origin": r.MediaMetadata.Origin, }).Infof("Fetching remote file") // TODO: lock request in hash set @@ -234,6 +230,11 @@ func Download(w http.ResponseWriter, req *http.Request, origin types.ServerName, } r.MediaMetadata.ContentLength = types.ContentLength(contentLength) + logger.WithFields(log.Fields{ + "MediaID": r.MediaMetadata.MediaID, + "Origin": r.MediaMetadata.Origin, + }).Infof("Connected to remote") + w.Header().Set("Content-Type", string(r.MediaMetadata.ContentType)) 
w.Header().Set("Content-Length", strconv.FormatInt(int64(r.MediaMetadata.ContentLength), 10)) contentSecurityPolicy := "default-src 'none';" + @@ -263,6 +264,11 @@ func Download(w http.ResponseWriter, req *http.Request, origin types.ServerName, } defer tmpFile.Close() + logger.WithFields(log.Fields{ + "MediaID": r.MediaMetadata.MediaID, + "Origin": r.MediaMetadata.Origin, + }).Infof("Proxying and caching remote file") + // bytesResponded is the total number of bytes written to the response to the client request // bytesWritten is the total number of bytes written to disk var bytesResponded, bytesWritten int64 = 0, 0 @@ -349,6 +355,15 @@ func Download(w http.ResponseWriter, req *http.Request, origin types.ServerName, // Note: After this point we have responded to the client's request and are just dealing with local caching. // As we have responded with 200 OK, any errors are ineffectual to the client request and so we just log and return. + logger.WithFields(log.Fields{ + "MediaID": r.MediaMetadata.MediaID, + "Origin": r.MediaMetadata.Origin, + "UploadName": r.MediaMetadata.UploadName, + "Content-Length": r.MediaMetadata.ContentLength, + "Content-Type": r.MediaMetadata.ContentType, + "Content-Disposition": r.MediaMetadata.ContentDisposition, + }).Infof("Storing file metadata to media repository database") + // if written to disk, add to db err = db.StoreMediaMetadata(r.MediaMetadata) if err != nil { @@ -374,6 +389,15 @@ func Download(w http.ResponseWriter, req *http.Request, origin types.ServerName, } return } + + logger.WithFields(log.Fields{ + "MediaID": r.MediaMetadata.MediaID, + "Origin": r.MediaMetadata.Origin, + "UploadName": r.MediaMetadata.UploadName, + "Content-Length": r.MediaMetadata.ContentLength, + "Content-Type": r.MediaMetadata.ContentType, + "Content-Disposition": r.MediaMetadata.ContentDisposition, + }).Infof("Remote file cached") } else { // TODO: If we do not have a record and the origin is local, or if we have another error from the database, the file is not found jsonErrorResponse(w, util.JSONResponse{ From 5bb5a28366e02d3664b1bbd53a649268a0bfdc95 Mon Sep 17 00:00:00 2001 From: Robert Swain Date: Tue, 9 May 2017 20:57:59 +0200 Subject: [PATCH 018/108] cmd/dendrite-media-api-server: Bump maximum file size to 10MB --- .../matrix-org/dendrite/cmd/dendrite-media-api-server/main.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/github.com/matrix-org/dendrite/cmd/dendrite-media-api-server/main.go b/src/github.com/matrix-org/dendrite/cmd/dendrite-media-api-server/main.go index aa6ffef81..3f7994f4c 100644 --- a/src/github.com/matrix-org/dendrite/cmd/dendrite-media-api-server/main.go +++ b/src/github.com/matrix-org/dendrite/cmd/dendrite-media-api-server/main.go @@ -42,7 +42,7 @@ func main() { cfg := config.MediaAPI{ ServerName: "localhost", BasePath: "/Users/robertsw/dendrite", - MaxFileSize: 61440, + MaxFileSize: 10 * 1024 * 1024, DataSource: dataSource, } From c7c3a36e1bdd59663e3e6db066c60c8e0bd3177e Mon Sep 17 00:00:00 2001 From: Robert Swain Date: Tue, 9 May 2017 20:58:44 +0200 Subject: [PATCH 019/108] mediaapi/writers/download: Set more metadata fields for remote files --- .../matrix-org/dendrite/mediaapi/writers/download.go | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/src/github.com/matrix-org/dendrite/mediaapi/writers/download.go b/src/github.com/matrix-org/dendrite/mediaapi/writers/download.go index d0bd22927..bedcbf381 100644 --- a/src/github.com/matrix-org/dendrite/mediaapi/writers/download.go +++ 
b/src/github.com/matrix-org/dendrite/mediaapi/writers/download.go @@ -230,6 +230,11 @@ func Download(w http.ResponseWriter, req *http.Request, origin types.ServerName, } r.MediaMetadata.ContentLength = types.ContentLength(contentLength) + r.MediaMetadata.ContentType = types.ContentType(resp.Header.Get("Content-Type")) + r.MediaMetadata.ContentDisposition = types.ContentDisposition(resp.Header.Get("Content-Disposition")) + // FIXME: parse from Content-Disposition header if possible, else fall back + //r.MediaMetadata.UploadName = types.Filename() + logger.WithFields(log.Fields{ "MediaID": r.MediaMetadata.MediaID, "Origin": r.MediaMetadata.Origin, @@ -355,6 +360,9 @@ func Download(w http.ResponseWriter, req *http.Request, origin types.ServerName, // Note: After this point we have responded to the client's request and are just dealing with local caching. // As we have responded with 200 OK, any errors are ineffectual to the client request and so we just log and return. + r.MediaMetadata.ContentLength = types.ContentLength(bytesWritten) + r.MediaMetadata.UserID = types.MatrixUserID("@unknown:" + string(r.MediaMetadata.Origin)) + logger.WithFields(log.Fields{ "MediaID": r.MediaMetadata.MediaID, "Origin": r.MediaMetadata.Origin, From a405dccb4d40f6adf1344dcbb11709c8de08710a Mon Sep 17 00:00:00 2001 From: Robert Swain Date: Wed, 10 May 2017 11:02:01 +0200 Subject: [PATCH 020/108] mediaapi/routing/routing: Comment Content-Type override behaviour --- src/github.com/matrix-org/dendrite/mediaapi/routing/routing.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/github.com/matrix-org/dendrite/mediaapi/routing/routing.go b/src/github.com/matrix-org/dendrite/mediaapi/routing/routing.go index daa598628..844a2c5ab 100644 --- a/src/github.com/matrix-org/dendrite/mediaapi/routing/routing.go +++ b/src/github.com/matrix-org/dendrite/mediaapi/routing/routing.go @@ -43,7 +43,7 @@ func Setup(servMux *http.ServeMux, httpClient *http.Client, cfg config.MediaAPI, // Set common headers returned regardless of the outcome of the request util.SetCORSHeaders(w) - // TODO: fix comment + // Content-Type will be overridden in case of returning file data, else we respond with JSON-formatted errors w.Header().Set("Content-Type", "application/json") vars := mux.Vars(req) From 412f408ea062bc8669b362313f71c1879b61b3af Mon Sep 17 00:00:00 2001 From: Robert Swain Date: Wed, 10 May 2017 16:01:27 +0200 Subject: [PATCH 021/108] mediaapi/writers/utils: Improve debug logging of moveFile() --- src/github.com/matrix-org/dendrite/mediaapi/writers/utils.go | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/github.com/matrix-org/dendrite/mediaapi/writers/utils.go b/src/github.com/matrix-org/dendrite/mediaapi/writers/utils.go index f38717389..d6f6d2cac 100644 --- a/src/github.com/matrix-org/dendrite/mediaapi/writers/utils.go +++ b/src/github.com/matrix-org/dendrite/mediaapi/writers/utils.go @@ -68,12 +68,12 @@ func moveFile(src types.Path, dst types.Path) error { err := os.MkdirAll(dstDir, 0770) if err != nil { - log.Printf("Failed to make directory: %v\n", dstDir) + log.Printf("Failed to make directory: %q", err) return err } err = os.Rename(string(src), string(dst)) if err != nil { - log.Printf("Failed to move directory: %v to %v\n", src, dst) + log.Printf("Failed to move directory: %q", err) return err } return nil From b28072259158284d2ee8c1b248630f4cc55fecfa Mon Sep 17 00:00:00 2001 From: Robert Swain Date: Wed, 10 May 2017 16:02:56 +0200 Subject: [PATCH 022/108] mediaapi/writers/download: 
Factor out respondFromLocalFile() --- .../dendrite/mediaapi/writers/download.go | 115 +++++++++--------- 1 file changed, 60 insertions(+), 55 deletions(-) diff --git a/src/github.com/matrix-org/dendrite/mediaapi/writers/download.go b/src/github.com/matrix-org/dendrite/mediaapi/writers/download.go index bedcbf381..4204c6ffb 100644 --- a/src/github.com/matrix-org/dendrite/mediaapi/writers/download.go +++ b/src/github.com/matrix-org/dendrite/mediaapi/writers/download.go @@ -115,61 +115,8 @@ func Download(w http.ResponseWriter, req *http.Request, origin types.ServerName, if err == nil { // If we have a record, we can respond from the local file - logger.WithFields(log.Fields{ - "MediaID": r.MediaMetadata.MediaID, - "Origin": r.MediaMetadata.Origin, - "UploadName": r.MediaMetadata.UploadName, - "Content-Length": r.MediaMetadata.ContentLength, - "Content-Type": r.MediaMetadata.ContentType, - "Content-Disposition": r.MediaMetadata.ContentDisposition, - }).Infof("Downloading file") - - filePath := getPathFromMediaMetadata(r.MediaMetadata, cfg.BasePath) - file, err := os.Open(filePath) - if err != nil { - // FIXME: Remove erroneous file from database? - jsonErrorResponse(w, util.JSONResponse{ - Code: 404, - JSON: jsonerror.NotFound(fmt.Sprintf("File with media ID %q does not exist", r.MediaMetadata.MediaID)), - }, logger) - return - } - - stat, err := file.Stat() - if err != nil { - // FIXME: Remove erroneous file from database? - jsonErrorResponse(w, util.JSONResponse{ - Code: 404, - JSON: jsonerror.NotFound(fmt.Sprintf("File with media ID %q does not exist", r.MediaMetadata.MediaID)), - }, logger) - return - } - - if r.MediaMetadata.ContentLength > 0 && int64(r.MediaMetadata.ContentLength) != stat.Size() { - logger.Warnf("File size in database (%v) and on disk (%v) differ.", r.MediaMetadata.ContentLength, stat.Size()) - // FIXME: Remove erroneous file from database? 
- } - - w.Header().Set("Content-Type", string(r.MediaMetadata.ContentType)) - w.Header().Set("Content-Length", strconv.FormatInt(stat.Size(), 10)) - contentSecurityPolicy := "default-src 'none';" + - " script-src 'none';" + - " plugin-types application/pdf;" + - " style-src 'unsafe-inline';" + - " object-src 'self';" - w.Header().Set("Content-Security-Policy", contentSecurityPolicy) - - if bytesResponded, err := io.Copy(w, file); err != nil { - logger.Warnf("Failed to copy from cache %v\n", err) - if bytesResponded == 0 { - jsonErrorResponse(w, util.JSONResponse{ - Code: 500, - JSON: jsonerror.NotFound(fmt.Sprintf("Failed to respond with file with media ID %q", r.MediaMetadata.MediaID)), - }, logger) - } - // If we have written any data then we have already responded with 200 OK and all we can do is close the connection - return - } + respondFromLocalFile(w, logger, r.MediaMetadata, cfg) + return } else if err == sql.ErrNoRows && r.MediaMetadata.Origin != cfg.ServerName { // If we do not have a record and the origin is remote, we need to fetch it and respond with that file logger.WithFields(log.Fields{ @@ -415,6 +362,64 @@ func Download(w http.ResponseWriter, req *http.Request, origin types.ServerName, } } +func respondFromLocalFile(w http.ResponseWriter, logger *log.Entry, mediaMetadata *types.MediaMetadata, cfg config.MediaAPI) { + logger.WithFields(log.Fields{ + "MediaID": mediaMetadata.MediaID, + "Origin": mediaMetadata.Origin, + "UploadName": mediaMetadata.UploadName, + "Content-Length": mediaMetadata.ContentLength, + "Content-Type": mediaMetadata.ContentType, + "Content-Disposition": mediaMetadata.ContentDisposition, + }).Infof("Downloading file") + + filePath := getPathFromMediaMetadata(mediaMetadata, cfg.BasePath) + file, err := os.Open(filePath) + if err != nil { + // FIXME: Remove erroneous file from database? + jsonErrorResponse(w, util.JSONResponse{ + Code: 404, + JSON: jsonerror.NotFound(fmt.Sprintf("File with media ID %q does not exist", mediaMetadata.MediaID)), + }, logger) + return + } + + stat, err := file.Stat() + if err != nil { + // FIXME: Remove erroneous file from database? + jsonErrorResponse(w, util.JSONResponse{ + Code: 404, + JSON: jsonerror.NotFound(fmt.Sprintf("File with media ID %q does not exist", mediaMetadata.MediaID)), + }, logger) + return + } + + if mediaMetadata.ContentLength > 0 && int64(mediaMetadata.ContentLength) != stat.Size() { + logger.Warnf("File size in database (%v) and on disk (%v) differ.", mediaMetadata.ContentLength, stat.Size()) + // FIXME: Remove erroneous file from database? + } + + w.Header().Set("Content-Type", string(mediaMetadata.ContentType)) + w.Header().Set("Content-Length", strconv.FormatInt(stat.Size(), 10)) + contentSecurityPolicy := "default-src 'none';" + + " script-src 'none';" + + " plugin-types application/pdf;" + + " style-src 'unsafe-inline';" + + " object-src 'self';" + w.Header().Set("Content-Security-Policy", contentSecurityPolicy) + + if bytesResponded, err := io.Copy(w, file); err != nil { + logger.Warnf("Failed to copy from cache %v\n", err) + if bytesResponded == 0 { + jsonErrorResponse(w, util.JSONResponse{ + Code: 500, + JSON: jsonerror.NotFound(fmt.Sprintf("Failed to respond with file with media ID %q", mediaMetadata.MediaID)), + }, logger) + } + // If we have written any data then we have already responded with 200 OK and all we can do is close the connection + return + } +} + // Given a matrix server name, attempt to discover URLs to contact the server // on. 
func getMatrixUrls(serverName types.ServerName) []string { From 49ec095b5950f60872c1f1d47fc81b7c9f966a83 Mon Sep 17 00:00:00 2001 From: Robert Swain Date: Wed, 10 May 2017 16:04:39 +0200 Subject: [PATCH 023/108] mediaapi/writers/download: Try to only request remote files once If multiple requests arrive for the same remote file, we want to download them once and then serve to all the remaining incoming requests from the cache. The main thing missing from the code at this point is a mechanism to time out database queries. They are made across a network and so we should be robust to network connectivity issues. This is a general problem across dendrite and not limited to just this code. --- .../dendrite/mediaapi/routing/routing.go | 6 +- .../dendrite/mediaapi/types/types.go | 9 ++ .../dendrite/mediaapi/writers/download.go | 83 +++++++++++++++---- 3 files changed, 81 insertions(+), 17 deletions(-) diff --git a/src/github.com/matrix-org/dendrite/mediaapi/routing/routing.go b/src/github.com/matrix-org/dendrite/mediaapi/routing/routing.go index 7d9725f72..7b525984e 100644 --- a/src/github.com/matrix-org/dendrite/mediaapi/routing/routing.go +++ b/src/github.com/matrix-org/dendrite/mediaapi/routing/routing.go @@ -16,6 +16,7 @@ package routing import ( "net/http" + "sync" "github.com/gorilla/mux" "github.com/matrix-org/dendrite/mediaapi/config" @@ -37,6 +38,9 @@ func Setup(servMux *http.ServeMux, httpClient *http.Client, cfg config.MediaAPI, return writers.Upload(req, cfg, db) }))) + activeRemoteRequests := &types.ActiveRemoteRequests{ + Set: map[string]*sync.Cond{}, + } r0mux.Handle("/download/{serverName}/{mediaId}", prometheus.InstrumentHandler("download", http.HandlerFunc(func(w http.ResponseWriter, req *http.Request) { req = util.RequestWithLogging(req) @@ -47,7 +51,7 @@ func Setup(servMux *http.ServeMux, httpClient *http.Client, cfg config.MediaAPI, w.Header().Set("Content-Type", "application/json") vars := mux.Vars(req) - writers.Download(w, req, types.ServerName(vars["serverName"]), types.MediaID(vars["mediaId"]), cfg, db) + writers.Download(w, req, types.ServerName(vars["serverName"]), types.MediaID(vars["mediaId"]), cfg, db, activeRemoteRequests) })), ) diff --git a/src/github.com/matrix-org/dendrite/mediaapi/types/types.go b/src/github.com/matrix-org/dendrite/mediaapi/types/types.go index e1e1a3a44..34bf80655 100644 --- a/src/github.com/matrix-org/dendrite/mediaapi/types/types.go +++ b/src/github.com/matrix-org/dendrite/mediaapi/types/types.go @@ -14,6 +14,8 @@ package types +import "sync" + // ContentDisposition is an HTTP Content-Disposition header string type ContentDisposition string @@ -55,3 +57,10 @@ type MediaMetadata struct { UploadName Filename UserID MatrixUserID } + +// ActiveRemoteRequests is a lockable map of media URIs requested from remote homeservers +// It is used for ensuring multiple requests for the same file do not clobber each other. 
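// The coordination described in the commit message boils down to a small
// "fetch once" pattern keyed by the mxc URL: the first goroutine for a key
// becomes the fetcher, later ones wait on a condition variable and then
// re-check the cache. A self-contained sketch with hypothetical names (a
// plain map stands in for the database; the retry cap and error paths of the
// real handler are omitted):

package main

import (
    "fmt"
    "sync"
    "time"
)

type fetchOnce struct {
    sync.Mutex
    pending map[string]*sync.Cond
    cache   map[string]string
}

func (f *fetchOnce) get(key string, fetch func() string) string {
    f.Lock()
    for {
        if v, ok := f.cache[key]; ok {
            f.Unlock()
            return v // someone already fetched and stored it
        }
        cond, inFlight := f.pending[key]
        if !inFlight {
            break // no one is fetching this key yet, so we will
        }
        cond.Wait() // releases the lock while waiting, reacquires on wake-up
    }
    f.pending[key] = sync.NewCond(f)
    f.Unlock()

    v := fetch() // the slow download happens outside the lock

    f.Lock()
    f.cache[key] = v
    f.pending[key].Broadcast() // wake every goroutine waiting on this key
    delete(f.pending, key)
    f.Unlock()
    return v
}

func main() {
    f := &fetchOnce{pending: map[string]*sync.Cond{}, cache: map[string]string{}}
    var wg sync.WaitGroup
    for i := 0; i < 3; i++ {
        wg.Add(1)
        go func() {
            defer wg.Done()
            fmt.Println(f.get("mxc://remote/abc", func() string {
                time.Sleep(100 * time.Millisecond) // pretend to download
                return "file contents"
            }))
        }()
    }
    wg.Wait()
}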
+type ActiveRemoteRequests struct { + sync.Mutex + Set map[string]*sync.Cond +} diff --git a/src/github.com/matrix-org/dendrite/mediaapi/writers/download.go b/src/github.com/matrix-org/dendrite/mediaapi/writers/download.go index 4204c6ffb..38aa8cc8f 100644 --- a/src/github.com/matrix-org/dendrite/mediaapi/writers/download.go +++ b/src/github.com/matrix-org/dendrite/mediaapi/writers/download.go @@ -25,6 +25,7 @@ import ( "path" "strconv" "strings" + "sync" log "github.com/Sirupsen/logrus" "github.com/matrix-org/dendrite/clientapi/jsonerror" @@ -80,13 +81,15 @@ var errRead = fmt.Errorf("failed to read response from remote server") var errResponse = fmt.Errorf("failed to write file data to response body") var errWrite = fmt.Errorf("failed to write file to disk") +var nAttempts = 5 + // Download implements /download // Files from this server (i.e. origin == cfg.ServerName) are served directly // Files from remote servers (i.e. origin != cfg.ServerName) are cached locally. // If they are present in the cache, they are served directly. // If they are not present in the cache, they are obtained from the remote server and // simultaneously served back to the client and written into the cache. -func Download(w http.ResponseWriter, req *http.Request, origin types.ServerName, mediaID types.MediaID, cfg config.MediaAPI, db *storage.Database) { +func Download(w http.ResponseWriter, req *http.Request, origin types.ServerName, mediaID types.MediaID, cfg config.MediaAPI, db *storage.Database, activeRemoteRequests *types.ActiveRemoteRequests) { logger := util.GetLogger(req.Context()) // request validation @@ -124,7 +127,38 @@ func Download(w http.ResponseWriter, req *http.Request, origin types.ServerName, "Origin": r.MediaMetadata.Origin, }).Infof("Fetching remote file") - // TODO: lock request in hash set + mxcURL := "mxc://" + string(r.MediaMetadata.Origin) + "/" + string(r.MediaMetadata.MediaID) + + for attempts := 0; ; attempts++ { + activeRemoteRequests.Lock() + err = db.GetMediaMetadata(r.MediaMetadata.MediaID, r.MediaMetadata.Origin, r.MediaMetadata) + if err == nil { + // If we have a record, we can respond from the local file + respondFromLocalFile(w, logger, r.MediaMetadata, cfg) + activeRemoteRequests.Unlock() + return + } + if activeRemoteRequestCondition, ok := activeRemoteRequests.Set[mxcURL]; ok { + if attempts >= nAttempts { + logger.Warnf("Other goroutines are trying to download the remote file and failing.") + jsonErrorResponse(w, util.JSONResponse{ + Code: 500, + JSON: jsonerror.Unknown(fmt.Sprintf("File with media ID %q could not be downloaded from %q", r.MediaMetadata.MediaID, r.MediaMetadata.Origin)), + }, logger) + return + } + logger.WithFields(log.Fields{ + "Origin": r.MediaMetadata.Origin, + "MediaID": r.MediaMetadata.MediaID, + }).Infof("Waiting for another goroutine to fetch the file.") + activeRemoteRequestCondition.Wait() + activeRemoteRequests.Unlock() + } else { + activeRemoteRequests.Set[mxcURL] = &sync.Cond{L: activeRemoteRequests} + activeRemoteRequests.Unlock() + break + } + } // FIXME: Only request once (would race if multiple requests for the same remote file) // Use a hash set based on the origin and media ID (the request URL should be fine...) 
and synchronise adding / removing members @@ -319,20 +353,7 @@ func Download(w http.ResponseWriter, req *http.Request, origin types.ServerName, "Content-Disposition": r.MediaMetadata.ContentDisposition, }).Infof("Storing file metadata to media repository database") - // if written to disk, add to db - err = db.StoreMediaMetadata(r.MediaMetadata) - if err != nil { - tmpDirErr := os.RemoveAll(string(tmpDir)) - if tmpDirErr != nil { - logger.Warnf("Failed to remove tmpDir (%v): %q\n", tmpDir, tmpDirErr) - } - return - } - - // TODO: unlock request in hash set - - // TODO: generate thumbnails - + // The database is the source of truth so we need to have moved the file first err = moveFile( types.Path(path.Join(string(tmpDir), "content")), types.Path(getPathFromMediaMetadata(r.MediaMetadata, cfg.BasePath)), @@ -345,6 +366,36 @@ func Download(w http.ResponseWriter, req *http.Request, origin types.ServerName, return } + // Writing the metadata to the media repository database and removing the mxcURL from activeRemoteRequests needs to be atomic. + // If it were not atomic, a new request for the same file could come in in routine A and check the database before the INSERT. + // Routine B which was fetching could then have its INSERT complete and remove the mxcURL from the activeRemoteRequests. + // If routine A then checked the activeRemoteRequests it would think it needed to fetch the file when it's already in the database. + // The locking below mitigates this situation. + activeRemoteRequests.Lock() + // FIXME: unlock after timeout of db request + // if written to disk, add to db + err = db.StoreMediaMetadata(r.MediaMetadata) + if err != nil { + finalDir := path.Dir(getPathFromMediaMetadata(r.MediaMetadata, cfg.BasePath)) + finalDirErr := os.RemoveAll(finalDir) + if finalDirErr != nil { + logger.Warnf("Failed to remove finalDir (%v): %q\n", finalDir, finalDirErr) + } + delete(activeRemoteRequests.Set, mxcURL) + activeRemoteRequests.Unlock() + return + } + activeRemoteRequestCondition, _ := activeRemoteRequests.Set[mxcURL] + logger.WithFields(log.Fields{ + "Origin": r.MediaMetadata.Origin, + "MediaID": r.MediaMetadata.MediaID, + }).Infof("Signalling other goroutines waiting for us to fetch the file.") + activeRemoteRequestCondition.Broadcast() + delete(activeRemoteRequests.Set, mxcURL) + activeRemoteRequests.Unlock() + + // TODO: generate thumbnails + logger.WithFields(log.Fields{ "MediaID": r.MediaMetadata.MediaID, "Origin": r.MediaMetadata.Origin, From 5d67787c46ce745b4b47764aa365f218f3efc43a Mon Sep 17 00:00:00 2001 From: Robert Swain Date: Wed, 10 May 2017 16:07:18 +0200 Subject: [PATCH 024/108] mediaapi/writers/download: Use consistent temporary file name --- src/github.com/matrix-org/dendrite/mediaapi/writers/download.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/github.com/matrix-org/dendrite/mediaapi/writers/download.go b/src/github.com/matrix-org/dendrite/mediaapi/writers/download.go index 38aa8cc8f..6e859a95c 100644 --- a/src/github.com/matrix-org/dendrite/mediaapi/writers/download.go +++ b/src/github.com/matrix-org/dendrite/mediaapi/writers/download.go @@ -239,7 +239,7 @@ func Download(w http.ResponseWriter, req *http.Request, origin types.ServerName, }, logger) return } - tmpFile, writer, err := createFileWriter(tmpDir, types.Filename(r.MediaMetadata.MediaID[3:])) + tmpFile, writer, err := createFileWriter(tmpDir, "content") if err != nil { logger.Infof("Failed to create file writer %q\n", err) jsonErrorResponse(w, util.JSONResponse{ From 
8f68f61117122cbffbc09b67496255dfdbbdeba6 Mon Sep 17 00:00:00 2001 From: Robert Swain Date: Wed, 10 May 2017 16:08:32 +0200 Subject: [PATCH 025/108] mediaapi/writers/download: Remove done TODOs --- .../matrix-org/dendrite/mediaapi/writers/download.go | 3 --- 1 file changed, 3 deletions(-) diff --git a/src/github.com/matrix-org/dendrite/mediaapi/writers/download.go b/src/github.com/matrix-org/dendrite/mediaapi/writers/download.go index 6e859a95c..d4976b465 100644 --- a/src/github.com/matrix-org/dendrite/mediaapi/writers/download.go +++ b/src/github.com/matrix-org/dendrite/mediaapi/writers/download.go @@ -268,7 +268,6 @@ func Download(w http.ResponseWriter, req *http.Request, origin types.ServerName, // write to client request's response body bytesTemp, respErr := w.Write(buffer[:bytesRead]) if bytesTemp != bytesRead || (respErr != nil && respErr != io.EOF) { - // TODO: BORKEN logger.Errorf("bytesTemp %v != bytesRead %v : %v", bytesTemp, bytesRead, respErr) fetchError = errResponse break @@ -277,13 +276,11 @@ func Download(w http.ResponseWriter, req *http.Request, origin types.ServerName, if fetchError == nil || (fetchError != errFileIsTooLarge && fetchError != errWrite) { // if larger than cfg.MaxFileSize then stop writing to disk and discard cached file if bytesWritten+int64(len(buffer)) > int64(cfg.MaxFileSize) { - // TODO: WAAAAHNING and clean up temp files fetchError = errFileIsTooLarge } else { // write to disk bytesTemp, writeErr := writer.Write(buffer) if writeErr != nil && writeErr != io.EOF { - // TODO: WAAAAHNING and clean up temp files fetchError = errWrite } else { bytesWritten += int64(bytesTemp) From df52b1aef2b2bf85967efcac7d3bfc4c4253bac0 Mon Sep 17 00:00:00 2001 From: Robert Swain Date: Wed, 10 May 2017 16:08:51 +0200 Subject: [PATCH 026/108] mediaapi/writers/download: Only try to write as much as was read --- src/github.com/matrix-org/dendrite/mediaapi/writers/download.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/github.com/matrix-org/dendrite/mediaapi/writers/download.go b/src/github.com/matrix-org/dendrite/mediaapi/writers/download.go index d4976b465..c5625e5af 100644 --- a/src/github.com/matrix-org/dendrite/mediaapi/writers/download.go +++ b/src/github.com/matrix-org/dendrite/mediaapi/writers/download.go @@ -279,7 +279,7 @@ func Download(w http.ResponseWriter, req *http.Request, origin types.ServerName, fetchError = errFileIsTooLarge } else { // write to disk - bytesTemp, writeErr := writer.Write(buffer) + bytesTemp, writeErr := writer.Write(buffer[:bytesRead]) if writeErr != nil && writeErr != io.EOF { fetchError = errWrite } else { From 304a275e1b1cb7b9f7fe1f94096b663a783b912d Mon Sep 17 00:00:00 2001 From: Robert Swain Date: Wed, 10 May 2017 16:09:17 +0200 Subject: [PATCH 027/108] mediaapi/writers/download: Give remote files a placeholder UserID --- src/github.com/matrix-org/dendrite/mediaapi/writers/download.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/github.com/matrix-org/dendrite/mediaapi/writers/download.go b/src/github.com/matrix-org/dendrite/mediaapi/writers/download.go index c5625e5af..93382b635 100644 --- a/src/github.com/matrix-org/dendrite/mediaapi/writers/download.go +++ b/src/github.com/matrix-org/dendrite/mediaapi/writers/download.go @@ -339,7 +339,7 @@ func Download(w http.ResponseWriter, req *http.Request, origin types.ServerName, // As we have responded with 200 OK, any errors are ineffectual to the client request and so we just log and return. 
r.MediaMetadata.ContentLength = types.ContentLength(bytesWritten) - r.MediaMetadata.UserID = types.MatrixUserID("@unknown:" + string(r.MediaMetadata.Origin)) + r.MediaMetadata.UserID = types.MatrixUserID("@:" + string(r.MediaMetadata.Origin)) logger.WithFields(log.Fields{ "MediaID": r.MediaMetadata.MediaID, From 563330b82accdfb3eb4a5c49b1f0a600d8ca420a Mon Sep 17 00:00:00 2001 From: Robert Swain Date: Thu, 11 May 2017 09:16:20 +0200 Subject: [PATCH 028/108] mediaapi/writers/download: Rename attempts to tries Tries is more usual language in programming. --- .../matrix-org/dendrite/mediaapi/writers/download.go | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/github.com/matrix-org/dendrite/mediaapi/writers/download.go b/src/github.com/matrix-org/dendrite/mediaapi/writers/download.go index 93382b635..eb47237c0 100644 --- a/src/github.com/matrix-org/dendrite/mediaapi/writers/download.go +++ b/src/github.com/matrix-org/dendrite/mediaapi/writers/download.go @@ -81,7 +81,7 @@ var errRead = fmt.Errorf("failed to read response from remote server") var errResponse = fmt.Errorf("failed to write file data to response body") var errWrite = fmt.Errorf("failed to write file to disk") -var nAttempts = 5 +var nTries = 5 // Download implements /download // Files from this server (i.e. origin == cfg.ServerName) are served directly @@ -129,7 +129,7 @@ func Download(w http.ResponseWriter, req *http.Request, origin types.ServerName, mxcURL := "mxc://" + string(r.MediaMetadata.Origin) + "/" + string(r.MediaMetadata.MediaID) - for attempts := 0; ; attempts++ { + for tries := 0; ; tries++ { activeRemoteRequests.Lock() err = db.GetMediaMetadata(r.MediaMetadata.MediaID, r.MediaMetadata.Origin, r.MediaMetadata) if err == nil { @@ -139,7 +139,7 @@ func Download(w http.ResponseWriter, req *http.Request, origin types.ServerName, return } if activeRemoteRequestCondition, ok := activeRemoteRequests.Set[mxcURL]; ok { - if attempts >= nAttempts { + if tries >= nTries { logger.Warnf("Other goroutines are trying to download the remote file and failing.") jsonErrorResponse(w, util.JSONResponse{ Code: 500, From f0c6ec23c9a197686a427ec55ed3fff92dd73295 Mon Sep 17 00:00:00 2001 From: Robert Swain Date: Thu, 11 May 2017 09:17:37 +0200 Subject: [PATCH 029/108] mediaapi/writers/download: Move log messages to correct code paths --- .../dendrite/mediaapi/writers/download.go | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/src/github.com/matrix-org/dendrite/mediaapi/writers/download.go b/src/github.com/matrix-org/dendrite/mediaapi/writers/download.go index eb47237c0..f087bc63f 100644 --- a/src/github.com/matrix-org/dendrite/mediaapi/writers/download.go +++ b/src/github.com/matrix-org/dendrite/mediaapi/writers/download.go @@ -122,10 +122,6 @@ func Download(w http.ResponseWriter, req *http.Request, origin types.ServerName, return } else if err == sql.ErrNoRows && r.MediaMetadata.Origin != cfg.ServerName { // If we do not have a record and the origin is remote, we need to fetch it and respond with that file - logger.WithFields(log.Fields{ - "MediaID": r.MediaMetadata.MediaID, - "Origin": r.MediaMetadata.Origin, - }).Infof("Fetching remote file") mxcURL := "mxc://" + string(r.MediaMetadata.Origin) + "/" + string(r.MediaMetadata.MediaID) @@ -140,7 +136,10 @@ func Download(w http.ResponseWriter, req *http.Request, origin types.ServerName, } if activeRemoteRequestCondition, ok := activeRemoteRequests.Set[mxcURL]; ok { if tries >= nTries { - logger.Warnf("Other goroutines are trying to 
download the remote file and failing.") + logger.WithFields(log.Fields{ + "MediaID": r.MediaMetadata.MediaID, + "Origin": r.MediaMetadata.Origin, + }).Warnf("Other goroutines are trying to download the remote file and failing.") jsonErrorResponse(w, util.JSONResponse{ Code: 500, JSON: jsonerror.Unknown(fmt.Sprintf("File with media ID %q could not be downloaded from %q", r.MediaMetadata.MediaID, r.MediaMetadata.Origin)), @@ -154,6 +153,10 @@ func Download(w http.ResponseWriter, req *http.Request, origin types.ServerName, activeRemoteRequestCondition.Wait() activeRemoteRequests.Unlock() } else { + logger.WithFields(log.Fields{ + "MediaID": r.MediaMetadata.MediaID, + "Origin": r.MediaMetadata.Origin, + }).Infof("Fetching remote file") activeRemoteRequests.Set[mxcURL] = &sync.Cond{L: activeRemoteRequests} activeRemoteRequests.Unlock() break From b6af02185828825101aa31b8cf5b6dc2f35b19d2 Mon Sep 17 00:00:00 2001 From: Robert Swain Date: Thu, 11 May 2017 09:18:02 +0200 Subject: [PATCH 030/108] mediaapi/writers/download: Unlock activeRemoteRequests after too many tries --- src/github.com/matrix-org/dendrite/mediaapi/writers/download.go | 1 + 1 file changed, 1 insertion(+) diff --git a/src/github.com/matrix-org/dendrite/mediaapi/writers/download.go b/src/github.com/matrix-org/dendrite/mediaapi/writers/download.go index f087bc63f..4a578af5f 100644 --- a/src/github.com/matrix-org/dendrite/mediaapi/writers/download.go +++ b/src/github.com/matrix-org/dendrite/mediaapi/writers/download.go @@ -144,6 +144,7 @@ func Download(w http.ResponseWriter, req *http.Request, origin types.ServerName, Code: 500, JSON: jsonerror.Unknown(fmt.Sprintf("File with media ID %q could not be downloaded from %q", r.MediaMetadata.MediaID, r.MediaMetadata.Origin)), }, logger) + activeRemoteRequests.Unlock() return } logger.WithFields(log.Fields{ From 9c29a31e7e07b961898a7509388bfe740ede5f72 Mon Sep 17 00:00:00 2001 From: Robert Swain Date: Thu, 11 May 2017 09:19:34 +0200 Subject: [PATCH 031/108] mediaapi/writers/download: Factor out respondFromRemoteFile --- .../dendrite/mediaapi/writers/download.go | 512 +++++++++--------- 1 file changed, 271 insertions(+), 241 deletions(-) diff --git a/src/github.com/matrix-org/dendrite/mediaapi/writers/download.go b/src/github.com/matrix-org/dendrite/mediaapi/writers/download.go index 4a578af5f..f70608292 100644 --- a/src/github.com/matrix-org/dendrite/mediaapi/writers/download.go +++ b/src/github.com/matrix-org/dendrite/mediaapi/writers/download.go @@ -164,247 +164,7 @@ func Download(w http.ResponseWriter, req *http.Request, origin types.ServerName, } } - // FIXME: Only request once (would race if multiple requests for the same remote file) - // Use a hash set based on the origin and media ID (the request URL should be fine...) 
and synchronise adding / removing members - urls := getMatrixUrls(r.MediaMetadata.Origin) - - logger.Printf("Connecting to remote %q\n", urls[0]) - - remoteReqAddr := urls[0] + "/_matrix/media/v1/download/" + string(r.MediaMetadata.Origin) + "/" + string(r.MediaMetadata.MediaID) - remoteReq, err := http.NewRequest("GET", remoteReqAddr, nil) - if err != nil { - jsonErrorResponse(w, util.JSONResponse{ - Code: 500, - JSON: jsonerror.Unknown(fmt.Sprintf("File with media ID %q could not be downloaded from %q", r.MediaMetadata.MediaID, r.MediaMetadata.Origin)), - }, logger) - return - } - - remoteReq.Header.Set("Host", string(r.MediaMetadata.Origin)) - - client := http.Client{} - resp, err := client.Do(remoteReq) - if err != nil { - jsonErrorResponse(w, util.JSONResponse{ - Code: 502, - JSON: jsonerror.Unknown(fmt.Sprintf("File with media ID %q could not be downloaded from %q", r.MediaMetadata.MediaID, r.MediaMetadata.Origin)), - }, logger) - return - } - defer resp.Body.Close() - - if resp.StatusCode != 200 { - logger.Printf("Server responded with %d\n", resp.StatusCode) - if resp.StatusCode == 404 { - jsonErrorResponse(w, util.JSONResponse{ - Code: 404, - JSON: jsonerror.NotFound(fmt.Sprintf("File with media ID %q does not exist", r.MediaMetadata.MediaID)), - }, logger) - return - } - jsonErrorResponse(w, util.JSONResponse{ - Code: 502, - JSON: jsonerror.Unknown(fmt.Sprintf("File with media ID %q could not be downloaded from %q", r.MediaMetadata.MediaID, r.MediaMetadata.Origin)), - }, logger) - return - } - - contentLength, err := strconv.ParseInt(resp.Header.Get("Content-Length"), 10, 64) - if err != nil { - logger.Warn("Failed to parse content length") - } - r.MediaMetadata.ContentLength = types.ContentLength(contentLength) - - r.MediaMetadata.ContentType = types.ContentType(resp.Header.Get("Content-Type")) - r.MediaMetadata.ContentDisposition = types.ContentDisposition(resp.Header.Get("Content-Disposition")) - // FIXME: parse from Content-Disposition header if possible, else fall back - //r.MediaMetadata.UploadName = types.Filename() - - logger.WithFields(log.Fields{ - "MediaID": r.MediaMetadata.MediaID, - "Origin": r.MediaMetadata.Origin, - }).Infof("Connected to remote") - - w.Header().Set("Content-Type", string(r.MediaMetadata.ContentType)) - w.Header().Set("Content-Length", strconv.FormatInt(int64(r.MediaMetadata.ContentLength), 10)) - contentSecurityPolicy := "default-src 'none';" + - " script-src 'none';" + - " plugin-types application/pdf;" + - " style-src 'unsafe-inline';" + - " object-src 'self';" - w.Header().Set("Content-Security-Policy", contentSecurityPolicy) - - tmpDir, err := createTempDir(cfg.BasePath) - if err != nil { - logger.Infof("Failed to create temp dir %q\n", err) - jsonErrorResponse(w, util.JSONResponse{ - Code: 400, - JSON: jsonerror.Unknown(fmt.Sprintf("Failed to upload: %q", err)), - }, logger) - return - } - tmpFile, writer, err := createFileWriter(tmpDir, "content") - if err != nil { - logger.Infof("Failed to create file writer %q\n", err) - jsonErrorResponse(w, util.JSONResponse{ - Code: 400, - JSON: jsonerror.Unknown(fmt.Sprintf("Failed to upload: %q", err)), - }, logger) - return - } - defer tmpFile.Close() - - logger.WithFields(log.Fields{ - "MediaID": r.MediaMetadata.MediaID, - "Origin": r.MediaMetadata.Origin, - }).Infof("Proxying and caching remote file") - - // bytesResponded is the total number of bytes written to the response to the client request - // bytesWritten is the total number of bytes written to disk - var bytesResponded, bytesWritten int64 
= 0, 0 - var fetchError error - // Note: the buffer size is the same as is used in io.Copy() - buffer := make([]byte, 32*1024) - for { - // read from remote request's response body - bytesRead, readErr := resp.Body.Read(buffer) - if bytesRead > 0 { - // write to client request's response body - bytesTemp, respErr := w.Write(buffer[:bytesRead]) - if bytesTemp != bytesRead || (respErr != nil && respErr != io.EOF) { - logger.Errorf("bytesTemp %v != bytesRead %v : %v", bytesTemp, bytesRead, respErr) - fetchError = errResponse - break - } - bytesResponded += int64(bytesTemp) - if fetchError == nil || (fetchError != errFileIsTooLarge && fetchError != errWrite) { - // if larger than cfg.MaxFileSize then stop writing to disk and discard cached file - if bytesWritten+int64(len(buffer)) > int64(cfg.MaxFileSize) { - fetchError = errFileIsTooLarge - } else { - // write to disk - bytesTemp, writeErr := writer.Write(buffer[:bytesRead]) - if writeErr != nil && writeErr != io.EOF { - fetchError = errWrite - } else { - bytesWritten += int64(bytesTemp) - } - } - } - } - if readErr != nil { - if readErr != io.EOF { - fetchError = errRead - } - break - } - } - - writer.Flush() - - if fetchError != nil { - logFields := log.Fields{ - "MediaID": r.MediaMetadata.MediaID, - "Origin": r.MediaMetadata.Origin, - } - if fetchError == errFileIsTooLarge { - logFields["MaxFileSize"] = cfg.MaxFileSize - } - logger.WithFields(logFields).Warnln(fetchError) - tmpDirErr := os.RemoveAll(string(tmpDir)) - if tmpDirErr != nil { - logger.Warnf("Failed to remove tmpDir (%v): %q\n", tmpDir, tmpDirErr) - } - // Note: if we have responded with any data in the body at all then we have already sent 200 OK and we can only abort at this point - if bytesResponded < 1 { - jsonErrorResponse(w, util.JSONResponse{ - Code: 502, - JSON: jsonerror.Unknown(fmt.Sprintf("File with media ID %q could not be downloaded from %q", r.MediaMetadata.MediaID, r.MediaMetadata.Origin)), - }, logger) - } else { - // We attempt to bluntly close the connection because that is the - // best thing we can do after we've sent a 200 OK - logger.Println("Attempting to close the connection.") - hijacker, ok := w.(http.Hijacker) - if ok { - connection, _, hijackErr := hijacker.Hijack() - if hijackErr == nil { - logger.Println("Closing") - connection.Close() - } else { - logger.Printf("Error trying to hijack: %v", hijackErr) - } - } - } - return - } - - // Note: After this point we have responded to the client's request and are just dealing with local caching. - // As we have responded with 200 OK, any errors are ineffectual to the client request and so we just log and return. 
- - r.MediaMetadata.ContentLength = types.ContentLength(bytesWritten) - r.MediaMetadata.UserID = types.MatrixUserID("@:" + string(r.MediaMetadata.Origin)) - - logger.WithFields(log.Fields{ - "MediaID": r.MediaMetadata.MediaID, - "Origin": r.MediaMetadata.Origin, - "UploadName": r.MediaMetadata.UploadName, - "Content-Length": r.MediaMetadata.ContentLength, - "Content-Type": r.MediaMetadata.ContentType, - "Content-Disposition": r.MediaMetadata.ContentDisposition, - }).Infof("Storing file metadata to media repository database") - - // The database is the source of truth so we need to have moved the file first - err = moveFile( - types.Path(path.Join(string(tmpDir), "content")), - types.Path(getPathFromMediaMetadata(r.MediaMetadata, cfg.BasePath)), - ) - if err != nil { - tmpDirErr := os.RemoveAll(string(tmpDir)) - if tmpDirErr != nil { - logger.Warnf("Failed to remove tmpDir (%v): %q\n", tmpDir, tmpDirErr) - } - return - } - - // Writing the metadata to the media repository database and removing the mxcURL from activeRemoteRequests needs to be atomic. - // If it were not atomic, a new request for the same file could come in in routine A and check the database before the INSERT. - // Routine B which was fetching could then have its INSERT complete and remove the mxcURL from the activeRemoteRequests. - // If routine A then checked the activeRemoteRequests it would think it needed to fetch the file when it's already in the database. - // The locking below mitigates this situation. - activeRemoteRequests.Lock() - // FIXME: unlock after timeout of db request - // if written to disk, add to db - err = db.StoreMediaMetadata(r.MediaMetadata) - if err != nil { - finalDir := path.Dir(getPathFromMediaMetadata(r.MediaMetadata, cfg.BasePath)) - finalDirErr := os.RemoveAll(finalDir) - if finalDirErr != nil { - logger.Warnf("Failed to remove finalDir (%v): %q\n", finalDir, finalDirErr) - } - delete(activeRemoteRequests.Set, mxcURL) - activeRemoteRequests.Unlock() - return - } - activeRemoteRequestCondition, _ := activeRemoteRequests.Set[mxcURL] - logger.WithFields(log.Fields{ - "Origin": r.MediaMetadata.Origin, - "MediaID": r.MediaMetadata.MediaID, - }).Infof("Signalling other goroutines waiting for us to fetch the file.") - activeRemoteRequestCondition.Broadcast() - delete(activeRemoteRequests.Set, mxcURL) - activeRemoteRequests.Unlock() - - // TODO: generate thumbnails - - logger.WithFields(log.Fields{ - "MediaID": r.MediaMetadata.MediaID, - "Origin": r.MediaMetadata.Origin, - "UploadName": r.MediaMetadata.UploadName, - "Content-Length": r.MediaMetadata.ContentLength, - "Content-Type": r.MediaMetadata.ContentType, - "Content-Disposition": r.MediaMetadata.ContentDisposition, - }).Infof("Remote file cached") + respondFromRemoteFile(w, logger, r.MediaMetadata, cfg, db, activeRemoteRequests) } else { // TODO: If we do not have a record and the origin is local, or if we have another error from the database, the file is not found jsonErrorResponse(w, util.JSONResponse{ @@ -472,6 +232,276 @@ func respondFromLocalFile(w http.ResponseWriter, logger *log.Entry, mediaMetadat } } +func respondFromRemoteFile(w http.ResponseWriter, logger *log.Entry, mediaMetadata *types.MediaMetadata, cfg config.MediaAPI, db *storage.Database, activeRemoteRequests *types.ActiveRemoteRequests) { + logger.WithFields(log.Fields{ + "MediaID": mediaMetadata.MediaID, + "Origin": mediaMetadata.Origin, + }).Infof("Fetching remote file") + + mxcURL := "mxc://" + string(mediaMetadata.Origin) + "/" + string(mediaMetadata.MediaID) + + // If we 
hit an error and we return early, we need to lock, broadcast on the condition, delete the condition and unlock. + // If we return normally we have slightly different locking around the storage of metadata to the database and deletion of the condition. + // As such, this deferred cleanup of the sync.Cond is conditional. + // This approach seems safer than potentially missing this cleanup in error cases. + updateActiveRemoteRequests := true + defer func() { + if updateActiveRemoteRequests { + activeRemoteRequests.Lock() + if activeRemoteRequestCondition, ok := activeRemoteRequests.Set[mxcURL]; ok { + activeRemoteRequestCondition.Broadcast() + } + delete(activeRemoteRequests.Set, mxcURL) + activeRemoteRequests.Unlock() + } + }() + + urls := getMatrixUrls(mediaMetadata.Origin) + + logger.Printf("Connecting to remote %q\n", urls[0]) + + remoteReqAddr := urls[0] + "/_matrix/media/v1/download/" + string(mediaMetadata.Origin) + "/" + string(mediaMetadata.MediaID) + remoteReq, err := http.NewRequest("GET", remoteReqAddr, nil) + if err != nil { + jsonErrorResponse(w, util.JSONResponse{ + Code: 500, + JSON: jsonerror.Unknown(fmt.Sprintf("File with media ID %q could not be downloaded from %q", mediaMetadata.MediaID, mediaMetadata.Origin)), + }, logger) + return + } + + remoteReq.Header.Set("Host", string(mediaMetadata.Origin)) + + client := http.Client{} + resp, err := client.Do(remoteReq) + if err != nil { + jsonErrorResponse(w, util.JSONResponse{ + Code: 502, + JSON: jsonerror.Unknown(fmt.Sprintf("File with media ID %q could not be downloaded from %q", mediaMetadata.MediaID, mediaMetadata.Origin)), + }, logger) + return + } + defer resp.Body.Close() + + if resp.StatusCode != 200 { + logger.Printf("Server responded with %d\n", resp.StatusCode) + if resp.StatusCode == 404 { + jsonErrorResponse(w, util.JSONResponse{ + Code: 404, + JSON: jsonerror.NotFound(fmt.Sprintf("File with media ID %q does not exist", mediaMetadata.MediaID)), + }, logger) + return + } + jsonErrorResponse(w, util.JSONResponse{ + Code: 502, + JSON: jsonerror.Unknown(fmt.Sprintf("File with media ID %q could not be downloaded from %q", mediaMetadata.MediaID, mediaMetadata.Origin)), + }, logger) + return + } + + contentLength, err := strconv.ParseInt(resp.Header.Get("Content-Length"), 10, 64) + if err != nil { + logger.Warn("Failed to parse content length") + } + mediaMetadata.ContentLength = types.ContentLength(contentLength) + + mediaMetadata.ContentType = types.ContentType(resp.Header.Get("Content-Type")) + mediaMetadata.ContentDisposition = types.ContentDisposition(resp.Header.Get("Content-Disposition")) + // FIXME: parse from Content-Disposition header if possible, else fall back + //mediaMetadata.UploadName = types.Filename() + + logger.WithFields(log.Fields{ + "MediaID": mediaMetadata.MediaID, + "Origin": mediaMetadata.Origin, + }).Infof("Connected to remote") + + w.Header().Set("Content-Type", string(mediaMetadata.ContentType)) + w.Header().Set("Content-Length", strconv.FormatInt(int64(mediaMetadata.ContentLength), 10)) + contentSecurityPolicy := "default-src 'none';" + + " script-src 'none';" + + " plugin-types application/pdf;" + + " style-src 'unsafe-inline';" + + " object-src 'self';" + w.Header().Set("Content-Security-Policy", contentSecurityPolicy) + + tmpDir, err := createTempDir(cfg.BasePath) + if err != nil { + logger.Infof("Failed to create temp dir %q\n", err) + jsonErrorResponse(w, util.JSONResponse{ + Code: 400, + JSON: jsonerror.Unknown(fmt.Sprintf("Failed to upload: %q", err)), + }, logger) + return + } + 
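// Illustrative sketch, not part of the patch above: the updateActiveRemoteRequests
// flag combined with the deferred function earlier in respondFromRemoteFile is a
// "clean up on early return unless the normal path takes ownership of the cleanup"
// pattern. A minimal standalone version of the same idea, with hypothetical names:
package main

import "fmt"

func doWork(failEarly bool) {
	cleanupNeeded := true
	defer func() {
		if cleanupNeeded {
			fmt.Println("deferred cleanup ran (early-return path)")
		}
	}()

	if failEarly {
		return // the deferred cleanup fires
	}

	// The happy path performs its own, more specific cleanup and disarms the defer.
	cleanupNeeded = false
	fmt.Println("explicit cleanup ran (normal path)")
}

func main() {
	doWork(true)
	doWork(false)
}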
tmpFile, writer, err := createFileWriter(tmpDir, "content") + if err != nil { + logger.Infof("Failed to create file writer %q\n", err) + jsonErrorResponse(w, util.JSONResponse{ + Code: 400, + JSON: jsonerror.Unknown(fmt.Sprintf("Failed to upload: %q", err)), + }, logger) + return + } + defer tmpFile.Close() + + logger.WithFields(log.Fields{ + "MediaID": mediaMetadata.MediaID, + "Origin": mediaMetadata.Origin, + }).Infof("Proxying and caching remote file") + + // bytesResponded is the total number of bytes written to the response to the client request + // bytesWritten is the total number of bytes written to disk + var bytesResponded, bytesWritten int64 = 0, 0 + var fetchError error + // Note: the buffer size is the same as is used in io.Copy() + buffer := make([]byte, 32*1024) + for { + // read from remote request's response body + bytesRead, readErr := resp.Body.Read(buffer) + if bytesRead > 0 { + // write to client request's response body + bytesTemp, respErr := w.Write(buffer[:bytesRead]) + if bytesTemp != bytesRead || (respErr != nil && respErr != io.EOF) { + logger.Errorf("bytesTemp %v != bytesRead %v : %v", bytesTemp, bytesRead, respErr) + fetchError = errResponse + break + } + bytesResponded += int64(bytesTemp) + if fetchError == nil || (fetchError != errFileIsTooLarge && fetchError != errWrite) { + // if larger than cfg.MaxFileSize then stop writing to disk and discard cached file + if bytesWritten+int64(len(buffer)) > int64(cfg.MaxFileSize) { + fetchError = errFileIsTooLarge + } else { + // write to disk + bytesTemp, writeErr := writer.Write(buffer[:bytesRead]) + if writeErr != nil && writeErr != io.EOF { + fetchError = errWrite + } else { + bytesWritten += int64(bytesTemp) + } + } + } + } + if readErr != nil { + if readErr != io.EOF { + fetchError = errRead + } + break + } + } + + writer.Flush() + + if fetchError != nil { + logFields := log.Fields{ + "MediaID": mediaMetadata.MediaID, + "Origin": mediaMetadata.Origin, + } + if fetchError == errFileIsTooLarge { + logFields["MaxFileSize"] = cfg.MaxFileSize + } + logger.WithFields(logFields).Warnln(fetchError) + tmpDirErr := os.RemoveAll(string(tmpDir)) + if tmpDirErr != nil { + logger.Warnf("Failed to remove tmpDir (%v): %q\n", tmpDir, tmpDirErr) + } + // Note: if we have responded with any data in the body at all then we have already sent 200 OK and we can only abort at this point + if bytesResponded < 1 { + jsonErrorResponse(w, util.JSONResponse{ + Code: 502, + JSON: jsonerror.Unknown(fmt.Sprintf("File with media ID %q could not be downloaded from %q", mediaMetadata.MediaID, mediaMetadata.Origin)), + }, logger) + } else { + // We attempt to bluntly close the connection because that is the + // best thing we can do after we've sent a 200 OK + logger.Println("Attempting to close the connection.") + hijacker, ok := w.(http.Hijacker) + if ok { + connection, _, hijackErr := hijacker.Hijack() + if hijackErr == nil { + logger.Println("Closing") + connection.Close() + } else { + logger.Printf("Error trying to hijack: %v", hijackErr) + } + } + } + return + } + + // Note: After this point we have responded to the client's request and are just dealing with local caching. + // As we have responded with 200 OK, any errors are ineffectual to the client request and so we just log and return. 
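// Illustrative sketch, not part of the patch above: the copy loop above streams the
// remote body to the client while also writing it to the temporary file, treating
// client-side write errors as fatal but disk-side problems (including exceeding the
// size cap) as merely disabling the cache. Under those assumptions the same shape
// can be expressed with io.Copy plus a small best-effort writer; all names below
// are hypothetical.
package main

import (
	"bytes"
	"fmt"
	"io"
	"strings"
)

// bestEffortWriter forwards writes to w until an error occurs or maxBytes would be
// exceeded, after which it silently discards data. It never reports an error, so an
// io.MultiWriter/io.Copy around it is never aborted by the cache path.
type bestEffortWriter struct {
	w        io.Writer
	maxBytes int64
	written  int64
	disabled bool
}

func (b *bestEffortWriter) Write(p []byte) (int, error) {
	if !b.disabled {
		if b.written+int64(len(p)) > b.maxBytes {
			b.disabled = true // too large: stop caching, keep serving the client
		} else if n, err := b.w.Write(p); err != nil {
			b.disabled = true // cache write failed: non-fatal
		} else {
			b.written += int64(n)
		}
	}
	return len(p), nil
}

func main() {
	src := strings.NewReader("some remote response body")
	var client, cache bytes.Buffer

	cacheWriter := &bestEffortWriter{w: &cache, maxBytes: 16}
	n, err := io.Copy(io.MultiWriter(&client, cacheWriter), src)
	fmt.Println(n, err, client.Len(), cacheWriter.disabled)
}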
+ + mediaMetadata.ContentLength = types.ContentLength(bytesWritten) + mediaMetadata.UserID = types.MatrixUserID("@:" + string(mediaMetadata.Origin)) + + logger.WithFields(log.Fields{ + "MediaID": mediaMetadata.MediaID, + "Origin": mediaMetadata.Origin, + "UploadName": mediaMetadata.UploadName, + "Content-Length": mediaMetadata.ContentLength, + "Content-Type": mediaMetadata.ContentType, + "Content-Disposition": mediaMetadata.ContentDisposition, + }).Infof("Storing file metadata to media repository database") + + // The database is the source of truth so we need to have moved the file first + err = moveFile( + types.Path(path.Join(string(tmpDir), "content")), + types.Path(getPathFromMediaMetadata(mediaMetadata, cfg.BasePath)), + ) + if err != nil { + tmpDirErr := os.RemoveAll(string(tmpDir)) + if tmpDirErr != nil { + logger.Warnf("Failed to remove tmpDir (%v): %q\n", tmpDir, tmpDirErr) + } + return + } + + // Writing the metadata to the media repository database and removing the mxcURL from activeRemoteRequests needs to be atomic. + // If it were not atomic, a new request for the same file could come in in routine A and check the database before the INSERT. + // Routine B which was fetching could then have its INSERT complete and remove the mxcURL from the activeRemoteRequests. + // If routine A then checked the activeRemoteRequests it would think it needed to fetch the file when it's already in the database. + // The locking below mitigates this situation. + updateActiveRemoteRequests = false + activeRemoteRequests.Lock() + // FIXME: unlock after timeout of db request + // if written to disk, add to db + err = db.StoreMediaMetadata(mediaMetadata) + if err != nil { + finalDir := path.Dir(getPathFromMediaMetadata(mediaMetadata, cfg.BasePath)) + finalDirErr := os.RemoveAll(finalDir) + if finalDirErr != nil { + logger.Warnf("Failed to remove finalDir (%v): %q\n", finalDir, finalDirErr) + } + if activeRemoteRequestCondition, ok := activeRemoteRequests.Set[mxcURL]; ok { + activeRemoteRequestCondition.Broadcast() + } + delete(activeRemoteRequests.Set, mxcURL) + activeRemoteRequests.Unlock() + return + } + logger.WithFields(log.Fields{ + "Origin": mediaMetadata.Origin, + "MediaID": mediaMetadata.MediaID, + }).Infof("Signalling other goroutines waiting for us to fetch the file.") + if activeRemoteRequestCondition, ok := activeRemoteRequests.Set[mxcURL]; ok { + activeRemoteRequestCondition.Broadcast() + } + delete(activeRemoteRequests.Set, mxcURL) + activeRemoteRequests.Unlock() + + // TODO: generate thumbnails + + logger.WithFields(log.Fields{ + "MediaID": mediaMetadata.MediaID, + "Origin": mediaMetadata.Origin, + "UploadName": mediaMetadata.UploadName, + "Content-Length": mediaMetadata.ContentLength, + "Content-Type": mediaMetadata.ContentType, + "Content-Disposition": mediaMetadata.ContentDisposition, + }).Infof("Remote file cached") +} + // Given a matrix server name, attempt to discover URLs to contact the server // on. 
func getMatrixUrls(serverName types.ServerName) []string { From da28e4a3e25e4e7e8fae69ae1b6e78028235aada Mon Sep 17 00:00:00 2001 From: Robert Swain Date: Thu, 11 May 2017 09:19:56 +0200 Subject: [PATCH 032/108] mediaapi/writers/download: Update TODO comment that is done --- src/github.com/matrix-org/dendrite/mediaapi/writers/download.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/github.com/matrix-org/dendrite/mediaapi/writers/download.go b/src/github.com/matrix-org/dendrite/mediaapi/writers/download.go index f70608292..68e205d07 100644 --- a/src/github.com/matrix-org/dendrite/mediaapi/writers/download.go +++ b/src/github.com/matrix-org/dendrite/mediaapi/writers/download.go @@ -166,7 +166,7 @@ func Download(w http.ResponseWriter, req *http.Request, origin types.ServerName, respondFromRemoteFile(w, logger, r.MediaMetadata, cfg, db, activeRemoteRequests) } else { - // TODO: If we do not have a record and the origin is local, or if we have another error from the database, the file is not found + // If we do not have a record and the origin is local, or if we have another error from the database, the file is not found jsonErrorResponse(w, util.JSONResponse{ Code: 404, JSON: jsonerror.NotFound(fmt.Sprintf("File with media ID %q does not exist", r.MediaMetadata.MediaID)), From 5e66ad6d39162e641c425e9226c5c77b21263fd3 Mon Sep 17 00:00:00 2001 From: Robert Swain Date: Thu, 11 May 2017 09:30:10 +0200 Subject: [PATCH 033/108] mediaapi/writers/download: Comment the flow of respondFromRemoteFile --- .../dendrite/mediaapi/writers/download.go | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/src/github.com/matrix-org/dendrite/mediaapi/writers/download.go b/src/github.com/matrix-org/dendrite/mediaapi/writers/download.go index 68e205d07..01d15d161 100644 --- a/src/github.com/matrix-org/dendrite/mediaapi/writers/download.go +++ b/src/github.com/matrix-org/dendrite/mediaapi/writers/download.go @@ -122,6 +122,9 @@ func Download(w http.ResponseWriter, req *http.Request, origin types.ServerName, return } else if err == sql.ErrNoRows && r.MediaMetadata.Origin != cfg.ServerName { // If we do not have a record and the origin is remote, we need to fetch it and respond with that file + // The following code using activeRemoteRequests is avoiding duplication of fetches from the remote server in the case + // of multiple simultaneous incoming requests for the same remote file - it will be downloaded once, cached and served + // to all clients. 
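// Illustrative sketch, not part of the patch above: the activeRemoteRequests map of
// per-mxc-URL sync.Cond values described in the comment above is a simple
// single-flight scheme: the first request for a URL registers a condition and
// fetches, later requests Wait() on it and, once woken by Broadcast(), re-check the
// database instead of fetching again. A minimal standalone version of that idea,
// with hypothetical names and a fake fetch:
package main

import (
	"fmt"
	"sync"
	"time"
)

type activeFetches struct {
	sync.Mutex
	cond map[string]*sync.Cond
}

func (a *activeFetches) fetchOnce(url string, fetch func(string)) {
	a.Lock()
	if c, ok := a.cond[url]; ok {
		// Someone else is already fetching this URL: wait for them to finish.
		c.Wait() // Wait releases a.Mutex and re-acquires it before returning
		a.Unlock()
		return
	}
	c := sync.NewCond(a)
	a.cond[url] = c
	a.Unlock()

	fetch(url) // only one goroutine per URL gets here

	a.Lock()
	c.Broadcast()
	delete(a.cond, url)
	a.Unlock()
}

func main() {
	a := &activeFetches{cond: map[string]*sync.Cond{}}
	var wg sync.WaitGroup
	for i := 0; i < 3; i++ {
		wg.Add(1)
		go func() {
			defer wg.Done()
			a.fetchOnce("mxc://remote/abc", func(u string) {
				fmt.Println("fetching", u)
				time.Sleep(10 * time.Millisecond)
			})
		}()
	}
	wg.Wait()
}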
mxcURL := "mxc://" + string(r.MediaMetadata.Origin) + "/" + string(r.MediaMetadata.MediaID) @@ -256,6 +259,7 @@ func respondFromRemoteFile(w http.ResponseWriter, logger *log.Entry, mediaMetada } }() + // create request for remote file urls := getMatrixUrls(mediaMetadata.Origin) logger.Printf("Connecting to remote %q\n", urls[0]) @@ -299,6 +303,7 @@ func respondFromRemoteFile(w http.ResponseWriter, logger *log.Entry, mediaMetada return } + // get metadata from request and set metadata on response contentLength, err := strconv.ParseInt(resp.Header.Get("Content-Length"), 10, 64) if err != nil { logger.Warn("Failed to parse content length") @@ -324,6 +329,7 @@ func respondFromRemoteFile(w http.ResponseWriter, logger *log.Entry, mediaMetada " object-src 'self';" w.Header().Set("Content-Security-Policy", contentSecurityPolicy) + // create the temporary file writer tmpDir, err := createTempDir(cfg.BasePath) if err != nil { logger.Infof("Failed to create temp dir %q\n", err) @@ -344,6 +350,8 @@ func respondFromRemoteFile(w http.ResponseWriter, logger *log.Entry, mediaMetada } defer tmpFile.Close() + // read the remote request's response body + // simultaneously write it to the incoming request's response body and the temporary file logger.WithFields(log.Fields{ "MediaID": mediaMetadata.MediaID, "Origin": mediaMetadata.Origin, @@ -429,9 +437,14 @@ func respondFromRemoteFile(w http.ResponseWriter, logger *log.Entry, mediaMetada return } + // The file has been fetched. It is moved to its final destination and its metadata is inserted into the database. + // Note: After this point we have responded to the client's request and are just dealing with local caching. // As we have responded with 200 OK, any errors are ineffectual to the client request and so we just log and return. + // It's possible the bytesWritten to the temporary file is different to the reported Content-Length from the remote + // request's response. bytesWritten is therefore used as it is what would be sent to clients when reading from the local + // file. 
mediaMetadata.ContentLength = types.ContentLength(bytesWritten) mediaMetadata.UserID = types.MatrixUserID("@:" + string(mediaMetadata.Origin)) From ef7dc8c8c54596ec0b90dcdb62fd2e513cfa6b3e Mon Sep 17 00:00:00 2001 From: Robert Swain Date: Thu, 11 May 2017 09:31:40 +0200 Subject: [PATCH 034/108] mediaapi/writers/download: Clarify log message that file is remote --- src/github.com/matrix-org/dendrite/mediaapi/writers/download.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/github.com/matrix-org/dendrite/mediaapi/writers/download.go b/src/github.com/matrix-org/dendrite/mediaapi/writers/download.go index 01d15d161..4807568e4 100644 --- a/src/github.com/matrix-org/dendrite/mediaapi/writers/download.go +++ b/src/github.com/matrix-org/dendrite/mediaapi/writers/download.go @@ -153,7 +153,7 @@ func Download(w http.ResponseWriter, req *http.Request, origin types.ServerName, logger.WithFields(log.Fields{ "Origin": r.MediaMetadata.Origin, "MediaID": r.MediaMetadata.MediaID, - }).Infof("Waiting for another goroutine to fetch the file.") + }).Infof("Waiting for another goroutine to fetch the remote file.") activeRemoteRequestCondition.Wait() activeRemoteRequests.Unlock() } else { From 6c3dbb9b48b6b684da585fab138d4161989d8181 Mon Sep 17 00:00:00 2001 From: Robert Swain Date: Thu, 11 May 2017 10:36:32 +0200 Subject: [PATCH 035/108] mediaapi/writers/download: Factor out createRemoteRequest --- .../dendrite/mediaapi/writers/download.go | 85 ++++++++++--------- 1 file changed, 45 insertions(+), 40 deletions(-) diff --git a/src/github.com/matrix-org/dendrite/mediaapi/writers/download.go b/src/github.com/matrix-org/dendrite/mediaapi/writers/download.go index 4807568e4..f90a876d1 100644 --- a/src/github.com/matrix-org/dendrite/mediaapi/writers/download.go +++ b/src/github.com/matrix-org/dendrite/mediaapi/writers/download.go @@ -235,6 +235,48 @@ func respondFromLocalFile(w http.ResponseWriter, logger *log.Entry, mediaMetadat } } +func createRemoteRequest(mediaMetadata *types.MediaMetadata, logger *log.Entry) (*http.Response, *util.JSONResponse) { + urls := getMatrixUrls(mediaMetadata.Origin) + + logger.Printf("Connecting to remote %q\n", urls[0]) + + remoteReqAddr := urls[0] + "/_matrix/media/v1/download/" + string(mediaMetadata.Origin) + "/" + string(mediaMetadata.MediaID) + remoteReq, err := http.NewRequest("GET", remoteReqAddr, nil) + if err != nil { + return nil, &util.JSONResponse{ + Code: 500, + JSON: jsonerror.Unknown(fmt.Sprintf("File with media ID %q could not be downloaded from %q", mediaMetadata.MediaID, mediaMetadata.Origin)), + } + } + + remoteReq.Header.Set("Host", string(mediaMetadata.Origin)) + + client := http.Client{} + resp, err := client.Do(remoteReq) + if err != nil { + return nil, &util.JSONResponse{ + Code: 502, + JSON: jsonerror.Unknown(fmt.Sprintf("File with media ID %q could not be downloaded from %q", mediaMetadata.MediaID, mediaMetadata.Origin)), + } + } + + if resp.StatusCode != 200 { + logger.Printf("Server responded with %d\n", resp.StatusCode) + if resp.StatusCode == 404 { + return nil, &util.JSONResponse{ + Code: 404, + JSON: jsonerror.NotFound(fmt.Sprintf("File with media ID %q does not exist", mediaMetadata.MediaID)), + } + } + return nil, &util.JSONResponse{ + Code: 502, + JSON: jsonerror.Unknown(fmt.Sprintf("File with media ID %q could not be downloaded from %q", mediaMetadata.MediaID, mediaMetadata.Origin)), + } + } + + return resp, nil +} + func respondFromRemoteFile(w http.ResponseWriter, logger *log.Entry, mediaMetadata *types.MediaMetadata, cfg 
config.MediaAPI, db *storage.Database, activeRemoteRequests *types.ActiveRemoteRequests) { logger.WithFields(log.Fields{ "MediaID": mediaMetadata.MediaID, @@ -259,50 +301,13 @@ func respondFromRemoteFile(w http.ResponseWriter, logger *log.Entry, mediaMetada } }() - // create request for remote file - urls := getMatrixUrls(mediaMetadata.Origin) - - logger.Printf("Connecting to remote %q\n", urls[0]) - - remoteReqAddr := urls[0] + "/_matrix/media/v1/download/" + string(mediaMetadata.Origin) + "/" + string(mediaMetadata.MediaID) - remoteReq, err := http.NewRequest("GET", remoteReqAddr, nil) - if err != nil { - jsonErrorResponse(w, util.JSONResponse{ - Code: 500, - JSON: jsonerror.Unknown(fmt.Sprintf("File with media ID %q could not be downloaded from %q", mediaMetadata.MediaID, mediaMetadata.Origin)), - }, logger) - return - } - - remoteReq.Header.Set("Host", string(mediaMetadata.Origin)) - - client := http.Client{} - resp, err := client.Do(remoteReq) - if err != nil { - jsonErrorResponse(w, util.JSONResponse{ - Code: 502, - JSON: jsonerror.Unknown(fmt.Sprintf("File with media ID %q could not be downloaded from %q", mediaMetadata.MediaID, mediaMetadata.Origin)), - }, logger) + resp, errorResponse := createRemoteRequest(mediaMetadata, logger) + if errorResponse != nil { + jsonErrorResponse(w, *errorResponse, logger) return } defer resp.Body.Close() - if resp.StatusCode != 200 { - logger.Printf("Server responded with %d\n", resp.StatusCode) - if resp.StatusCode == 404 { - jsonErrorResponse(w, util.JSONResponse{ - Code: 404, - JSON: jsonerror.NotFound(fmt.Sprintf("File with media ID %q does not exist", mediaMetadata.MediaID)), - }, logger) - return - } - jsonErrorResponse(w, util.JSONResponse{ - Code: 502, - JSON: jsonerror.Unknown(fmt.Sprintf("File with media ID %q could not be downloaded from %q", mediaMetadata.MediaID, mediaMetadata.Origin)), - }, logger) - return - } - // get metadata from request and set metadata on response contentLength, err := strconv.ParseInt(resp.Header.Get("Content-Length"), 10, 64) if err != nil { From abcbb57aa16b38b173b7307b8ed41e7e1fc9cf8f Mon Sep 17 00:00:00 2001 From: Robert Swain Date: Thu, 11 May 2017 14:31:24 +0200 Subject: [PATCH 036/108] mediaapi/writers/download: A bunch more refactoring --- .../dendrite/mediaapi/writers/download.go | 300 ++++++++++-------- 1 file changed, 165 insertions(+), 135 deletions(-) diff --git a/src/github.com/matrix-org/dendrite/mediaapi/writers/download.go b/src/github.com/matrix-org/dendrite/mediaapi/writers/download.go index f90a876d1..dd2ed966a 100644 --- a/src/github.com/matrix-org/dendrite/mediaapi/writers/download.go +++ b/src/github.com/matrix-org/dendrite/mediaapi/writers/download.go @@ -15,6 +15,7 @@ package writers import ( + "bufio" "database/sql" "encoding/json" "fmt" @@ -277,6 +278,159 @@ func createRemoteRequest(mediaMetadata *types.MediaMetadata, logger *log.Entry) return resp, nil } +// FIXME: move to utils and use in upload as well +func createTempFileWriter(basePath types.Path, logger *log.Entry) (*bufio.Writer, *os.File, types.Path, *util.JSONResponse) { + tmpDir, err := createTempDir(basePath) + if err != nil { + logger.Infof("Failed to create temp dir %q\n", err) + return nil, nil, "", &util.JSONResponse{ + Code: 400, + JSON: jsonerror.Unknown(fmt.Sprintf("Failed to upload: %q", err)), + } + } + tmpFile, writer, err := createFileWriter(tmpDir, "content") + if err != nil { + logger.Infof("Failed to create file writer %q\n", err) + return nil, nil, "", &util.JSONResponse{ + Code: 400, + JSON: 
jsonerror.Unknown(fmt.Sprintf("Failed to upload: %q", err)), + } + } + return writer, tmpFile, tmpDir, nil +} + +// copyToActiveAndPassive works like io.Copy except it copies from the reader to both of the writers +// If there is an error with the reader or the active writer, that is considered an error +// If there is an error with the passive writer, that is non-critical and copying continues +// maxFileSize limits the amount of data written to the passive writer +func copyToActiveAndPassive(r io.Reader, wActive io.Writer, wPassive io.Writer, maxFileSize types.ContentLength, mediaMetadata *types.MediaMetadata, logger *log.Entry) (int64, int64, error) { + var bytesResponded, bytesWritten int64 = 0, 0 + var fetchError error + // Note: the buffer size is the same as is used in io.Copy() + buffer := make([]byte, 32*1024) + for { + // read from remote request's response body + bytesRead, readErr := r.Read(buffer) + if bytesRead > 0 { + // write to client request's response body + bytesTemp, respErr := wActive.Write(buffer[:bytesRead]) + if bytesTemp != bytesRead || (respErr != nil && respErr != io.EOF) { + logger.Errorf("bytesTemp %v != bytesRead %v : %v", bytesTemp, bytesRead, respErr) + fetchError = errResponse + break + } + bytesResponded += int64(bytesTemp) + if fetchError == nil || (fetchError != errFileIsTooLarge && fetchError != errWrite) { + // if larger than cfg.MaxFileSize then stop writing to disk and discard cached file + if bytesWritten+int64(len(buffer)) > int64(maxFileSize) { + fetchError = errFileIsTooLarge + } else { + // write to disk + bytesTemp, writeErr := wPassive.Write(buffer[:bytesRead]) + if writeErr != nil && writeErr != io.EOF { + fetchError = errWrite + } else { + bytesWritten += int64(bytesTemp) + } + } + } + } + if readErr != nil { + if readErr != io.EOF { + fetchError = errRead + } + break + } + } + + if fetchError != nil { + logFields := log.Fields{ + "MediaID": mediaMetadata.MediaID, + "Origin": mediaMetadata.Origin, + } + if fetchError == errFileIsTooLarge { + logFields["MaxFileSize"] = maxFileSize + } + logger.WithFields(logFields).Warnln(fetchError) + } + + return bytesResponded, bytesWritten, fetchError +} + +func closeConnection(w http.ResponseWriter, logger *log.Entry) { + logger.Println("Attempting to close the connection.") + hijacker, ok := w.(http.Hijacker) + if ok { + connection, _, hijackErr := hijacker.Hijack() + if hijackErr == nil { + logger.Println("Closing") + connection.Close() + } else { + logger.Printf("Error trying to hijack: %v", hijackErr) + } + } +} + +func completeRemoteRequest(activeRemoteRequests *types.ActiveRemoteRequests, mxcURL string) { + if activeRemoteRequestCondition, ok := activeRemoteRequests.Set[mxcURL]; ok { + activeRemoteRequestCondition.Broadcast() + } + delete(activeRemoteRequests.Set, mxcURL) + activeRemoteRequests.Unlock() +} + +func commitFileAndMetadata(tmpDir types.Path, basePath types.Path, mediaMetadata *types.MediaMetadata, activeRemoteRequests *types.ActiveRemoteRequests, db *storage.Database, mxcURL string, logger *log.Entry) bool { + updateActiveRemoteRequests := true + + logger.WithFields(log.Fields{ + "MediaID": mediaMetadata.MediaID, + "Origin": mediaMetadata.Origin, + "UploadName": mediaMetadata.UploadName, + "Content-Length": mediaMetadata.ContentLength, + "Content-Type": mediaMetadata.ContentType, + "Content-Disposition": mediaMetadata.ContentDisposition, + }).Infof("Storing file metadata to media repository database") + + // The database is the source of truth so we need to have moved the file first + 
err := moveFile( + types.Path(path.Join(string(tmpDir), "content")), + types.Path(getPathFromMediaMetadata(mediaMetadata, basePath)), + ) + if err != nil { + tmpDirErr := os.RemoveAll(string(tmpDir)) + if tmpDirErr != nil { + logger.Warnf("Failed to remove tmpDir (%v): %q\n", tmpDir, tmpDirErr) + } + return updateActiveRemoteRequests + } + + // Writing the metadata to the media repository database and removing the mxcURL from activeRemoteRequests needs to be atomic. + // If it were not atomic, a new request for the same file could come in in routine A and check the database before the INSERT. + // Routine B which was fetching could then have its INSERT complete and remove the mxcURL from the activeRemoteRequests. + // If routine A then checked the activeRemoteRequests it would think it needed to fetch the file when it's already in the database. + // The locking below mitigates this situation. + updateActiveRemoteRequests = false + activeRemoteRequests.Lock() + // FIXME: unlock after timeout of db request + // if written to disk, add to db + err = db.StoreMediaMetadata(mediaMetadata) + if err != nil { + finalDir := path.Dir(getPathFromMediaMetadata(mediaMetadata, basePath)) + finalDirErr := os.RemoveAll(finalDir) + if finalDirErr != nil { + logger.Warnf("Failed to remove finalDir (%v): %q\n", finalDir, finalDirErr) + } + completeRemoteRequest(activeRemoteRequests, mxcURL) + return updateActiveRemoteRequests + } + logger.WithFields(log.Fields{ + "Origin": mediaMetadata.Origin, + "MediaID": mediaMetadata.MediaID, + }).Infof("Signalling other goroutines waiting for us to fetch the file.") + completeRemoteRequest(activeRemoteRequests, mxcURL) + return updateActiveRemoteRequests +} + func respondFromRemoteFile(w http.ResponseWriter, logger *log.Entry, mediaMetadata *types.MediaMetadata, cfg config.MediaAPI, db *storage.Database, activeRemoteRequests *types.ActiveRemoteRequests) { logger.WithFields(log.Fields{ "MediaID": mediaMetadata.MediaID, @@ -293,14 +447,12 @@ func respondFromRemoteFile(w http.ResponseWriter, logger *log.Entry, mediaMetada defer func() { if updateActiveRemoteRequests { activeRemoteRequests.Lock() - if activeRemoteRequestCondition, ok := activeRemoteRequests.Set[mxcURL]; ok { - activeRemoteRequestCondition.Broadcast() - } - delete(activeRemoteRequests.Set, mxcURL) - activeRemoteRequests.Unlock() + // Note that completeRemoteRequest unlocks activeRemoteRequests + completeRemoteRequest(activeRemoteRequests, mxcURL) } }() + // create request for remote file resp, errorResponse := createRemoteRequest(mediaMetadata, logger) if errorResponse != nil { jsonErrorResponse(w, *errorResponse, logger) @@ -335,22 +487,9 @@ func respondFromRemoteFile(w http.ResponseWriter, logger *log.Entry, mediaMetada w.Header().Set("Content-Security-Policy", contentSecurityPolicy) // create the temporary file writer - tmpDir, err := createTempDir(cfg.BasePath) - if err != nil { - logger.Infof("Failed to create temp dir %q\n", err) - jsonErrorResponse(w, util.JSONResponse{ - Code: 400, - JSON: jsonerror.Unknown(fmt.Sprintf("Failed to upload: %q", err)), - }, logger) - return - } - tmpFile, writer, err := createFileWriter(tmpDir, "content") - if err != nil { - logger.Infof("Failed to create file writer %q\n", err) - jsonErrorResponse(w, util.JSONResponse{ - Code: 400, - JSON: jsonerror.Unknown(fmt.Sprintf("Failed to upload: %q", err)), - }, logger) + tmpFileWriter, tmpFile, tmpDir, errorResponse := createTempFileWriter(cfg.BasePath, logger) + if errorResponse != nil { + jsonErrorResponse(w, *errorResponse, 
logger) return } defer tmpFile.Close() @@ -364,56 +503,9 @@ func respondFromRemoteFile(w http.ResponseWriter, logger *log.Entry, mediaMetada // bytesResponded is the total number of bytes written to the response to the client request // bytesWritten is the total number of bytes written to disk - var bytesResponded, bytesWritten int64 = 0, 0 - var fetchError error - // Note: the buffer size is the same as is used in io.Copy() - buffer := make([]byte, 32*1024) - for { - // read from remote request's response body - bytesRead, readErr := resp.Body.Read(buffer) - if bytesRead > 0 { - // write to client request's response body - bytesTemp, respErr := w.Write(buffer[:bytesRead]) - if bytesTemp != bytesRead || (respErr != nil && respErr != io.EOF) { - logger.Errorf("bytesTemp %v != bytesRead %v : %v", bytesTemp, bytesRead, respErr) - fetchError = errResponse - break - } - bytesResponded += int64(bytesTemp) - if fetchError == nil || (fetchError != errFileIsTooLarge && fetchError != errWrite) { - // if larger than cfg.MaxFileSize then stop writing to disk and discard cached file - if bytesWritten+int64(len(buffer)) > int64(cfg.MaxFileSize) { - fetchError = errFileIsTooLarge - } else { - // write to disk - bytesTemp, writeErr := writer.Write(buffer[:bytesRead]) - if writeErr != nil && writeErr != io.EOF { - fetchError = errWrite - } else { - bytesWritten += int64(bytesTemp) - } - } - } - } - if readErr != nil { - if readErr != io.EOF { - fetchError = errRead - } - break - } - } - - writer.Flush() - + bytesResponded, bytesWritten, fetchError := copyToActiveAndPassive(resp.Body, w, tmpFileWriter, cfg.MaxFileSize, mediaMetadata, logger) + tmpFileWriter.Flush() if fetchError != nil { - logFields := log.Fields{ - "MediaID": mediaMetadata.MediaID, - "Origin": mediaMetadata.Origin, - } - if fetchError == errFileIsTooLarge { - logFields["MaxFileSize"] = cfg.MaxFileSize - } - logger.WithFields(logFields).Warnln(fetchError) tmpDirErr := os.RemoveAll(string(tmpDir)) if tmpDirErr != nil { logger.Warnf("Failed to remove tmpDir (%v): %q\n", tmpDir, tmpDirErr) @@ -427,17 +519,7 @@ func respondFromRemoteFile(w http.ResponseWriter, logger *log.Entry, mediaMetada } else { // We attempt to bluntly close the connection because that is the // best thing we can do after we've sent a 200 OK - logger.Println("Attempting to close the connection.") - hijacker, ok := w.(http.Hijacker) - if ok { - connection, _, hijackErr := hijacker.Hijack() - if hijackErr == nil { - logger.Println("Closing") - connection.Close() - } else { - logger.Printf("Error trying to hijack: %v", hijackErr) - } - } + closeConnection(w, logger) } return } @@ -446,6 +528,7 @@ func respondFromRemoteFile(w http.ResponseWriter, logger *log.Entry, mediaMetada // Note: After this point we have responded to the client's request and are just dealing with local caching. // As we have responded with 200 OK, any errors are ineffectual to the client request and so we just log and return. + // FIXME: Does continuing to do work here that is ineffectual to the client have any bad side effects? Could we fire off the remainder in a separate goroutine to mitigate that? // It's possible the bytesWritten to the temporary file is different to the reported Content-Length from the remote // request's response. 
bytesWritten is therefore used as it is what would be sent to clients when reading from the local @@ -453,60 +536,7 @@ func respondFromRemoteFile(w http.ResponseWriter, logger *log.Entry, mediaMetada mediaMetadata.ContentLength = types.ContentLength(bytesWritten) mediaMetadata.UserID = types.MatrixUserID("@:" + string(mediaMetadata.Origin)) - logger.WithFields(log.Fields{ - "MediaID": mediaMetadata.MediaID, - "Origin": mediaMetadata.Origin, - "UploadName": mediaMetadata.UploadName, - "Content-Length": mediaMetadata.ContentLength, - "Content-Type": mediaMetadata.ContentType, - "Content-Disposition": mediaMetadata.ContentDisposition, - }).Infof("Storing file metadata to media repository database") - - // The database is the source of truth so we need to have moved the file first - err = moveFile( - types.Path(path.Join(string(tmpDir), "content")), - types.Path(getPathFromMediaMetadata(mediaMetadata, cfg.BasePath)), - ) - if err != nil { - tmpDirErr := os.RemoveAll(string(tmpDir)) - if tmpDirErr != nil { - logger.Warnf("Failed to remove tmpDir (%v): %q\n", tmpDir, tmpDirErr) - } - return - } - - // Writing the metadata to the media repository database and removing the mxcURL from activeRemoteRequests needs to be atomic. - // If it were not atomic, a new request for the same file could come in in routine A and check the database before the INSERT. - // Routine B which was fetching could then have its INSERT complete and remove the mxcURL from the activeRemoteRequests. - // If routine A then checked the activeRemoteRequests it would think it needed to fetch the file when it's already in the database. - // The locking below mitigates this situation. - updateActiveRemoteRequests = false - activeRemoteRequests.Lock() - // FIXME: unlock after timeout of db request - // if written to disk, add to db - err = db.StoreMediaMetadata(mediaMetadata) - if err != nil { - finalDir := path.Dir(getPathFromMediaMetadata(mediaMetadata, cfg.BasePath)) - finalDirErr := os.RemoveAll(finalDir) - if finalDirErr != nil { - logger.Warnf("Failed to remove finalDir (%v): %q\n", finalDir, finalDirErr) - } - if activeRemoteRequestCondition, ok := activeRemoteRequests.Set[mxcURL]; ok { - activeRemoteRequestCondition.Broadcast() - } - delete(activeRemoteRequests.Set, mxcURL) - activeRemoteRequests.Unlock() - return - } - logger.WithFields(log.Fields{ - "Origin": mediaMetadata.Origin, - "MediaID": mediaMetadata.MediaID, - }).Infof("Signalling other goroutines waiting for us to fetch the file.") - if activeRemoteRequestCondition, ok := activeRemoteRequests.Set[mxcURL]; ok { - activeRemoteRequestCondition.Broadcast() - } - delete(activeRemoteRequests.Set, mxcURL) - activeRemoteRequests.Unlock() + updateActiveRemoteRequests = commitFileAndMetadata(tmpDir, cfg.BasePath, mediaMetadata, activeRemoteRequests, db, mxcURL, logger) // TODO: generate thumbnails From 074e89900080c56add0e12351de6b0021d412482 Mon Sep 17 00:00:00 2001 From: Robert Swain Date: Thu, 11 May 2017 14:39:40 +0200 Subject: [PATCH 037/108] mediaapi/writers/utils: Move and reuse createTempFileWriter --- .../dendrite/mediaapi/writers/download.go | 22 ------------------ .../dendrite/mediaapi/writers/upload.go | 11 +++------ .../dendrite/mediaapi/writers/utils.go | 23 +++++++++++++++++++ 3 files changed, 26 insertions(+), 30 deletions(-) diff --git a/src/github.com/matrix-org/dendrite/mediaapi/writers/download.go b/src/github.com/matrix-org/dendrite/mediaapi/writers/download.go index dd2ed966a..6c6d8c2e1 100644 --- 
a/src/github.com/matrix-org/dendrite/mediaapi/writers/download.go +++ b/src/github.com/matrix-org/dendrite/mediaapi/writers/download.go @@ -15,7 +15,6 @@ package writers import ( - "bufio" "database/sql" "encoding/json" "fmt" @@ -278,27 +277,6 @@ func createRemoteRequest(mediaMetadata *types.MediaMetadata, logger *log.Entry) return resp, nil } -// FIXME: move to utils and use in upload as well -func createTempFileWriter(basePath types.Path, logger *log.Entry) (*bufio.Writer, *os.File, types.Path, *util.JSONResponse) { - tmpDir, err := createTempDir(basePath) - if err != nil { - logger.Infof("Failed to create temp dir %q\n", err) - return nil, nil, "", &util.JSONResponse{ - Code: 400, - JSON: jsonerror.Unknown(fmt.Sprintf("Failed to upload: %q", err)), - } - } - tmpFile, writer, err := createFileWriter(tmpDir, "content") - if err != nil { - logger.Infof("Failed to create file writer %q\n", err) - return nil, nil, "", &util.JSONResponse{ - Code: 400, - JSON: jsonerror.Unknown(fmt.Sprintf("Failed to upload: %q", err)), - } - } - return writer, tmpFile, tmpDir, nil -} - // copyToActiveAndPassive works like io.Copy except it copies from the reader to both of the writers // If there is an error with the reader or the active writer, that is considered an error // If there is an error with the passive writer, that is non-critical and copying continues diff --git a/src/github.com/matrix-org/dendrite/mediaapi/writers/upload.go b/src/github.com/matrix-org/dendrite/mediaapi/writers/upload.go index 4b6fb152c..56f8c8350 100644 --- a/src/github.com/matrix-org/dendrite/mediaapi/writers/upload.go +++ b/src/github.com/matrix-org/dendrite/mediaapi/writers/upload.go @@ -146,15 +146,10 @@ func Upload(req *http.Request, cfg config.MediaAPI, db *storage.Database) util.J "Content-Disposition": r.MediaMetadata.ContentDisposition, }).Info("Uploading file") - tmpDir, err := createTempDir(cfg.BasePath) - if err != nil { - logger.Infof("Failed to create temp dir %q\n", err) - return util.JSONResponse{ - Code: 400, - JSON: jsonerror.Unknown(fmt.Sprintf("Failed to upload: %q", err)), - } + writer, file, tmpDir, errorResponse := createTempFileWriter(cfg.BasePath, logger) + if errorResponse != nil { + return *errorResponse } - file, writer, err := createFileWriter(tmpDir, "content") defer file.Close() // The limited reader restricts how many bytes are read from the body to the specified maximum bytes diff --git a/src/github.com/matrix-org/dendrite/mediaapi/writers/utils.go b/src/github.com/matrix-org/dendrite/mediaapi/writers/utils.go index d6f6d2cac..12a5dc2b4 100644 --- a/src/github.com/matrix-org/dendrite/mediaapi/writers/utils.go +++ b/src/github.com/matrix-org/dendrite/mediaapi/writers/utils.go @@ -16,12 +16,15 @@ package writers import ( "bufio" + "fmt" "io/ioutil" "os" "path" log "github.com/Sirupsen/logrus" + "github.com/matrix-org/dendrite/clientapi/jsonerror" "github.com/matrix-org/dendrite/mediaapi/types" + "github.com/matrix-org/util" ) // createTempDir creates a tmp/ directory within baseDirectory and returns its path @@ -53,6 +56,26 @@ func createFileWriter(directory types.Path, filename types.Filename) (*os.File, return file, bufio.NewWriter(file), nil } +func createTempFileWriter(basePath types.Path, logger *log.Entry) (*bufio.Writer, *os.File, types.Path, *util.JSONResponse) { + tmpDir, err := createTempDir(basePath) + if err != nil { + logger.Infof("Failed to create temp dir %q\n", err) + return nil, nil, "", &util.JSONResponse{ + Code: 400, + JSON: jsonerror.Unknown(fmt.Sprintf("Failed to upload: %q", 
err)), + } + } + tmpFile, writer, err := createFileWriter(tmpDir, "content") + if err != nil { + logger.Infof("Failed to create file writer %q\n", err) + return nil, nil, "", &util.JSONResponse{ + Code: 400, + JSON: jsonerror.Unknown(fmt.Sprintf("Failed to upload: %q", err)), + } + } + return writer, tmpFile, tmpDir, nil +} + func getPathFromMediaMetadata(m *types.MediaMetadata, basePath types.Path) string { return path.Join( string(basePath), From a821b9155f42bfe2bb25be176826d885e90456c6 Mon Sep 17 00:00:00 2001 From: Robert Swain Date: Thu, 11 May 2017 14:40:35 +0200 Subject: [PATCH 038/108] mediaapi/writers/utils: Reorder createFileWriter return parameters for consistency --- .../matrix-org/dendrite/mediaapi/writers/utils.go | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/github.com/matrix-org/dendrite/mediaapi/writers/utils.go b/src/github.com/matrix-org/dendrite/mediaapi/writers/utils.go index 12a5dc2b4..7b2852aa6 100644 --- a/src/github.com/matrix-org/dendrite/mediaapi/writers/utils.go +++ b/src/github.com/matrix-org/dendrite/mediaapi/writers/utils.go @@ -45,7 +45,7 @@ func createTempDir(baseDirectory types.Path) (types.Path, error) { // createFileWriter creates a buffered file writer with a new file at directory/filename // Returns the file handle as it needs to be closed when writing is complete -func createFileWriter(directory types.Path, filename types.Filename) (*os.File, *bufio.Writer, error) { +func createFileWriter(directory types.Path, filename types.Filename) (*bufio.Writer, *os.File, error) { filePath := path.Join(string(directory), string(filename)) file, err := os.Create(filePath) if err != nil { @@ -53,7 +53,7 @@ func createFileWriter(directory types.Path, filename types.Filename) (*os.File, return nil, nil, err } - return file, bufio.NewWriter(file), nil + return bufio.NewWriter(file), file, nil } func createTempFileWriter(basePath types.Path, logger *log.Entry) (*bufio.Writer, *os.File, types.Path, *util.JSONResponse) { @@ -65,7 +65,7 @@ func createTempFileWriter(basePath types.Path, logger *log.Entry) (*bufio.Writer JSON: jsonerror.Unknown(fmt.Sprintf("Failed to upload: %q", err)), } } - tmpFile, writer, err := createFileWriter(tmpDir, "content") + writer, tmpFile, err := createFileWriter(tmpDir, "content") if err != nil { logger.Infof("Failed to create file writer %q\n", err) return nil, nil, "", &util.JSONResponse{ From 937162a72210355f26db387c648b45916b88f1bc Mon Sep 17 00:00:00 2001 From: Robert Swain Date: Wed, 17 May 2017 13:29:35 +0200 Subject: [PATCH 039/108] mediaapi/routing: Pass pointer to config struct instead of copying --- .../matrix-org/dendrite/cmd/dendrite-media-api-server/main.go | 2 +- src/github.com/matrix-org/dendrite/mediaapi/routing/routing.go | 2 +- src/github.com/matrix-org/dendrite/mediaapi/writers/download.go | 2 +- src/github.com/matrix-org/dendrite/mediaapi/writers/upload.go | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/src/github.com/matrix-org/dendrite/cmd/dendrite-media-api-server/main.go b/src/github.com/matrix-org/dendrite/cmd/dendrite-media-api-server/main.go index 3f7994f4c..d00bbd49c 100644 --- a/src/github.com/matrix-org/dendrite/cmd/dendrite-media-api-server/main.go +++ b/src/github.com/matrix-org/dendrite/cmd/dendrite-media-api-server/main.go @@ -39,7 +39,7 @@ func main() { log.Panic("No BIND_ADDRESS environment variable found.") } - cfg := config.MediaAPI{ + cfg := &config.MediaAPI{ ServerName: "localhost", BasePath: "/Users/robertsw/dendrite", MaxFileSize: 10 * 1024 * 1024, 
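// Illustrative sketch, not part of the patch above: this commit passes
// *config.MediaAPI rather than copying the struct into every handler call. A value
// parameter operates on a private copy, so the whole struct is copied per call and
// later changes to the original are invisible to callees, whereas a pointer shares
// one instance. A tiny illustration with a hypothetical config type:
package main

import "fmt"

type mediaConfig struct {
	ServerName  string
	MaxFileSize int64
}

func byValue(cfg mediaConfig) { cfg.MaxFileSize = 1 } // mutates a private copy only

func byPointer(cfg *mediaConfig) { cfg.MaxFileSize = 2 } // mutates the shared instance

func main() {
	cfg := &mediaConfig{ServerName: "localhost", MaxFileSize: 10 * 1024 * 1024}
	byValue(*cfg)
	fmt.Println(cfg.MaxFileSize) // still 10485760
	byPointer(cfg)
	fmt.Println(cfg.MaxFileSize) // now 2
}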
diff --git a/src/github.com/matrix-org/dendrite/mediaapi/routing/routing.go b/src/github.com/matrix-org/dendrite/mediaapi/routing/routing.go index 7b525984e..4c505ffdc 100644 --- a/src/github.com/matrix-org/dendrite/mediaapi/routing/routing.go +++ b/src/github.com/matrix-org/dendrite/mediaapi/routing/routing.go @@ -31,7 +31,7 @@ const pathPrefixR0 = "/_matrix/media/v1" // Setup registers HTTP handlers with the given ServeMux. It also supplies the given http.Client // to clients which need to make outbound HTTP requests. -func Setup(servMux *http.ServeMux, httpClient *http.Client, cfg config.MediaAPI, db *storage.Database) { +func Setup(servMux *http.ServeMux, httpClient *http.Client, cfg *config.MediaAPI, db *storage.Database) { apiMux := mux.NewRouter() r0mux := apiMux.PathPrefix(pathPrefixR0).Subrouter() r0mux.Handle("/upload", make("upload", util.NewJSONRequestHandler(func(req *http.Request) util.JSONResponse { diff --git a/src/github.com/matrix-org/dendrite/mediaapi/writers/download.go b/src/github.com/matrix-org/dendrite/mediaapi/writers/download.go index 6c6d8c2e1..d93945e4b 100644 --- a/src/github.com/matrix-org/dendrite/mediaapi/writers/download.go +++ b/src/github.com/matrix-org/dendrite/mediaapi/writers/download.go @@ -89,7 +89,7 @@ var nTries = 5 // If they are present in the cache, they are served directly. // If they are not present in the cache, they are obtained from the remote server and // simultaneously served back to the client and written into the cache. -func Download(w http.ResponseWriter, req *http.Request, origin types.ServerName, mediaID types.MediaID, cfg config.MediaAPI, db *storage.Database, activeRemoteRequests *types.ActiveRemoteRequests) { +func Download(w http.ResponseWriter, req *http.Request, origin types.ServerName, mediaID types.MediaID, cfg *config.MediaAPI, db *storage.Database, activeRemoteRequests *types.ActiveRemoteRequests) { logger := util.GetLogger(req.Context()) // request validation diff --git a/src/github.com/matrix-org/dendrite/mediaapi/writers/upload.go b/src/github.com/matrix-org/dendrite/mediaapi/writers/upload.go index 56f8c8350..c1db78ca7 100644 --- a/src/github.com/matrix-org/dendrite/mediaapi/writers/upload.go +++ b/src/github.com/matrix-org/dendrite/mediaapi/writers/upload.go @@ -100,7 +100,7 @@ type uploadResponse struct { // This implementation supports a configurable maximum file size limit in bytes. If a user tries to upload more than this, they will receive an error that their upload is too large. // Uploaded files are processed piece-wise to avoid DoS attacks which would starve the server of memory. // TODO: Requests time out if they have not received any data within the configured timeout period. 
-func Upload(req *http.Request, cfg config.MediaAPI, db *storage.Database) util.JSONResponse { +func Upload(req *http.Request, cfg *config.MediaAPI, db *storage.Database) util.JSONResponse { logger := util.GetLogger(req.Context()) if req.Method != "POST" { From 6e24fb86cb77d0f57d46a03ef6c25b956fca05d9 Mon Sep 17 00:00:00 2001 From: Robert Swain Date: Wed, 17 May 2017 16:13:54 +0200 Subject: [PATCH 040/108] mediaapi/writer/download: Make functions into methods and use MediaMetadata --- .../dendrite/mediaapi/writers/download.go | 128 +++++++++--------- 1 file changed, 64 insertions(+), 64 deletions(-) diff --git a/src/github.com/matrix-org/dendrite/mediaapi/writers/download.go b/src/github.com/matrix-org/dendrite/mediaapi/writers/download.go index d93945e4b..981e0cc0d 100644 --- a/src/github.com/matrix-org/dendrite/mediaapi/writers/download.go +++ b/src/github.com/matrix-org/dendrite/mediaapi/writers/download.go @@ -118,7 +118,7 @@ func Download(w http.ResponseWriter, req *http.Request, origin types.ServerName, if err == nil { // If we have a record, we can respond from the local file - respondFromLocalFile(w, logger, r.MediaMetadata, cfg) + r.respondFromLocalFile(w, logger, cfg) return } else if err == sql.ErrNoRows && r.MediaMetadata.Origin != cfg.ServerName { // If we do not have a record and the origin is remote, we need to fetch it and respond with that file @@ -133,7 +133,7 @@ func Download(w http.ResponseWriter, req *http.Request, origin types.ServerName, err = db.GetMediaMetadata(r.MediaMetadata.MediaID, r.MediaMetadata.Origin, r.MediaMetadata) if err == nil { // If we have a record, we can respond from the local file - respondFromLocalFile(w, logger, r.MediaMetadata, cfg) + r.respondFromLocalFile(w, logger, cfg) activeRemoteRequests.Unlock() return } @@ -167,7 +167,7 @@ func Download(w http.ResponseWriter, req *http.Request, origin types.ServerName, } } - respondFromRemoteFile(w, logger, r.MediaMetadata, cfg, db, activeRemoteRequests) + r.respondFromRemoteFile(w, logger, cfg, db, activeRemoteRequests) } else { // If we do not have a record and the origin is local, or if we have another error from the database, the file is not found jsonErrorResponse(w, util.JSONResponse{ @@ -177,24 +177,24 @@ func Download(w http.ResponseWriter, req *http.Request, origin types.ServerName, } } -func respondFromLocalFile(w http.ResponseWriter, logger *log.Entry, mediaMetadata *types.MediaMetadata, cfg config.MediaAPI) { +func respondFromLocalFile(w http.ResponseWriter, logger *log.Entry, cfg config.MediaAPI) { logger.WithFields(log.Fields{ - "MediaID": mediaMetadata.MediaID, - "Origin": mediaMetadata.Origin, - "UploadName": mediaMetadata.UploadName, - "Content-Length": mediaMetadata.ContentLength, - "Content-Type": mediaMetadata.ContentType, - "Content-Disposition": mediaMetadata.ContentDisposition, + "MediaID": r.MediaMetadata.MediaID, + "Origin": r.MediaMetadata.Origin, + "UploadName": r.MediaMetadata.UploadName, + "Content-Length": r.MediaMetadata.ContentLength, + "Content-Type": r.MediaMetadata.ContentType, + "Content-Disposition": r.MediaMetadata.ContentDisposition, }).Infof("Downloading file") - filePath := getPathFromMediaMetadata(mediaMetadata, cfg.BasePath) + filePath := getPathFromMediaMetadata(r.MediaMetadata, cfg.BasePath) file, err := os.Open(filePath) if err != nil { // FIXME: Remove erroneous file from database? 
jsonErrorResponse(w, util.JSONResponse{ Code: 404, - JSON: jsonerror.NotFound(fmt.Sprintf("File with media ID %q does not exist", mediaMetadata.MediaID)), - }, logger) + JSON: jsonerror.NotFound(fmt.Sprintf("File with media ID %q does not exist", r.MediaMetadata.MediaID)), + }) return } @@ -203,17 +203,17 @@ func respondFromLocalFile(w http.ResponseWriter, logger *log.Entry, mediaMetadat // FIXME: Remove erroneous file from database? jsonErrorResponse(w, util.JSONResponse{ Code: 404, - JSON: jsonerror.NotFound(fmt.Sprintf("File with media ID %q does not exist", mediaMetadata.MediaID)), + JSON: jsonerror.NotFound(fmt.Sprintf("File with media ID %q does not exist", r.MediaMetadata.MediaID)), }, logger) return } - if mediaMetadata.ContentLength > 0 && int64(mediaMetadata.ContentLength) != stat.Size() { - logger.Warnf("File size in database (%v) and on disk (%v) differ.", mediaMetadata.ContentLength, stat.Size()) + if r.MediaMetadata.ContentLength > 0 && int64(r.MediaMetadata.ContentLength) != stat.Size() { + logger.Warnf("File size in database (%v) and on disk (%v) differ.", r.MediaMetadata.ContentLength, stat.Size()) // FIXME: Remove erroneous file from database? } - w.Header().Set("Content-Type", string(mediaMetadata.ContentType)) + w.Header().Set("Content-Type", string(r.MediaMetadata.ContentType)) w.Header().Set("Content-Length", strconv.FormatInt(stat.Size(), 10)) contentSecurityPolicy := "default-src 'none';" + " script-src 'none';" + @@ -227,7 +227,7 @@ func respondFromLocalFile(w http.ResponseWriter, logger *log.Entry, mediaMetadat if bytesResponded == 0 { jsonErrorResponse(w, util.JSONResponse{ Code: 500, - JSON: jsonerror.NotFound(fmt.Sprintf("Failed to respond with file with media ID %q", mediaMetadata.MediaID)), + JSON: jsonerror.NotFound(fmt.Sprintf("Failed to respond with file with media ID %q", r.MediaMetadata.MediaID)), }, logger) } // If we have written any data then we have already responded with 200 OK and all we can do is close the connection @@ -235,28 +235,28 @@ func respondFromLocalFile(w http.ResponseWriter, logger *log.Entry, mediaMetadat } } -func createRemoteRequest(mediaMetadata *types.MediaMetadata, logger *log.Entry) (*http.Response, *util.JSONResponse) { - urls := getMatrixUrls(mediaMetadata.Origin) +func (r *downloadRequest) createRemoteRequest(logger *log.Entry) (*http.Response, *util.JSONResponse) { + urls := getMatrixUrls(r.MediaMetadata.Origin) logger.Printf("Connecting to remote %q\n", urls[0]) - remoteReqAddr := urls[0] + "/_matrix/media/v1/download/" + string(mediaMetadata.Origin) + "/" + string(mediaMetadata.MediaID) + remoteReqAddr := urls[0] + "/_matrix/media/v1/download/" + string(r.MediaMetadata.Origin) + "/" + string(r.MediaMetadata.MediaID) remoteReq, err := http.NewRequest("GET", remoteReqAddr, nil) if err != nil { return nil, &util.JSONResponse{ Code: 500, - JSON: jsonerror.Unknown(fmt.Sprintf("File with media ID %q could not be downloaded from %q", mediaMetadata.MediaID, mediaMetadata.Origin)), + JSON: jsonerror.Unknown(fmt.Sprintf("File with media ID %q could not be downloaded from %q", r.MediaMetadata.MediaID, r.MediaMetadata.Origin)), } } - remoteReq.Header.Set("Host", string(mediaMetadata.Origin)) + remoteReq.Header.Set("Host", string(r.MediaMetadata.Origin)) client := http.Client{} resp, err := client.Do(remoteReq) if err != nil { return nil, &util.JSONResponse{ Code: 502, - JSON: jsonerror.Unknown(fmt.Sprintf("File with media ID %q could not be downloaded from %q", mediaMetadata.MediaID, mediaMetadata.Origin)), + JSON: 
jsonerror.Unknown(fmt.Sprintf("File with media ID %q could not be downloaded from %q", r.MediaMetadata.MediaID, r.MediaMetadata.Origin)), } } @@ -265,12 +265,12 @@ func createRemoteRequest(mediaMetadata *types.MediaMetadata, logger *log.Entry) if resp.StatusCode == 404 { return nil, &util.JSONResponse{ Code: 404, - JSON: jsonerror.NotFound(fmt.Sprintf("File with media ID %q does not exist", mediaMetadata.MediaID)), + JSON: jsonerror.NotFound(fmt.Sprintf("File with media ID %q does not exist", r.MediaMetadata.MediaID)), } } return nil, &util.JSONResponse{ Code: 502, - JSON: jsonerror.Unknown(fmt.Sprintf("File with media ID %q could not be downloaded from %q", mediaMetadata.MediaID, mediaMetadata.Origin)), + JSON: jsonerror.Unknown(fmt.Sprintf("File with media ID %q could not be downloaded from %q", r.MediaMetadata.MediaID, r.MediaMetadata.Origin)), } } @@ -357,22 +357,22 @@ func completeRemoteRequest(activeRemoteRequests *types.ActiveRemoteRequests, mxc activeRemoteRequests.Unlock() } -func commitFileAndMetadata(tmpDir types.Path, basePath types.Path, mediaMetadata *types.MediaMetadata, activeRemoteRequests *types.ActiveRemoteRequests, db *storage.Database, mxcURL string, logger *log.Entry) bool { +func (r *downloadRequest) commitFileAndMetadata(tmpDir types.Path, basePath types.Path, activeRemoteRequests *types.ActiveRemoteRequests, db *storage.Database, mxcURL string, logger *log.Entry) bool { updateActiveRemoteRequests := true logger.WithFields(log.Fields{ - "MediaID": mediaMetadata.MediaID, - "Origin": mediaMetadata.Origin, - "UploadName": mediaMetadata.UploadName, - "Content-Length": mediaMetadata.ContentLength, - "Content-Type": mediaMetadata.ContentType, - "Content-Disposition": mediaMetadata.ContentDisposition, + "MediaID": r.MediaMetadata.MediaID, + "Origin": r.MediaMetadata.Origin, + "UploadName": r.MediaMetadata.UploadName, + "Content-Length": r.MediaMetadata.ContentLength, + "Content-Type": r.MediaMetadata.ContentType, + "Content-Disposition": r.MediaMetadata.ContentDisposition, }).Infof("Storing file metadata to media repository database") // The database is the source of truth so we need to have moved the file first err := moveFile( types.Path(path.Join(string(tmpDir), "content")), - types.Path(getPathFromMediaMetadata(mediaMetadata, basePath)), + types.Path(getPathFromMediaMetadata(r.MediaMetadata, basePath)), ) if err != nil { tmpDirErr := os.RemoveAll(string(tmpDir)) @@ -391,9 +391,9 @@ func commitFileAndMetadata(tmpDir types.Path, basePath types.Path, mediaMetadata activeRemoteRequests.Lock() // FIXME: unlock after timeout of db request // if written to disk, add to db - err = db.StoreMediaMetadata(mediaMetadata) + err = db.StoreMediaMetadata(r.MediaMetadata) if err != nil { - finalDir := path.Dir(getPathFromMediaMetadata(mediaMetadata, basePath)) + finalDir := path.Dir(getPathFromMediaMetadata(r.MediaMetadata, basePath)) finalDirErr := os.RemoveAll(finalDir) if finalDirErr != nil { logger.Warnf("Failed to remove finalDir (%v): %q\n", finalDir, finalDirErr) @@ -402,20 +402,20 @@ func commitFileAndMetadata(tmpDir types.Path, basePath types.Path, mediaMetadata return updateActiveRemoteRequests } logger.WithFields(log.Fields{ - "Origin": mediaMetadata.Origin, - "MediaID": mediaMetadata.MediaID, + "Origin": r.MediaMetadata.Origin, + "MediaID": r.MediaMetadata.MediaID, }).Infof("Signalling other goroutines waiting for us to fetch the file.") completeRemoteRequest(activeRemoteRequests, mxcURL) return updateActiveRemoteRequests } -func respondFromRemoteFile(w http.ResponseWriter, 
logger *log.Entry, mediaMetadata *types.MediaMetadata, cfg config.MediaAPI, db *storage.Database, activeRemoteRequests *types.ActiveRemoteRequests) { +func (r *downloadRequest) respondFromRemoteFile(w http.ResponseWriter, logger *log.Entry, cfg config.MediaAPI, db *storage.Database, activeRemoteRequests *types.ActiveRemoteRequests) { logger.WithFields(log.Fields{ - "MediaID": mediaMetadata.MediaID, - "Origin": mediaMetadata.Origin, + "MediaID": r.MediaMetadata.MediaID, + "Origin": r.MediaMetadata.Origin, }).Infof("Fetching remote file") - mxcURL := "mxc://" + string(mediaMetadata.Origin) + "/" + string(mediaMetadata.MediaID) + mxcURL := "mxc://" + string(r.MediaMetadata.Origin) + "/" + string(r.MediaMetadata.MediaID) // If we hit an error and we return early, we need to lock, broadcast on the condition, delete the condition and unlock. // If we return normally we have slightly different locking around the storage of metadata to the database and deletion of the condition. @@ -431,7 +431,7 @@ func respondFromRemoteFile(w http.ResponseWriter, logger *log.Entry, mediaMetada }() // create request for remote file - resp, errorResponse := createRemoteRequest(mediaMetadata, logger) + resp, errorResponse := r.createRemoteRequest(logger) if errorResponse != nil { jsonErrorResponse(w, *errorResponse, logger) return @@ -443,20 +443,20 @@ func respondFromRemoteFile(w http.ResponseWriter, logger *log.Entry, mediaMetada if err != nil { logger.Warn("Failed to parse content length") } - mediaMetadata.ContentLength = types.ContentLength(contentLength) + r.MediaMetadata.ContentLength = types.ContentLength(contentLength) - mediaMetadata.ContentType = types.ContentType(resp.Header.Get("Content-Type")) - mediaMetadata.ContentDisposition = types.ContentDisposition(resp.Header.Get("Content-Disposition")) + r.MediaMetadata.ContentType = types.ContentType(resp.Header.Get("Content-Type")) + r.MediaMetadata.ContentDisposition = types.ContentDisposition(resp.Header.Get("Content-Disposition")) // FIXME: parse from Content-Disposition header if possible, else fall back - //mediaMetadata.UploadName = types.Filename() + //r.MediaMetadata.UploadName = types.Filename() logger.WithFields(log.Fields{ - "MediaID": mediaMetadata.MediaID, - "Origin": mediaMetadata.Origin, + "MediaID": r.MediaMetadata.MediaID, + "Origin": r.MediaMetadata.Origin, }).Infof("Connected to remote") - w.Header().Set("Content-Type", string(mediaMetadata.ContentType)) - w.Header().Set("Content-Length", strconv.FormatInt(int64(mediaMetadata.ContentLength), 10)) + w.Header().Set("Content-Type", string(r.MediaMetadata.ContentType)) + w.Header().Set("Content-Length", strconv.FormatInt(int64(r.MediaMetadata.ContentLength), 10)) contentSecurityPolicy := "default-src 'none';" + " script-src 'none';" + " plugin-types application/pdf;" + @@ -475,13 +475,13 @@ func respondFromRemoteFile(w http.ResponseWriter, logger *log.Entry, mediaMetada // read the remote request's response body // simultaneously write it to the incoming request's response body and the temporary file logger.WithFields(log.Fields{ - "MediaID": mediaMetadata.MediaID, - "Origin": mediaMetadata.Origin, + "MediaID": r.MediaMetadata.MediaID, + "Origin": r.MediaMetadata.Origin, }).Infof("Proxying and caching remote file") // bytesResponded is the total number of bytes written to the response to the client request // bytesWritten is the total number of bytes written to disk - bytesResponded, bytesWritten, fetchError := copyToActiveAndPassive(resp.Body, w, tmpFileWriter, cfg.MaxFileSize, mediaMetadata, 
logger) + bytesResponded, bytesWritten, fetchError := copyToActiveAndPassive(resp.Body, w, tmpFileWriter, cfg.MaxFileSize, r.MediaMetadata, logger) tmpFileWriter.Flush() if fetchError != nil { tmpDirErr := os.RemoveAll(string(tmpDir)) @@ -492,7 +492,7 @@ func respondFromRemoteFile(w http.ResponseWriter, logger *log.Entry, mediaMetada if bytesResponded < 1 { jsonErrorResponse(w, util.JSONResponse{ Code: 502, - JSON: jsonerror.Unknown(fmt.Sprintf("File with media ID %q could not be downloaded from %q", mediaMetadata.MediaID, mediaMetadata.Origin)), + JSON: jsonerror.Unknown(fmt.Sprintf("File with media ID %q could not be downloaded from %q", r.MediaMetadata.MediaID, r.MediaMetadata.Origin)), }, logger) } else { // We attempt to bluntly close the connection because that is the @@ -511,20 +511,20 @@ func respondFromRemoteFile(w http.ResponseWriter, logger *log.Entry, mediaMetada // It's possible the bytesWritten to the temporary file is different to the reported Content-Length from the remote // request's response. bytesWritten is therefore used as it is what would be sent to clients when reading from the local // file. - mediaMetadata.ContentLength = types.ContentLength(bytesWritten) - mediaMetadata.UserID = types.MatrixUserID("@:" + string(mediaMetadata.Origin)) + r.MediaMetadata.ContentLength = types.ContentLength(bytesWritten) + r.MediaMetadata.UserID = types.MatrixUserID("@:" + string(r.MediaMetadata.Origin)) - updateActiveRemoteRequests = commitFileAndMetadata(tmpDir, cfg.BasePath, mediaMetadata, activeRemoteRequests, db, mxcURL, logger) + updateActiveRemoteRequests = r.commitFileAndMetadata(tmpDir, cfg.BasePath, activeRemoteRequests, db, mxcURL, logger) // TODO: generate thumbnails logger.WithFields(log.Fields{ - "MediaID": mediaMetadata.MediaID, - "Origin": mediaMetadata.Origin, - "UploadName": mediaMetadata.UploadName, - "Content-Length": mediaMetadata.ContentLength, - "Content-Type": mediaMetadata.ContentType, - "Content-Disposition": mediaMetadata.ContentDisposition, + "MediaID": r.MediaMetadata.MediaID, + "Origin": r.MediaMetadata.Origin, + "UploadName": r.MediaMetadata.UploadName, + "Content-Length": r.MediaMetadata.ContentLength, + "Content-Type": r.MediaMetadata.ContentType, + "Content-Disposition": r.MediaMetadata.ContentDisposition, }).Infof("Remote file cached") } From 13b1051a3e23ef578d3db1c3ba2ef52f49dd0c27 Mon Sep 17 00:00:00 2001 From: Robert Swain Date: Wed, 17 May 2017 16:24:30 +0200 Subject: [PATCH 041/108] mediaapi/writers/download: Add logger to downloadRequest and use it --- .../dendrite/mediaapi/writers/download.go | 126 +++++++++--------- 1 file changed, 63 insertions(+), 63 deletions(-) diff --git a/src/github.com/matrix-org/dendrite/mediaapi/writers/download.go b/src/github.com/matrix-org/dendrite/mediaapi/writers/download.go index 981e0cc0d..8481cd05c 100644 --- a/src/github.com/matrix-org/dendrite/mediaapi/writers/download.go +++ b/src/github.com/matrix-org/dendrite/mediaapi/writers/download.go @@ -39,10 +39,11 @@ import ( // https://matrix.org/docs/spec/client_server/r0.2.0.html#post-matrix-media-r0-download type downloadRequest struct { MediaMetadata *types.MediaMetadata + Logger *log.Entry } // Validate validates the downloadRequest fields -func (r downloadRequest) Validate() *util.JSONResponse { +func (r *downloadRequest) Validate() *util.JSONResponse { // FIXME: the following errors aren't bad JSON, rather just a bad request path // maybe give the URL pattern in the routing, these are not even possible as the handler would not be hit...? 
if r.MediaMetadata.MediaID == "" { @@ -60,11 +61,11 @@ func (r downloadRequest) Validate() *util.JSONResponse { return nil } -func jsonErrorResponse(w http.ResponseWriter, res util.JSONResponse, logger *log.Entry) { +func (r *downloadRequest) jsonErrorResponse(w http.ResponseWriter, res util.JSONResponse) { // Marshal JSON response into raw bytes to send as the HTTP body resBytes, err := json.Marshal(res.JSON) if err != nil { - logger.WithError(err).Error("Failed to marshal JSONResponse") + r.Logger.WithError(err).Error("Failed to marshal JSONResponse") // this should never fail to be marshalled so drop err to the floor res = util.MessageResponse(500, "Internal Server Error") resBytes, _ = json.Marshal(res.JSON) @@ -72,7 +73,7 @@ func jsonErrorResponse(w http.ResponseWriter, res util.JSONResponse, logger *log // Set status code and write the body w.WriteHeader(res.Code) - logger.WithField("code", res.Code).Infof("Responding (%d bytes)", len(resBytes)) + r.Logger.WithField("code", res.Code).Infof("Responding (%d bytes)", len(resBytes)) w.Write(resBytes) } @@ -90,26 +91,25 @@ var nTries = 5 // If they are not present in the cache, they are obtained from the remote server and // simultaneously served back to the client and written into the cache. func Download(w http.ResponseWriter, req *http.Request, origin types.ServerName, mediaID types.MediaID, cfg *config.MediaAPI, db *storage.Database, activeRemoteRequests *types.ActiveRemoteRequests) { - logger := util.GetLogger(req.Context()) - - // request validation - if req.Method != "GET" { - jsonErrorResponse(w, util.JSONResponse{ - Code: 405, - JSON: jsonerror.Unknown("request method must be GET"), - }, logger) - return - } - r := &downloadRequest{ MediaMetadata: &types.MediaMetadata{ MediaID: mediaID, Origin: origin, }, + Logger: util.GetLogger(req.Context()), + } + + // request validation + if req.Method != "GET" { + r.jsonErrorResponse(w, util.JSONResponse{ + Code: 405, + JSON: jsonerror.Unknown("request method must be GET"), + }) + return } if resErr := r.Validate(); resErr != nil { - jsonErrorResponse(w, *resErr, logger) + r.jsonErrorResponse(w, *resErr) return } @@ -118,7 +118,7 @@ func Download(w http.ResponseWriter, req *http.Request, origin types.ServerName, if err == nil { // If we have a record, we can respond from the local file - r.respondFromLocalFile(w, logger, cfg) + r.respondFromLocalFile(w, cfg) return } else if err == sql.ErrNoRows && r.MediaMetadata.Origin != cfg.ServerName { // If we do not have a record and the origin is remote, we need to fetch it and respond with that file @@ -133,31 +133,31 @@ func Download(w http.ResponseWriter, req *http.Request, origin types.ServerName, err = db.GetMediaMetadata(r.MediaMetadata.MediaID, r.MediaMetadata.Origin, r.MediaMetadata) if err == nil { // If we have a record, we can respond from the local file - r.respondFromLocalFile(w, logger, cfg) + r.respondFromLocalFile(w, cfg) activeRemoteRequests.Unlock() return } if activeRemoteRequestCondition, ok := activeRemoteRequests.Set[mxcURL]; ok { if tries >= nTries { - logger.WithFields(log.Fields{ + r.Logger.WithFields(log.Fields{ "MediaID": r.MediaMetadata.MediaID, "Origin": r.MediaMetadata.Origin, }).Warnf("Other goroutines are trying to download the remote file and failing.") - jsonErrorResponse(w, util.JSONResponse{ + r.jsonErrorResponse(w, util.JSONResponse{ Code: 500, JSON: jsonerror.Unknown(fmt.Sprintf("File with media ID %q could not be downloaded from %q", r.MediaMetadata.MediaID, r.MediaMetadata.Origin)), - }, logger) + }) 
activeRemoteRequests.Unlock() return } - logger.WithFields(log.Fields{ + r.Logger.WithFields(log.Fields{ "Origin": r.MediaMetadata.Origin, "MediaID": r.MediaMetadata.MediaID, }).Infof("Waiting for another goroutine to fetch the remote file.") activeRemoteRequestCondition.Wait() activeRemoteRequests.Unlock() } else { - logger.WithFields(log.Fields{ + r.Logger.WithFields(log.Fields{ "MediaID": r.MediaMetadata.MediaID, "Origin": r.MediaMetadata.Origin, }).Infof("Fetching remote file") @@ -167,18 +167,18 @@ func Download(w http.ResponseWriter, req *http.Request, origin types.ServerName, } } - r.respondFromRemoteFile(w, logger, cfg, db, activeRemoteRequests) + r.respondFromRemoteFile(w, cfg, db, activeRemoteRequests) } else { // If we do not have a record and the origin is local, or if we have another error from the database, the file is not found - jsonErrorResponse(w, util.JSONResponse{ + r.jsonErrorResponse(w, util.JSONResponse{ Code: 404, JSON: jsonerror.NotFound(fmt.Sprintf("File with media ID %q does not exist", r.MediaMetadata.MediaID)), - }, logger) + }) } } -func respondFromLocalFile(w http.ResponseWriter, logger *log.Entry, cfg config.MediaAPI) { - logger.WithFields(log.Fields{ +func (r *downloadRequest) respondFromLocalFile(w http.ResponseWriter, cfg config.MediaAPI) { + r.Logger.WithFields(log.Fields{ "MediaID": r.MediaMetadata.MediaID, "Origin": r.MediaMetadata.Origin, "UploadName": r.MediaMetadata.UploadName, @@ -191,7 +191,7 @@ func respondFromLocalFile(w http.ResponseWriter, logger *log.Entry, cfg config.M file, err := os.Open(filePath) if err != nil { // FIXME: Remove erroneous file from database? - jsonErrorResponse(w, util.JSONResponse{ + r.jsonErrorResponse(w, util.JSONResponse{ Code: 404, JSON: jsonerror.NotFound(fmt.Sprintf("File with media ID %q does not exist", r.MediaMetadata.MediaID)), }) @@ -201,15 +201,15 @@ func respondFromLocalFile(w http.ResponseWriter, logger *log.Entry, cfg config.M stat, err := file.Stat() if err != nil { // FIXME: Remove erroneous file from database? - jsonErrorResponse(w, util.JSONResponse{ + r.jsonErrorResponse(w, util.JSONResponse{ Code: 404, JSON: jsonerror.NotFound(fmt.Sprintf("File with media ID %q does not exist", r.MediaMetadata.MediaID)), - }, logger) + }) return } if r.MediaMetadata.ContentLength > 0 && int64(r.MediaMetadata.ContentLength) != stat.Size() { - logger.Warnf("File size in database (%v) and on disk (%v) differ.", r.MediaMetadata.ContentLength, stat.Size()) + r.Logger.Warnf("File size in database (%v) and on disk (%v) differ.", r.MediaMetadata.ContentLength, stat.Size()) // FIXME: Remove erroneous file from database? 
} @@ -223,22 +223,22 @@ func respondFromLocalFile(w http.ResponseWriter, logger *log.Entry, cfg config.M w.Header().Set("Content-Security-Policy", contentSecurityPolicy) if bytesResponded, err := io.Copy(w, file); err != nil { - logger.Warnf("Failed to copy from cache %v\n", err) + r.Logger.Warnf("Failed to copy from cache %v\n", err) if bytesResponded == 0 { - jsonErrorResponse(w, util.JSONResponse{ + r.jsonErrorResponse(w, util.JSONResponse{ Code: 500, JSON: jsonerror.NotFound(fmt.Sprintf("Failed to respond with file with media ID %q", r.MediaMetadata.MediaID)), - }, logger) + }) } // If we have written any data then we have already responded with 200 OK and all we can do is close the connection return } } -func (r *downloadRequest) createRemoteRequest(logger *log.Entry) (*http.Response, *util.JSONResponse) { +func (r *downloadRequest) createRemoteRequest() (*http.Response, *util.JSONResponse) { urls := getMatrixUrls(r.MediaMetadata.Origin) - logger.Printf("Connecting to remote %q\n", urls[0]) + r.Logger.Printf("Connecting to remote %q\n", urls[0]) remoteReqAddr := urls[0] + "/_matrix/media/v1/download/" + string(r.MediaMetadata.Origin) + "/" + string(r.MediaMetadata.MediaID) remoteReq, err := http.NewRequest("GET", remoteReqAddr, nil) @@ -261,7 +261,7 @@ func (r *downloadRequest) createRemoteRequest(logger *log.Entry) (*http.Response } if resp.StatusCode != 200 { - logger.Printf("Server responded with %d\n", resp.StatusCode) + r.Logger.Printf("Server responded with %d\n", resp.StatusCode) if resp.StatusCode == 404 { return nil, &util.JSONResponse{ Code: 404, @@ -335,16 +335,16 @@ func copyToActiveAndPassive(r io.Reader, wActive io.Writer, wPassive io.Writer, return bytesResponded, bytesWritten, fetchError } -func closeConnection(w http.ResponseWriter, logger *log.Entry) { - logger.Println("Attempting to close the connection.") +func (r *downloadRequest) closeConnection(w http.ResponseWriter) { + r.Logger.Println("Attempting to close the connection.") hijacker, ok := w.(http.Hijacker) if ok { connection, _, hijackErr := hijacker.Hijack() if hijackErr == nil { - logger.Println("Closing") + r.Logger.Println("Closing") connection.Close() } else { - logger.Printf("Error trying to hijack: %v", hijackErr) + r.Logger.Printf("Error trying to hijack: %v", hijackErr) } } } @@ -357,10 +357,10 @@ func completeRemoteRequest(activeRemoteRequests *types.ActiveRemoteRequests, mxc activeRemoteRequests.Unlock() } -func (r *downloadRequest) commitFileAndMetadata(tmpDir types.Path, basePath types.Path, activeRemoteRequests *types.ActiveRemoteRequests, db *storage.Database, mxcURL string, logger *log.Entry) bool { +func (r *downloadRequest) commitFileAndMetadata(tmpDir types.Path, basePath types.Path, activeRemoteRequests *types.ActiveRemoteRequests, db *storage.Database, mxcURL string) bool { updateActiveRemoteRequests := true - logger.WithFields(log.Fields{ + r.Logger.WithFields(log.Fields{ "MediaID": r.MediaMetadata.MediaID, "Origin": r.MediaMetadata.Origin, "UploadName": r.MediaMetadata.UploadName, @@ -377,7 +377,7 @@ func (r *downloadRequest) commitFileAndMetadata(tmpDir types.Path, basePath type if err != nil { tmpDirErr := os.RemoveAll(string(tmpDir)) if tmpDirErr != nil { - logger.Warnf("Failed to remove tmpDir (%v): %q\n", tmpDir, tmpDirErr) + r.Logger.Warnf("Failed to remove tmpDir (%v): %q\n", tmpDir, tmpDirErr) } return updateActiveRemoteRequests } @@ -396,12 +396,12 @@ func (r *downloadRequest) commitFileAndMetadata(tmpDir types.Path, basePath type finalDir := 
path.Dir(getPathFromMediaMetadata(r.MediaMetadata, basePath)) finalDirErr := os.RemoveAll(finalDir) if finalDirErr != nil { - logger.Warnf("Failed to remove finalDir (%v): %q\n", finalDir, finalDirErr) + r.Logger.Warnf("Failed to remove finalDir (%v): %q\n", finalDir, finalDirErr) } completeRemoteRequest(activeRemoteRequests, mxcURL) return updateActiveRemoteRequests } - logger.WithFields(log.Fields{ + r.Logger.WithFields(log.Fields{ "Origin": r.MediaMetadata.Origin, "MediaID": r.MediaMetadata.MediaID, }).Infof("Signalling other goroutines waiting for us to fetch the file.") @@ -409,8 +409,8 @@ func (r *downloadRequest) commitFileAndMetadata(tmpDir types.Path, basePath type return updateActiveRemoteRequests } -func (r *downloadRequest) respondFromRemoteFile(w http.ResponseWriter, logger *log.Entry, cfg config.MediaAPI, db *storage.Database, activeRemoteRequests *types.ActiveRemoteRequests) { - logger.WithFields(log.Fields{ +func (r *downloadRequest) respondFromRemoteFile(w http.ResponseWriter, cfg config.MediaAPI, db *storage.Database, activeRemoteRequests *types.ActiveRemoteRequests) { + r.Logger.WithFields(log.Fields{ "MediaID": r.MediaMetadata.MediaID, "Origin": r.MediaMetadata.Origin, }).Infof("Fetching remote file") @@ -431,9 +431,9 @@ func (r *downloadRequest) respondFromRemoteFile(w http.ResponseWriter, logger *l }() // create request for remote file - resp, errorResponse := r.createRemoteRequest(logger) + resp, errorResponse := r.createRemoteRequest() if errorResponse != nil { - jsonErrorResponse(w, *errorResponse, logger) + r.jsonErrorResponse(w, *errorResponse) return } defer resp.Body.Close() @@ -441,7 +441,7 @@ func (r *downloadRequest) respondFromRemoteFile(w http.ResponseWriter, logger *l // get metadata from request and set metadata on response contentLength, err := strconv.ParseInt(resp.Header.Get("Content-Length"), 10, 64) if err != nil { - logger.Warn("Failed to parse content length") + r.Logger.Warn("Failed to parse content length") } r.MediaMetadata.ContentLength = types.ContentLength(contentLength) @@ -450,7 +450,7 @@ func (r *downloadRequest) respondFromRemoteFile(w http.ResponseWriter, logger *l // FIXME: parse from Content-Disposition header if possible, else fall back //r.MediaMetadata.UploadName = types.Filename() - logger.WithFields(log.Fields{ + r.Logger.WithFields(log.Fields{ "MediaID": r.MediaMetadata.MediaID, "Origin": r.MediaMetadata.Origin, }).Infof("Connected to remote") @@ -465,39 +465,39 @@ func (r *downloadRequest) respondFromRemoteFile(w http.ResponseWriter, logger *l w.Header().Set("Content-Security-Policy", contentSecurityPolicy) // create the temporary file writer - tmpFileWriter, tmpFile, tmpDir, errorResponse := createTempFileWriter(cfg.BasePath, logger) + tmpFileWriter, tmpFile, tmpDir, errorResponse := createTempFileWriter(cfg.BasePath, r.Logger) if errorResponse != nil { - jsonErrorResponse(w, *errorResponse, logger) + r.jsonErrorResponse(w, *errorResponse) return } defer tmpFile.Close() // read the remote request's response body // simultaneously write it to the incoming request's response body and the temporary file - logger.WithFields(log.Fields{ + r.Logger.WithFields(log.Fields{ "MediaID": r.MediaMetadata.MediaID, "Origin": r.MediaMetadata.Origin, }).Infof("Proxying and caching remote file") // bytesResponded is the total number of bytes written to the response to the client request // bytesWritten is the total number of bytes written to disk - bytesResponded, bytesWritten, fetchError := copyToActiveAndPassive(resp.Body, w, tmpFileWriter, 
cfg.MaxFileSize, r.MediaMetadata, logger) + bytesResponded, bytesWritten, fetchError := copyToActiveAndPassive(resp.Body, w, tmpFileWriter, cfg.MaxFileSize, r.MediaMetadata, r.Logger) tmpFileWriter.Flush() if fetchError != nil { tmpDirErr := os.RemoveAll(string(tmpDir)) if tmpDirErr != nil { - logger.Warnf("Failed to remove tmpDir (%v): %q\n", tmpDir, tmpDirErr) + r.Logger.Warnf("Failed to remove tmpDir (%v): %q\n", tmpDir, tmpDirErr) } // Note: if we have responded with any data in the body at all then we have already sent 200 OK and we can only abort at this point if bytesResponded < 1 { - jsonErrorResponse(w, util.JSONResponse{ + r.jsonErrorResponse(w, util.JSONResponse{ Code: 502, JSON: jsonerror.Unknown(fmt.Sprintf("File with media ID %q could not be downloaded from %q", r.MediaMetadata.MediaID, r.MediaMetadata.Origin)), - }, logger) + }) } else { // We attempt to bluntly close the connection because that is the // best thing we can do after we've sent a 200 OK - closeConnection(w, logger) + r.closeConnection(w) } return } @@ -514,11 +514,11 @@ func (r *downloadRequest) respondFromRemoteFile(w http.ResponseWriter, logger *l r.MediaMetadata.ContentLength = types.ContentLength(bytesWritten) r.MediaMetadata.UserID = types.MatrixUserID("@:" + string(r.MediaMetadata.Origin)) - updateActiveRemoteRequests = r.commitFileAndMetadata(tmpDir, cfg.BasePath, activeRemoteRequests, db, mxcURL, logger) + updateActiveRemoteRequests = r.commitFileAndMetadata(tmpDir, cfg.BasePath, activeRemoteRequests, db, mxcURL) // TODO: generate thumbnails - logger.WithFields(log.Fields{ + r.Logger.WithFields(log.Fields{ "MediaID": r.MediaMetadata.MediaID, "Origin": r.MediaMetadata.Origin, "UploadName": r.MediaMetadata.UploadName, From 19ffa4d75aba41f0ff5f7c7f422e229bec1b497d Mon Sep 17 00:00:00 2001 From: Robert Swain Date: Wed, 17 May 2017 16:30:01 +0200 Subject: [PATCH 042/108] mediaapi/writers/download: respondFromLocalFile only needs basePath --- .../matrix-org/dendrite/mediaapi/writers/download.go | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/github.com/matrix-org/dendrite/mediaapi/writers/download.go b/src/github.com/matrix-org/dendrite/mediaapi/writers/download.go index 8481cd05c..29bfa28c2 100644 --- a/src/github.com/matrix-org/dendrite/mediaapi/writers/download.go +++ b/src/github.com/matrix-org/dendrite/mediaapi/writers/download.go @@ -118,7 +118,7 @@ func Download(w http.ResponseWriter, req *http.Request, origin types.ServerName, if err == nil { // If we have a record, we can respond from the local file - r.respondFromLocalFile(w, cfg) + r.respondFromLocalFile(w, cfg.BasePath) return } else if err == sql.ErrNoRows && r.MediaMetadata.Origin != cfg.ServerName { // If we do not have a record and the origin is remote, we need to fetch it and respond with that file @@ -133,7 +133,7 @@ func Download(w http.ResponseWriter, req *http.Request, origin types.ServerName, err = db.GetMediaMetadata(r.MediaMetadata.MediaID, r.MediaMetadata.Origin, r.MediaMetadata) if err == nil { // If we have a record, we can respond from the local file - r.respondFromLocalFile(w, cfg) + r.respondFromLocalFile(w, cfg.BasePath) activeRemoteRequests.Unlock() return } @@ -177,7 +177,7 @@ func Download(w http.ResponseWriter, req *http.Request, origin types.ServerName, } } -func (r *downloadRequest) respondFromLocalFile(w http.ResponseWriter, cfg config.MediaAPI) { +func (r *downloadRequest) respondFromLocalFile(w http.ResponseWriter, basePath types.Path) { r.Logger.WithFields(log.Fields{ "MediaID": 
r.MediaMetadata.MediaID, "Origin": r.MediaMetadata.Origin, @@ -187,7 +187,7 @@ func (r *downloadRequest) respondFromLocalFile(w http.ResponseWriter, cfg config "Content-Disposition": r.MediaMetadata.ContentDisposition, }).Infof("Downloading file") - filePath := getPathFromMediaMetadata(r.MediaMetadata, cfg.BasePath) + filePath := getPathFromMediaMetadata(r.MediaMetadata, basePath) file, err := os.Open(filePath) if err != nil { // FIXME: Remove erroneous file from database? From 9f8458716d4eaeb93c749dcefa304293d4b52200 Mon Sep 17 00:00:00 2001 From: Robert Swain Date: Wed, 17 May 2017 16:37:11 +0200 Subject: [PATCH 043/108] mediaapi/writers/download: Pass config members to respondFromRemoteFile --- .../matrix-org/dendrite/mediaapi/writers/download.go | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/src/github.com/matrix-org/dendrite/mediaapi/writers/download.go b/src/github.com/matrix-org/dendrite/mediaapi/writers/download.go index 29bfa28c2..732824078 100644 --- a/src/github.com/matrix-org/dendrite/mediaapi/writers/download.go +++ b/src/github.com/matrix-org/dendrite/mediaapi/writers/download.go @@ -167,7 +167,7 @@ func Download(w http.ResponseWriter, req *http.Request, origin types.ServerName, } } - r.respondFromRemoteFile(w, cfg, db, activeRemoteRequests) + r.respondFromRemoteFile(w, cfg.BasePath, cfg.MaxFileSize, db, activeRemoteRequests) } else { // If we do not have a record and the origin is local, or if we have another error from the database, the file is not found r.jsonErrorResponse(w, util.JSONResponse{ @@ -409,7 +409,7 @@ func (r *downloadRequest) commitFileAndMetadata(tmpDir types.Path, basePath type return updateActiveRemoteRequests } -func (r *downloadRequest) respondFromRemoteFile(w http.ResponseWriter, cfg config.MediaAPI, db *storage.Database, activeRemoteRequests *types.ActiveRemoteRequests) { +func (r *downloadRequest) respondFromRemoteFile(w http.ResponseWriter, basePath types.Path, maxFileSize types.ContentLength, db *storage.Database, activeRemoteRequests *types.ActiveRemoteRequests) { r.Logger.WithFields(log.Fields{ "MediaID": r.MediaMetadata.MediaID, "Origin": r.MediaMetadata.Origin, @@ -465,7 +465,7 @@ func (r *downloadRequest) respondFromRemoteFile(w http.ResponseWriter, cfg confi w.Header().Set("Content-Security-Policy", contentSecurityPolicy) // create the temporary file writer - tmpFileWriter, tmpFile, tmpDir, errorResponse := createTempFileWriter(cfg.BasePath, r.Logger) + tmpFileWriter, tmpFile, tmpDir, errorResponse := createTempFileWriter(basePath, r.Logger) if errorResponse != nil { r.jsonErrorResponse(w, *errorResponse) return @@ -481,7 +481,7 @@ func (r *downloadRequest) respondFromRemoteFile(w http.ResponseWriter, cfg confi // bytesResponded is the total number of bytes written to the response to the client request // bytesWritten is the total number of bytes written to disk - bytesResponded, bytesWritten, fetchError := copyToActiveAndPassive(resp.Body, w, tmpFileWriter, cfg.MaxFileSize, r.MediaMetadata, r.Logger) + bytesResponded, bytesWritten, fetchError := copyToActiveAndPassive(resp.Body, w, tmpFileWriter, maxFileSize, r.MediaMetadata, r.Logger) tmpFileWriter.Flush() if fetchError != nil { tmpDirErr := os.RemoveAll(string(tmpDir)) @@ -514,7 +514,7 @@ func (r *downloadRequest) respondFromRemoteFile(w http.ResponseWriter, cfg confi r.MediaMetadata.ContentLength = types.ContentLength(bytesWritten) r.MediaMetadata.UserID = types.MatrixUserID("@:" + string(r.MediaMetadata.Origin)) - updateActiveRemoteRequests = 
r.commitFileAndMetadata(tmpDir, cfg.BasePath, activeRemoteRequests, db, mxcURL) + updateActiveRemoteRequests = r.commitFileAndMetadata(tmpDir, basePath, activeRemoteRequests, db, mxcURL) // TODO: generate thumbnails From a8b7130745657710b7fd9328de10ea76e06d4b90 Mon Sep 17 00:00:00 2001 From: Robert Swain Date: Wed, 17 May 2017 16:39:01 +0200 Subject: [PATCH 044/108] mediaapi/writers/download: Clean up copyToActiveAndPassive --- .../dendrite/mediaapi/writers/download.go | 40 +++++++++---------- 1 file changed, 18 insertions(+), 22 deletions(-) diff --git a/src/github.com/matrix-org/dendrite/mediaapi/writers/download.go b/src/github.com/matrix-org/dendrite/mediaapi/writers/download.go index 732824078..9a60401af 100644 --- a/src/github.com/matrix-org/dendrite/mediaapi/writers/download.go +++ b/src/github.com/matrix-org/dendrite/mediaapi/writers/download.go @@ -281,9 +281,9 @@ func (r *downloadRequest) createRemoteRequest() (*http.Response, *util.JSONRespo // If there is an error with the reader or the active writer, that is considered an error // If there is an error with the passive writer, that is non-critical and copying continues // maxFileSize limits the amount of data written to the passive writer -func copyToActiveAndPassive(r io.Reader, wActive io.Writer, wPassive io.Writer, maxFileSize types.ContentLength, mediaMetadata *types.MediaMetadata, logger *log.Entry) (int64, int64, error) { +func copyToActiveAndPassive(r io.Reader, wActive io.Writer, wPassive io.Writer, maxFileSize types.ContentLength, mediaMetadata *types.MediaMetadata) (int64, int64, error) { var bytesResponded, bytesWritten int64 = 0, 0 - var fetchError error + var copyError error // Note: the buffer size is the same as is used in io.Copy() buffer := make([]byte, 32*1024) for { @@ -293,20 +293,19 @@ func copyToActiveAndPassive(r io.Reader, wActive io.Writer, wPassive io.Writer, // write to client request's response body bytesTemp, respErr := wActive.Write(buffer[:bytesRead]) if bytesTemp != bytesRead || (respErr != nil && respErr != io.EOF) { - logger.Errorf("bytesTemp %v != bytesRead %v : %v", bytesTemp, bytesRead, respErr) - fetchError = errResponse + copyError = errResponse break } bytesResponded += int64(bytesTemp) - if fetchError == nil || (fetchError != errFileIsTooLarge && fetchError != errWrite) { - // if larger than cfg.MaxFileSize then stop writing to disk and discard cached file + if copyError == nil || (copyError != errFileIsTooLarge && copyError != errWrite) { + // if larger than maxFileSize then stop writing to disk and discard cached file if bytesWritten+int64(len(buffer)) > int64(maxFileSize) { - fetchError = errFileIsTooLarge + copyError = errFileIsTooLarge } else { // write to disk bytesTemp, writeErr := wPassive.Write(buffer[:bytesRead]) if writeErr != nil && writeErr != io.EOF { - fetchError = errWrite + copyError = errWrite } else { bytesWritten += int64(bytesTemp) } @@ -315,24 +314,13 @@ func copyToActiveAndPassive(r io.Reader, wActive io.Writer, wPassive io.Writer, } if readErr != nil { if readErr != io.EOF { - fetchError = errRead + copyError = errRead } break } } - if fetchError != nil { - logFields := log.Fields{ - "MediaID": mediaMetadata.MediaID, - "Origin": mediaMetadata.Origin, - } - if fetchError == errFileIsTooLarge { - logFields["MaxFileSize"] = maxFileSize - } - logger.WithFields(logFields).Warnln(fetchError) - } - - return bytesResponded, bytesWritten, fetchError + return bytesResponded, bytesWritten, copyError } func (r *downloadRequest) closeConnection(w http.ResponseWriter) { @@ 
-481,9 +469,17 @@ func (r *downloadRequest) respondFromRemoteFile(w http.ResponseWriter, basePath // bytesResponded is the total number of bytes written to the response to the client request // bytesWritten is the total number of bytes written to disk - bytesResponded, bytesWritten, fetchError := copyToActiveAndPassive(resp.Body, w, tmpFileWriter, maxFileSize, r.MediaMetadata, r.Logger) + bytesResponded, bytesWritten, fetchError := copyToActiveAndPassive(resp.Body, w, tmpFileWriter, maxFileSize, r.MediaMetadata) tmpFileWriter.Flush() if fetchError != nil { + logFields := log.Fields{ + "MediaID": r.MediaMetadata.MediaID, + "Origin": r.MediaMetadata.Origin, + } + if fetchError == errFileIsTooLarge { + logFields["MaxFileSize"] = maxFileSize + } + r.Logger.WithFields(logFields).Warnln(fetchError) tmpDirErr := os.RemoveAll(string(tmpDir)) if tmpDirErr != nil { r.Logger.Warnf("Failed to remove tmpDir (%v): %q\n", tmpDir, tmpDirErr) From c43aa20adc7922340a4733c765aef23306c42a54 Mon Sep 17 00:00:00 2001 From: Robert Swain Date: Thu, 18 May 2017 08:56:54 +0200 Subject: [PATCH 045/108] mediaapi/writers/download: Remove duplicate log message --- .../matrix-org/dendrite/mediaapi/writers/download.go | 4 ---- 1 file changed, 4 deletions(-) diff --git a/src/github.com/matrix-org/dendrite/mediaapi/writers/download.go b/src/github.com/matrix-org/dendrite/mediaapi/writers/download.go index 9a60401af..d1ee42d3c 100644 --- a/src/github.com/matrix-org/dendrite/mediaapi/writers/download.go +++ b/src/github.com/matrix-org/dendrite/mediaapi/writers/download.go @@ -157,10 +157,6 @@ func Download(w http.ResponseWriter, req *http.Request, origin types.ServerName, activeRemoteRequestCondition.Wait() activeRemoteRequests.Unlock() } else { - r.Logger.WithFields(log.Fields{ - "MediaID": r.MediaMetadata.MediaID, - "Origin": r.MediaMetadata.Origin, - }).Infof("Fetching remote file") activeRemoteRequests.Set[mxcURL] = &sync.Cond{L: activeRemoteRequests} activeRemoteRequests.Unlock() break From eddc3e0789f22b9ef2114ede04855b1a96a206b8 Mon Sep 17 00:00:00 2001 From: Robert Swain Date: Thu, 18 May 2017 08:57:29 +0200 Subject: [PATCH 046/108] mediaapi/writers/download: Log warnings about why we return 404 --- .../matrix-org/dendrite/mediaapi/writers/download.go | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/github.com/matrix-org/dendrite/mediaapi/writers/download.go b/src/github.com/matrix-org/dendrite/mediaapi/writers/download.go index d1ee42d3c..83ea70225 100644 --- a/src/github.com/matrix-org/dendrite/mediaapi/writers/download.go +++ b/src/github.com/matrix-org/dendrite/mediaapi/writers/download.go @@ -166,6 +166,7 @@ func Download(w http.ResponseWriter, req *http.Request, origin types.ServerName, r.respondFromRemoteFile(w, cfg.BasePath, cfg.MaxFileSize, db, activeRemoteRequests) } else { // If we do not have a record and the origin is local, or if we have another error from the database, the file is not found + r.Logger.Warnln("Failed to look up file in database:", err) r.jsonErrorResponse(w, util.JSONResponse{ Code: 404, JSON: jsonerror.NotFound(fmt.Sprintf("File with media ID %q does not exist", r.MediaMetadata.MediaID)), @@ -187,6 +188,7 @@ func (r *downloadRequest) respondFromLocalFile(w http.ResponseWriter, basePath t file, err := os.Open(filePath) if err != nil { // FIXME: Remove erroneous file from database? 
+ r.Logger.Warnln("Failed to open file:", err) r.jsonErrorResponse(w, util.JSONResponse{ Code: 404, JSON: jsonerror.NotFound(fmt.Sprintf("File with media ID %q does not exist", r.MediaMetadata.MediaID)), @@ -197,6 +199,7 @@ func (r *downloadRequest) respondFromLocalFile(w http.ResponseWriter, basePath t stat, err := file.Stat() if err != nil { // FIXME: Remove erroneous file from database? + r.Logger.Warnln("Failed to stat file:", err) r.jsonErrorResponse(w, util.JSONResponse{ Code: 404, JSON: jsonerror.NotFound(fmt.Sprintf("File with media ID %q does not exist", r.MediaMetadata.MediaID)), @@ -259,6 +262,7 @@ func (r *downloadRequest) createRemoteRequest() (*http.Response, *util.JSONRespo if resp.StatusCode != 200 { r.Logger.Printf("Server responded with %d\n", resp.StatusCode) if resp.StatusCode == 404 { + r.Logger.Warnln("Remote server says file does not exist") return nil, &util.JSONResponse{ Code: 404, JSON: jsonerror.NotFound(fmt.Sprintf("File with media ID %q does not exist", r.MediaMetadata.MediaID)), From 8cf507f85f1ba9feae40b2aae4dee00da02070f6 Mon Sep 17 00:00:00 2001 From: Robert Swain Date: Thu, 18 May 2017 09:04:36 +0200 Subject: [PATCH 047/108] mediaapi/writers: Never return server errors to user but log them --- .../matrix-org/dendrite/mediaapi/writers/upload.go | 10 ++++++---- .../matrix-org/dendrite/mediaapi/writers/utils.go | 4 ++-- 2 files changed, 8 insertions(+), 6 deletions(-) diff --git a/src/github.com/matrix-org/dendrite/mediaapi/writers/upload.go b/src/github.com/matrix-org/dendrite/mediaapi/writers/upload.go index c1db78ca7..4640e4ed4 100644 --- a/src/github.com/matrix-org/dendrite/mediaapi/writers/upload.go +++ b/src/github.com/matrix-org/dendrite/mediaapi/writers/upload.go @@ -160,14 +160,14 @@ func Upload(req *http.Request, cfg *config.MediaAPI, db *storage.Database) util. bytesWritten, err := io.Copy(writer, reader) if err != nil { - logger.Infof("Failed to copy %q\n", err) + logger.Warnf("Failed to copy %q\n", err) tmpDirErr := os.RemoveAll(string(tmpDir)) if tmpDirErr != nil { logger.Warnf("Failed to remove tmpDir (%v): %q\n", tmpDir, tmpDirErr) } return util.JSONResponse{ Code: 400, - JSON: jsonerror.Unknown(fmt.Sprintf("Failed to upload: %q", err)), + JSON: jsonerror.Unknown(fmt.Sprintf("Failed to upload")), } } @@ -210,13 +210,14 @@ func Upload(req *http.Request, cfg *config.MediaAPI, db *storage.Database) util. err = db.StoreMediaMetadata(r.MediaMetadata) if err != nil { + logger.Warnf("Failed to store metadata: %q\n", err) tmpDirErr := os.RemoveAll(string(tmpDir)) if tmpDirErr != nil { logger.Warnf("Failed to remove tmpDir (%v): %q\n", tmpDir, tmpDirErr) } return util.JSONResponse{ Code: 400, - JSON: jsonerror.Unknown(fmt.Sprintf("Failed to upload: %q", err)), + JSON: jsonerror.Unknown(fmt.Sprintf("Failed to upload")), } } @@ -225,13 +226,14 @@ func Upload(req *http.Request, cfg *config.MediaAPI, db *storage.Database) util. 
types.Path(getPathFromMediaMetadata(r.MediaMetadata, cfg.BasePath)), ) if err != nil { + logger.Warnf("Failed to move file to final destination: %q\n", err) tmpDirErr := os.RemoveAll(string(tmpDir)) if tmpDirErr != nil { logger.Warnf("Failed to remove tmpDir (%v): %q\n", tmpDir, tmpDirErr) } return util.JSONResponse{ Code: 400, - JSON: jsonerror.Unknown(fmt.Sprintf("Failed to upload: %q", err)), + JSON: jsonerror.Unknown(fmt.Sprintf("Failed to upload")), } } diff --git a/src/github.com/matrix-org/dendrite/mediaapi/writers/utils.go b/src/github.com/matrix-org/dendrite/mediaapi/writers/utils.go index 7b2852aa6..9317404b6 100644 --- a/src/github.com/matrix-org/dendrite/mediaapi/writers/utils.go +++ b/src/github.com/matrix-org/dendrite/mediaapi/writers/utils.go @@ -62,7 +62,7 @@ func createTempFileWriter(basePath types.Path, logger *log.Entry) (*bufio.Writer logger.Infof("Failed to create temp dir %q\n", err) return nil, nil, "", &util.JSONResponse{ Code: 400, - JSON: jsonerror.Unknown(fmt.Sprintf("Failed to upload: %q", err)), + JSON: jsonerror.Unknown(fmt.Sprintf("Failed to upload")), } } writer, tmpFile, err := createFileWriter(tmpDir, "content") @@ -70,7 +70,7 @@ func createTempFileWriter(basePath types.Path, logger *log.Entry) (*bufio.Writer logger.Infof("Failed to create file writer %q\n", err) return nil, nil, "", &util.JSONResponse{ Code: 400, - JSON: jsonerror.Unknown(fmt.Sprintf("Failed to upload: %q", err)), + JSON: jsonerror.Unknown(fmt.Sprintf("Failed to upload")), } } return writer, tmpFile, tmpDir, nil From b80d5ab9195c4602f3b9d2ec1bf58e3ede45a2cf Mon Sep 17 00:00:00 2001 From: Robert Swain Date: Thu, 18 May 2017 09:12:01 +0200 Subject: [PATCH 048/108] cmd/mediaapi-integration-tests: Test downloading same file 100 times Spawns a GET request for the same file in 100 parallel go routines and prints the body (which is some error JSON) in case of not 200 OK. Also prints the number of successful requests. This of course should take command line arguments for the URL and number of requests but that can be done as soon as needed. --- .../cmd/mediaapi-integration-tests/main.go | 68 +++++++++++++++++++ 1 file changed, 68 insertions(+) create mode 100644 src/github.com/matrix-org/dendrite/cmd/mediaapi-integration-tests/main.go diff --git a/src/github.com/matrix-org/dendrite/cmd/mediaapi-integration-tests/main.go b/src/github.com/matrix-org/dendrite/cmd/mediaapi-integration-tests/main.go new file mode 100644 index 000000000..eac779eb0 --- /dev/null +++ b/src/github.com/matrix-org/dendrite/cmd/mediaapi-integration-tests/main.go @@ -0,0 +1,68 @@ +// Copyright 2017 Vector Creations Ltd +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +package main + +import ( + "fmt" + "io/ioutil" + "log" + "net/http" + "sync" +) + +const origin = "matrix.org" +const mediaID = "rZOBfBHnuOoyqBKUIHAaSbcM" + +const requestCount = 100 + +func main() { + httpURL := "http://localhost:7777/api/_matrix/media/v1/download/" + origin + "/" + mediaID + jsonResponses := make(chan string) + + var wg sync.WaitGroup + + wg.Add(requestCount) + + for i := 0; i < requestCount; i++ { + go func() { + defer wg.Done() + res, err := http.Get(httpURL) + if err != nil { + log.Fatal(err) + } else { + defer res.Body.Close() + body, err := ioutil.ReadAll(res.Body) + if err != nil { + log.Fatal(err) + } else { + if res.StatusCode != 200 { + jsonResponses <- string(body) + } + } + } + }() + } + + errorCount := 0 + go func() { + for response := range jsonResponses { + errorCount++ + fmt.Println(response) + } + }() + + wg.Wait() + fmt.Printf("%v/%v requests were successful\n", requestCount-errorCount, requestCount) +} From 5348b64edc01ea23739e4b88d23fd652e49c361f Mon Sep 17 00:00:00 2001 From: Robert Swain Date: Thu, 18 May 2017 10:17:11 +0200 Subject: [PATCH 049/108] mediaapi/writers/download: Reduce complexity of copyToActiveAndPassive --- .../matrix-org/dendrite/mediaapi/writers/download.go | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/src/github.com/matrix-org/dendrite/mediaapi/writers/download.go b/src/github.com/matrix-org/dendrite/mediaapi/writers/download.go index 83ea70225..ac8aaff01 100644 --- a/src/github.com/matrix-org/dendrite/mediaapi/writers/download.go +++ b/src/github.com/matrix-org/dendrite/mediaapi/writers/download.go @@ -297,7 +297,10 @@ func copyToActiveAndPassive(r io.Reader, wActive io.Writer, wPassive io.Writer, break } bytesResponded += int64(bytesTemp) - if copyError == nil || (copyError != errFileIsTooLarge && copyError != errWrite) { + if copyError == nil { + // Note: if we get here then copyError != errFileIsTooLarge && copyError != errWrite + // as if copyError == errResponse || copyError == errWrite then we would have broken + // out of the loop and there are no other cases // if larger than maxFileSize then stop writing to disk and discard cached file if bytesWritten+int64(len(buffer)) > int64(maxFileSize) { copyError = errFileIsTooLarge From f28235c05d48bc419ba6ee2b234ff5130243849a Mon Sep 17 00:00:00 2001 From: Robert Swain Date: Thu, 18 May 2017 11:07:03 +0200 Subject: [PATCH 050/108] mediaapi/writers/upload: Factor out removeDir Reduces complexity of Upload. Note that we never care about the error from os.RemoveAll() beyond logging as we are already in an error case. --- .../dendrite/mediaapi/writers/upload.go | 27 ++++++++----------- 1 file changed, 11 insertions(+), 16 deletions(-) diff --git a/src/github.com/matrix-org/dendrite/mediaapi/writers/upload.go b/src/github.com/matrix-org/dendrite/mediaapi/writers/upload.go index 4640e4ed4..cdebc6461 100644 --- a/src/github.com/matrix-org/dendrite/mediaapi/writers/upload.go +++ b/src/github.com/matrix-org/dendrite/mediaapi/writers/upload.go @@ -94,6 +94,13 @@ type uploadResponse struct { ContentURI string `json:"content_uri"` } +func removeDir(dir types.Path, logger *log.Entry) { + dirErr := os.RemoveAll(string(dir)) + if dirErr != nil { + logger.Warnf("Failed to remove directory (%v): %q\n", dir, dirErr) + } +} + // Upload implements /upload // // This endpoint involves uploading potentially significant amounts of data to the homeserver. @@ -161,10 +168,7 @@ func Upload(req *http.Request, cfg *config.MediaAPI, db *storage.Database) util. 
bytesWritten, err := io.Copy(writer, reader) if err != nil { logger.Warnf("Failed to copy %q\n", err) - tmpDirErr := os.RemoveAll(string(tmpDir)) - if tmpDirErr != nil { - logger.Warnf("Failed to remove tmpDir (%v): %q\n", tmpDir, tmpDirErr) - } + removeDir(tmpDir, logger) return util.JSONResponse{ Code: 400, JSON: jsonerror.Unknown(fmt.Sprintf("Failed to upload")), @@ -192,10 +196,7 @@ func Upload(req *http.Request, cfg *config.MediaAPI, db *storage.Database) util. // check if we already have a record of the media in our database and if so, we can remove the temporary directory err = db.GetMediaMetadata(r.MediaMetadata.MediaID, r.MediaMetadata.Origin, &types.MediaMetadata{}) if err == nil { - tmpDirErr := os.RemoveAll(string(tmpDir)) - if tmpDirErr != nil { - logger.Warnf("Failed to remove tmpDir (%v): %q\n", tmpDir, tmpDirErr) - } + removeDir(tmpDir, logger) return util.JSONResponse{ Code: 200, JSON: uploadResponse{ @@ -211,10 +212,7 @@ func Upload(req *http.Request, cfg *config.MediaAPI, db *storage.Database) util. err = db.StoreMediaMetadata(r.MediaMetadata) if err != nil { logger.Warnf("Failed to store metadata: %q\n", err) - tmpDirErr := os.RemoveAll(string(tmpDir)) - if tmpDirErr != nil { - logger.Warnf("Failed to remove tmpDir (%v): %q\n", tmpDir, tmpDirErr) - } + removeDir(tmpDir, logger) return util.JSONResponse{ Code: 400, JSON: jsonerror.Unknown(fmt.Sprintf("Failed to upload")), @@ -227,10 +225,7 @@ func Upload(req *http.Request, cfg *config.MediaAPI, db *storage.Database) util. ) if err != nil { logger.Warnf("Failed to move file to final destination: %q\n", err) - tmpDirErr := os.RemoveAll(string(tmpDir)) - if tmpDirErr != nil { - logger.Warnf("Failed to remove tmpDir (%v): %q\n", tmpDir, tmpDirErr) - } + removeDir(tmpDir, logger) return util.JSONResponse{ Code: 400, JSON: jsonerror.Unknown(fmt.Sprintf("Failed to upload")), From 3f904e1cdb6198d4a4b5f343d8f048148aef1660 Mon Sep 17 00:00:00 2001 From: Robert Swain Date: Thu, 18 May 2017 11:09:09 +0200 Subject: [PATCH 051/108] mediaapi/writer/upload: Remove unnecessary logic --- src/github.com/matrix-org/dendrite/mediaapi/writers/upload.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/github.com/matrix-org/dendrite/mediaapi/writers/upload.go b/src/github.com/matrix-org/dendrite/mediaapi/writers/upload.go index cdebc6461..fc0c3602a 100644 --- a/src/github.com/matrix-org/dendrite/mediaapi/writers/upload.go +++ b/src/github.com/matrix-org/dendrite/mediaapi/writers/upload.go @@ -203,7 +203,7 @@ func Upload(req *http.Request, cfg *config.MediaAPI, db *storage.Database) util. ContentURI: fmt.Sprintf("mxc://%s/%s", cfg.ServerName, r.MediaMetadata.MediaID), }, } - } else if err != nil && err != sql.ErrNoRows { + } else if err != sql.ErrNoRows { logger.Warnf("Failed to query database for %v: %q", r.MediaMetadata.MediaID, err) } From deee6f84c7cbdbcc612eb6c9ef09a533893a265e Mon Sep 17 00:00:00 2001 From: Robert Swain Date: Thu, 18 May 2017 11:10:41 +0200 Subject: [PATCH 052/108] mediaapi/writers/upload: Move file first as db is source of truth The database is the source of truth. If we add the metadata to the database and it succeeds, and then the file fails to be moved, we think we have a file when we actually don't. 
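
A minimal sketch of the resulting ordering in Upload (not the full handler: it reuses the moveFile, getPathFromMediaMetadata and removeDir helpers already present in this package, and `uploadError` is only a stand-in for the util.JSONResponse returned on failure):

    // Move the uploaded file from the temporary directory into its final
    // location first.
    finalPath := getPathFromMediaMetadata(r.MediaMetadata, cfg.BasePath)
    err := moveFile(
        types.Path(path.Join(string(tmpDir), "content")),
        types.Path(finalPath),
    )
    if err != nil {
        // The upload only exists under tmpDir at this point, so that is all
        // that needs cleaning up.
        removeDir(tmpDir, logger)
        return uploadError
    }
    // Only once the file is in place is the metadata recorded; if that fails,
    // the moved file is removed again so the database remains the source of
    // truth for which files exist.
    if err = db.StoreMediaMetadata(r.MediaMetadata); err != nil {
        removeDir(types.Path(path.Dir(finalPath)), logger)
        return uploadError
    }
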
--- .../dendrite/mediaapi/writers/upload.go | 18 ++++++++++-------- 1 file changed, 10 insertions(+), 8 deletions(-) diff --git a/src/github.com/matrix-org/dendrite/mediaapi/writers/upload.go b/src/github.com/matrix-org/dendrite/mediaapi/writers/upload.go index fc0c3602a..37749ca10 100644 --- a/src/github.com/matrix-org/dendrite/mediaapi/writers/upload.go +++ b/src/github.com/matrix-org/dendrite/mediaapi/writers/upload.go @@ -209,9 +209,14 @@ func Upload(req *http.Request, cfg *config.MediaAPI, db *storage.Database) util. // TODO: generate thumbnails - err = db.StoreMediaMetadata(r.MediaMetadata) + finalPath := getPathFromMediaMetadata(r.MediaMetadata, cfg.BasePath) + + err = moveFile( + types.Path(path.Join(string(tmpDir), "content")), + types.Path(finalPath), + ) if err != nil { - logger.Warnf("Failed to store metadata: %q\n", err) + logger.Warnf("Failed to move file to final destination: %q\n", err) removeDir(tmpDir, logger) return util.JSONResponse{ Code: 400, @@ -219,13 +224,10 @@ func Upload(req *http.Request, cfg *config.MediaAPI, db *storage.Database) util. } } - err = moveFile( - types.Path(path.Join(string(tmpDir), "content")), - types.Path(getPathFromMediaMetadata(r.MediaMetadata, cfg.BasePath)), - ) + err = db.StoreMediaMetadata(r.MediaMetadata) if err != nil { - logger.Warnf("Failed to move file to final destination: %q\n", err) - removeDir(tmpDir, logger) + logger.Warnf("Failed to store metadata: %q\n", err) + removeDir(types.Path(path.Dir(finalPath)), logger) return util.JSONResponse{ Code: 400, JSON: jsonerror.Unknown(fmt.Sprintf("Failed to upload")), From ff3009ffdda4bce3bc0357209ebe1e12a8593a84 Mon Sep 17 00:00:00 2001 From: Robert Swain Date: Thu, 18 May 2017 11:32:30 +0200 Subject: [PATCH 053/108] cmd/dendrite-media-api-server: Add SERVER_NAME configuration --- .../dendrite/cmd/dendrite-media-api-server/main.go | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/src/github.com/matrix-org/dendrite/cmd/dendrite-media-api-server/main.go b/src/github.com/matrix-org/dendrite/cmd/dendrite-media-api-server/main.go index d00bbd49c..58191a8c2 100644 --- a/src/github.com/matrix-org/dendrite/cmd/dendrite-media-api-server/main.go +++ b/src/github.com/matrix-org/dendrite/cmd/dendrite-media-api-server/main.go @@ -22,6 +22,7 @@ import ( "github.com/matrix-org/dendrite/mediaapi/config" "github.com/matrix-org/dendrite/mediaapi/routing" "github.com/matrix-org/dendrite/mediaapi/storage" + "github.com/matrix-org/dendrite/mediaapi/types" log "github.com/Sirupsen/logrus" ) @@ -30,6 +31,7 @@ var ( bindAddr = os.Getenv("BIND_ADDRESS") dataSource = os.Getenv("DATABASE") logDir = os.Getenv("LOG_DIR") + serverName = os.Getenv("SERVER_NAME") ) func main() { @@ -39,8 +41,12 @@ func main() { log.Panic("No BIND_ADDRESS environment variable found.") } + if serverName == "" { + serverName = "localhost" + } + cfg := &config.MediaAPI{ - ServerName: "localhost", + ServerName: types.ServerName(serverName), BasePath: "/Users/robertsw/dendrite", MaxFileSize: 10 * 1024 * 1024, DataSource: dataSource, From 35a0b5d2e99677bd7c6bac31f835c55d1df211f4 Mon Sep 17 00:00:00 2001 From: Robert Swain Date: Thu, 18 May 2017 11:34:01 +0200 Subject: [PATCH 054/108] cmd/dendrite-media-api-server: Add BASE_PATH configuration --- .../dendrite/cmd/dendrite-media-api-server/main.go | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/src/github.com/matrix-org/dendrite/cmd/dendrite-media-api-server/main.go b/src/github.com/matrix-org/dendrite/cmd/dendrite-media-api-server/main.go index 
58191a8c2..3895348db 100644 --- a/src/github.com/matrix-org/dendrite/cmd/dendrite-media-api-server/main.go +++ b/src/github.com/matrix-org/dendrite/cmd/dendrite-media-api-server/main.go @@ -32,6 +32,7 @@ var ( dataSource = os.Getenv("DATABASE") logDir = os.Getenv("LOG_DIR") serverName = os.Getenv("SERVER_NAME") + basePath = os.Getenv("BASE_PATH") ) func main() { @@ -40,6 +41,9 @@ func main() { if bindAddr == "" { log.Panic("No BIND_ADDRESS environment variable found.") } + if basePath == "" { + log.Panic("No BASE_PATH environment variable found.") + } if serverName == "" { serverName = "localhost" @@ -47,7 +51,7 @@ func main() { cfg := &config.MediaAPI{ ServerName: types.ServerName(serverName), - BasePath: "/Users/robertsw/dendrite", + BasePath: types.Path(basePath), MaxFileSize: 10 * 1024 * 1024, DataSource: dataSource, } From 846aece163f9ea145e6544568d8e46c6206d53ed Mon Sep 17 00:00:00 2001 From: Robert Swain Date: Thu, 18 May 2017 11:36:26 +0200 Subject: [PATCH 055/108] mediaapi: MaxFileSize -> MaxFileSizeBytes --- .../cmd/dendrite-media-api-server/main.go | 8 ++++---- .../dendrite/mediaapi/config/config.go | 2 +- .../dendrite/mediaapi/writers/download.go | 16 ++++++++-------- .../dendrite/mediaapi/writers/upload.go | 10 +++++----- 4 files changed, 18 insertions(+), 18 deletions(-) diff --git a/src/github.com/matrix-org/dendrite/cmd/dendrite-media-api-server/main.go b/src/github.com/matrix-org/dendrite/cmd/dendrite-media-api-server/main.go index 3895348db..3dcf74b12 100644 --- a/src/github.com/matrix-org/dendrite/cmd/dendrite-media-api-server/main.go +++ b/src/github.com/matrix-org/dendrite/cmd/dendrite-media-api-server/main.go @@ -50,10 +50,10 @@ func main() { } cfg := &config.MediaAPI{ - ServerName: types.ServerName(serverName), - BasePath: types.Path(basePath), - MaxFileSize: 10 * 1024 * 1024, - DataSource: dataSource, + ServerName: types.ServerName(serverName), + BasePath: types.Path(basePath), + MaxFileSizeBytes: 10 * 1024 * 1024, + DataSource: dataSource, } db, err := storage.Open(cfg.DataSource) diff --git a/src/github.com/matrix-org/dendrite/mediaapi/config/config.go b/src/github.com/matrix-org/dendrite/mediaapi/config/config.go index 2002cd86a..060964d27 100644 --- a/src/github.com/matrix-org/dendrite/mediaapi/config/config.go +++ b/src/github.com/matrix-org/dendrite/mediaapi/config/config.go @@ -24,7 +24,7 @@ type MediaAPI struct { BasePath types.Path `yaml:"base_path"` // The maximum file size in bytes that is allowed to be stored on this server. // Note that remote files larger than this can still be proxied to a client, they will just not be cached. 
- MaxFileSize types.ContentLength `yaml:"base_path"` + MaxFileSizeBytes types.ContentLength `yaml:"base_path"` // The postgres connection config for connecting to the database e.g a postgres:// URI DataSource string `yaml:"database"` } diff --git a/src/github.com/matrix-org/dendrite/mediaapi/writers/download.go b/src/github.com/matrix-org/dendrite/mediaapi/writers/download.go index ac8aaff01..e72fcae07 100644 --- a/src/github.com/matrix-org/dendrite/mediaapi/writers/download.go +++ b/src/github.com/matrix-org/dendrite/mediaapi/writers/download.go @@ -163,7 +163,7 @@ func Download(w http.ResponseWriter, req *http.Request, origin types.ServerName, } } - r.respondFromRemoteFile(w, cfg.BasePath, cfg.MaxFileSize, db, activeRemoteRequests) + r.respondFromRemoteFile(w, cfg.BasePath, cfg.MaxFileSizeBytes, db, activeRemoteRequests) } else { // If we do not have a record and the origin is local, or if we have another error from the database, the file is not found r.Logger.Warnln("Failed to look up file in database:", err) @@ -280,8 +280,8 @@ func (r *downloadRequest) createRemoteRequest() (*http.Response, *util.JSONRespo // copyToActiveAndPassive works like io.Copy except it copies from the reader to both of the writers // If there is an error with the reader or the active writer, that is considered an error // If there is an error with the passive writer, that is non-critical and copying continues -// maxFileSize limits the amount of data written to the passive writer -func copyToActiveAndPassive(r io.Reader, wActive io.Writer, wPassive io.Writer, maxFileSize types.ContentLength, mediaMetadata *types.MediaMetadata) (int64, int64, error) { +// maxFileSizeBytes limits the amount of data written to the passive writer +func copyToActiveAndPassive(r io.Reader, wActive io.Writer, wPassive io.Writer, maxFileSizeBytes types.ContentLength, mediaMetadata *types.MediaMetadata) (int64, int64, error) { var bytesResponded, bytesWritten int64 = 0, 0 var copyError error // Note: the buffer size is the same as is used in io.Copy() @@ -301,8 +301,8 @@ func copyToActiveAndPassive(r io.Reader, wActive io.Writer, wPassive io.Writer, // Note: if we get here then copyError != errFileIsTooLarge && copyError != errWrite // as if copyError == errResponse || copyError == errWrite then we would have broken // out of the loop and there are no other cases - // if larger than maxFileSize then stop writing to disk and discard cached file - if bytesWritten+int64(len(buffer)) > int64(maxFileSize) { + // if larger than maxFileSizeBytes then stop writing to disk and discard cached file + if bytesWritten+int64(len(buffer)) > int64(maxFileSizeBytes) { copyError = errFileIsTooLarge } else { // write to disk @@ -400,7 +400,7 @@ func (r *downloadRequest) commitFileAndMetadata(tmpDir types.Path, basePath type return updateActiveRemoteRequests } -func (r *downloadRequest) respondFromRemoteFile(w http.ResponseWriter, basePath types.Path, maxFileSize types.ContentLength, db *storage.Database, activeRemoteRequests *types.ActiveRemoteRequests) { +func (r *downloadRequest) respondFromRemoteFile(w http.ResponseWriter, basePath types.Path, maxFileSizeBytes types.ContentLength, db *storage.Database, activeRemoteRequests *types.ActiveRemoteRequests) { r.Logger.WithFields(log.Fields{ "MediaID": r.MediaMetadata.MediaID, "Origin": r.MediaMetadata.Origin, @@ -472,7 +472,7 @@ func (r *downloadRequest) respondFromRemoteFile(w http.ResponseWriter, basePath // bytesResponded is the total number of bytes written to the response to the client request // 
bytesWritten is the total number of bytes written to disk - bytesResponded, bytesWritten, fetchError := copyToActiveAndPassive(resp.Body, w, tmpFileWriter, maxFileSize, r.MediaMetadata) + bytesResponded, bytesWritten, fetchError := copyToActiveAndPassive(resp.Body, w, tmpFileWriter, maxFileSizeBytes, r.MediaMetadata) tmpFileWriter.Flush() if fetchError != nil { logFields := log.Fields{ @@ -480,7 +480,7 @@ func (r *downloadRequest) respondFromRemoteFile(w http.ResponseWriter, basePath "Origin": r.MediaMetadata.Origin, } if fetchError == errFileIsTooLarge { - logFields["MaxFileSize"] = maxFileSize + logFields["MaxFileSizeBytes"] = maxFileSizeBytes } r.Logger.WithFields(logFields).Warnln(fetchError) tmpDirErr := os.RemoveAll(string(tmpDir)) diff --git a/src/github.com/matrix-org/dendrite/mediaapi/writers/upload.go b/src/github.com/matrix-org/dendrite/mediaapi/writers/upload.go index 37749ca10..a5085d595 100644 --- a/src/github.com/matrix-org/dendrite/mediaapi/writers/upload.go +++ b/src/github.com/matrix-org/dendrite/mediaapi/writers/upload.go @@ -43,7 +43,7 @@ type uploadRequest struct { } // Validate validates the uploadRequest fields -func (r uploadRequest) Validate(maxFileSize types.ContentLength) *util.JSONResponse { +func (r uploadRequest) Validate(maxFileSizeBytes types.ContentLength) *util.JSONResponse { // TODO: Any validation to be done on ContentDisposition? if r.MediaMetadata.ContentLength < 1 { @@ -52,10 +52,10 @@ func (r uploadRequest) Validate(maxFileSize types.ContentLength) *util.JSONRespo JSON: jsonerror.Unknown("HTTP Content-Length request header must be greater than zero."), } } - if maxFileSize > 0 && r.MediaMetadata.ContentLength > maxFileSize { + if maxFileSizeBytes > 0 && r.MediaMetadata.ContentLength > maxFileSizeBytes { return &util.JSONResponse{ Code: 400, - JSON: jsonerror.Unknown(fmt.Sprintf("HTTP Content-Length is greater than the maximum allowed upload size (%v).", maxFileSize)), + JSON: jsonerror.Unknown(fmt.Sprintf("HTTP Content-Length is greater than the maximum allowed upload size (%v).", maxFileSizeBytes)), } } // TODO: Check if the Content-Type is a valid type? @@ -135,7 +135,7 @@ func Upload(req *http.Request, cfg *config.MediaAPI, db *storage.Database) util. }, } - if resErr = r.Validate(cfg.MaxFileSize); resErr != nil { + if resErr = r.Validate(cfg.MaxFileSizeBytes); resErr != nil { return *resErr } @@ -161,7 +161,7 @@ func Upload(req *http.Request, cfg *config.MediaAPI, db *storage.Database) util. 
// The limited reader restricts how many bytes are read from the body to the specified maximum bytes // Note: the golang HTTP server closes the request body - limitedBody := io.LimitReader(req.Body, int64(cfg.MaxFileSize)) + limitedBody := io.LimitReader(req.Body, int64(cfg.MaxFileSizeBytes)) hasher := sha256.New() reader := io.TeeReader(limitedBody, hasher) From 7727a8c61e3042b13e48980cb96b369b3184d5f9 Mon Sep 17 00:00:00 2001 From: Robert Swain Date: Thu, 18 May 2017 11:44:48 +0200 Subject: [PATCH 056/108] cmd/dendrite-media-api-server: Add MAX_FILE_SIZE_BYTES configuration --- .../dendrite/cmd/dendrite-media-api-server/main.go | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/src/github.com/matrix-org/dendrite/cmd/dendrite-media-api-server/main.go b/src/github.com/matrix-org/dendrite/cmd/dendrite-media-api-server/main.go index 3dcf74b12..1d66a4183 100644 --- a/src/github.com/matrix-org/dendrite/cmd/dendrite-media-api-server/main.go +++ b/src/github.com/matrix-org/dendrite/cmd/dendrite-media-api-server/main.go @@ -17,6 +17,7 @@ package main import ( "net/http" "os" + "strconv" "github.com/matrix-org/dendrite/common" "github.com/matrix-org/dendrite/mediaapi/config" @@ -48,11 +49,16 @@ func main() { if serverName == "" { serverName = "localhost" } + maxFileSizeBytes, err := strconv.ParseInt(os.Getenv("MAX_FILE_SIZE_BYTES"), 10, 64) + if err != nil { + maxFileSizeBytes = 10 * 1024 * 1024 + log.Info("Failed to parse MAX_FILE_SIZE_BYTES. Defaulting to %v bytes.", maxFileSizeBytes) + } cfg := &config.MediaAPI{ ServerName: types.ServerName(serverName), BasePath: types.Path(basePath), - MaxFileSizeBytes: 10 * 1024 * 1024, + MaxFileSizeBytes: types.ContentLength(maxFileSizeBytes), DataSource: dataSource, } From bd9db7557a52295cc4f98cfc1cd6682442766a7b Mon Sep 17 00:00:00 2001 From: Robert Swain Date: Thu, 18 May 2017 11:50:24 +0200 Subject: [PATCH 057/108] mediaapi/README: Add link to spec section --- src/github.com/matrix-org/dendrite/mediaapi/README.md | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/github.com/matrix-org/dendrite/mediaapi/README.md b/src/github.com/matrix-org/dendrite/mediaapi/README.md index 2f51e8fe6..de171ea76 100644 --- a/src/github.com/matrix-org/dendrite/mediaapi/README.md +++ b/src/github.com/matrix-org/dendrite/mediaapi/README.md @@ -1,3 +1,5 @@ # Media API -This server is responsible for serving `/media` requests +This server is responsible for serving `/media` requests as per: + +http://matrix.org/docs/spec/client_server/r0.2.0.html#id43 From c5cd5a93b960d8bbf7851ad589bbecd5f39c36ba Mon Sep 17 00:00:00 2001 From: Robert Swain Date: Thu, 18 May 2017 11:57:44 +0200 Subject: [PATCH 058/108] mediaapi: Use ServerName type from gomatrixserverlib --- .../dendrite/cmd/dendrite-media-api-server/main.go | 3 ++- .../matrix-org/dendrite/mediaapi/config/config.go | 7 +++++-- .../matrix-org/dendrite/mediaapi/routing/routing.go | 3 ++- .../mediaapi/storage/media_repository_table.go | 3 ++- .../matrix-org/dendrite/mediaapi/storage/storage.go | 3 ++- .../matrix-org/dendrite/mediaapi/types/types.go | 11 ++++++----- .../matrix-org/dendrite/mediaapi/writers/download.go | 5 +++-- 7 files changed, 22 insertions(+), 13 deletions(-) diff --git a/src/github.com/matrix-org/dendrite/cmd/dendrite-media-api-server/main.go b/src/github.com/matrix-org/dendrite/cmd/dendrite-media-api-server/main.go index 1d66a4183..04c395454 100644 --- a/src/github.com/matrix-org/dendrite/cmd/dendrite-media-api-server/main.go +++ 
b/src/github.com/matrix-org/dendrite/cmd/dendrite-media-api-server/main.go @@ -24,6 +24,7 @@ import ( "github.com/matrix-org/dendrite/mediaapi/routing" "github.com/matrix-org/dendrite/mediaapi/storage" "github.com/matrix-org/dendrite/mediaapi/types" + "github.com/matrix-org/gomatrixserverlib" log "github.com/Sirupsen/logrus" ) @@ -56,7 +57,7 @@ func main() { } cfg := &config.MediaAPI{ - ServerName: types.ServerName(serverName), + ServerName: gomatrixserverlib.ServerName(serverName), BasePath: types.Path(basePath), MaxFileSizeBytes: types.ContentLength(maxFileSizeBytes), DataSource: dataSource, diff --git a/src/github.com/matrix-org/dendrite/mediaapi/config/config.go b/src/github.com/matrix-org/dendrite/mediaapi/config/config.go index 060964d27..86b5c75ba 100644 --- a/src/github.com/matrix-org/dendrite/mediaapi/config/config.go +++ b/src/github.com/matrix-org/dendrite/mediaapi/config/config.go @@ -14,12 +14,15 @@ package config -import "github.com/matrix-org/dendrite/mediaapi/types" +import ( + "github.com/matrix-org/dendrite/mediaapi/types" + "github.com/matrix-org/gomatrixserverlib" +) // MediaAPI contains the config information necessary to spin up a mediaapi process. type MediaAPI struct { // The name of the server. This is usually the domain name, e.g 'matrix.org', 'localhost'. - ServerName types.ServerName `yaml:"server_name"` + ServerName gomatrixserverlib.ServerName `yaml:"server_name"` // The base path to where media files will be stored. BasePath types.Path `yaml:"base_path"` // The maximum file size in bytes that is allowed to be stored on this server. diff --git a/src/github.com/matrix-org/dendrite/mediaapi/routing/routing.go b/src/github.com/matrix-org/dendrite/mediaapi/routing/routing.go index 4c505ffdc..9b9c1d55a 100644 --- a/src/github.com/matrix-org/dendrite/mediaapi/routing/routing.go +++ b/src/github.com/matrix-org/dendrite/mediaapi/routing/routing.go @@ -23,6 +23,7 @@ import ( "github.com/matrix-org/dendrite/mediaapi/storage" "github.com/matrix-org/dendrite/mediaapi/types" "github.com/matrix-org/dendrite/mediaapi/writers" + "github.com/matrix-org/gomatrixserverlib" "github.com/matrix-org/util" "github.com/prometheus/client_golang/prometheus" ) @@ -51,7 +52,7 @@ func Setup(servMux *http.ServeMux, httpClient *http.Client, cfg *config.MediaAPI w.Header().Set("Content-Type", "application/json") vars := mux.Vars(req) - writers.Download(w, req, types.ServerName(vars["serverName"]), types.MediaID(vars["mediaId"]), cfg, db, activeRemoteRequests) + writers.Download(w, req, gomatrixserverlib.ServerName(vars["serverName"]), types.MediaID(vars["mediaId"]), cfg, db, activeRemoteRequests) })), ) diff --git a/src/github.com/matrix-org/dendrite/mediaapi/storage/media_repository_table.go b/src/github.com/matrix-org/dendrite/mediaapi/storage/media_repository_table.go index 11b9064f1..31846cf48 100644 --- a/src/github.com/matrix-org/dendrite/mediaapi/storage/media_repository_table.go +++ b/src/github.com/matrix-org/dendrite/mediaapi/storage/media_repository_table.go @@ -19,6 +19,7 @@ import ( "time" "github.com/matrix-org/dendrite/mediaapi/types" + "github.com/matrix-org/gomatrixserverlib" ) const mediaSchema = ` @@ -88,7 +89,7 @@ func (s *mediaStatements) insertMedia(mediaMetadata *types.MediaMetadata) error return err } -func (s *mediaStatements) selectMedia(mediaID types.MediaID, mediaOrigin types.ServerName) (*types.MediaMetadata, error) { +func (s *mediaStatements) selectMedia(mediaID types.MediaID, mediaOrigin gomatrixserverlib.ServerName) (*types.MediaMetadata, error) { 
mediaMetadata := types.MediaMetadata{ MediaID: mediaID, Origin: mediaOrigin, diff --git a/src/github.com/matrix-org/dendrite/mediaapi/storage/storage.go b/src/github.com/matrix-org/dendrite/mediaapi/storage/storage.go index 121a06354..17b770cea 100644 --- a/src/github.com/matrix-org/dendrite/mediaapi/storage/storage.go +++ b/src/github.com/matrix-org/dendrite/mediaapi/storage/storage.go @@ -20,6 +20,7 @@ import ( // Import the postgres database driver. _ "github.com/lib/pq" "github.com/matrix-org/dendrite/mediaapi/types" + "github.com/matrix-org/gomatrixserverlib" ) // A Database is used to store room events and stream offsets. @@ -47,7 +48,7 @@ func (d *Database) StoreMediaMetadata(mediaMetadata *types.MediaMetadata) error } // GetMediaMetadata possibly selects the metadata about previously uploaded media from the database. -func (d *Database) GetMediaMetadata(mediaID types.MediaID, mediaOrigin types.ServerName, mediaMetadata *types.MediaMetadata) error { +func (d *Database) GetMediaMetadata(mediaID types.MediaID, mediaOrigin gomatrixserverlib.ServerName, mediaMetadata *types.MediaMetadata) error { metadata, err := d.statements.selectMedia(mediaID, mediaOrigin) mediaMetadata.ContentType = metadata.ContentType mediaMetadata.ContentDisposition = metadata.ContentDisposition diff --git a/src/github.com/matrix-org/dendrite/mediaapi/types/types.go b/src/github.com/matrix-org/dendrite/mediaapi/types/types.go index 34bf80655..2e17ddad1 100644 --- a/src/github.com/matrix-org/dendrite/mediaapi/types/types.go +++ b/src/github.com/matrix-org/dendrite/mediaapi/types/types.go @@ -14,7 +14,11 @@ package types -import "sync" +import ( + "sync" + + "github.com/matrix-org/gomatrixserverlib" +) // ContentDisposition is an HTTP Content-Disposition header string type ContentDisposition string @@ -34,9 +38,6 @@ type Path string // MediaID is a string representing the unique identifier for a file (could be a hash but does not have to be) type MediaID string -// ServerName is the host of a matrix homeserver, e.g. matrix.org -type ServerName string - // RequestMethod is an HTTP request method i.e. GET, POST, etc type RequestMethod string @@ -49,7 +50,7 @@ type UnixMs int64 // MediaMetadata is metadata associated with a media file type MediaMetadata struct { MediaID MediaID - Origin ServerName + Origin gomatrixserverlib.ServerName ContentType ContentType ContentDisposition ContentDisposition ContentLength ContentLength diff --git a/src/github.com/matrix-org/dendrite/mediaapi/writers/download.go b/src/github.com/matrix-org/dendrite/mediaapi/writers/download.go index e72fcae07..e64350ef0 100644 --- a/src/github.com/matrix-org/dendrite/mediaapi/writers/download.go +++ b/src/github.com/matrix-org/dendrite/mediaapi/writers/download.go @@ -32,6 +32,7 @@ import ( "github.com/matrix-org/dendrite/mediaapi/config" "github.com/matrix-org/dendrite/mediaapi/storage" "github.com/matrix-org/dendrite/mediaapi/types" + "github.com/matrix-org/gomatrixserverlib" "github.com/matrix-org/util" ) @@ -90,7 +91,7 @@ var nTries = 5 // If they are present in the cache, they are served directly. // If they are not present in the cache, they are obtained from the remote server and // simultaneously served back to the client and written into the cache. 
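The behaviour described in that comment, streaming a remote file to the requesting client while simultaneously writing it into the local cache, can be sketched in isolation with io.TeeReader. This is a simplified illustration with hypothetical names, not the copyToActiveAndPassive implementation in these patches (which also enforces a size limit and keeps serving the client if the cache write fails):

    package mediasketch

    import (
        "io"
        "net/http"
        "os"
    )

    // fetchAndCache streams the body of remoteURL to the client via w while also
    // writing a copy into cachePath. Unlike the patch, this sketch treats any
    // error (including a failed cache write) as fatal.
    func fetchAndCache(w http.ResponseWriter, remoteURL, cachePath string) error {
        resp, err := http.Get(remoteURL)
        if err != nil {
            return err
        }
        defer resp.Body.Close()

        cacheFile, err := os.Create(cachePath)
        if err != nil {
            return err
        }
        defer cacheFile.Close()

        // Every byte read from the response body is also written to the cache file.
        tee := io.TeeReader(resp.Body, cacheFile)
        _, err = io.Copy(w, tee)
        return err
    }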
-func Download(w http.ResponseWriter, req *http.Request, origin types.ServerName, mediaID types.MediaID, cfg *config.MediaAPI, db *storage.Database, activeRemoteRequests *types.ActiveRemoteRequests) { +func Download(w http.ResponseWriter, req *http.Request, origin gomatrixserverlib.ServerName, mediaID types.MediaID, cfg *config.MediaAPI, db *storage.Database, activeRemoteRequests *types.ActiveRemoteRequests) { r := &downloadRequest{ MediaMetadata: &types.MediaMetadata{ MediaID: mediaID, @@ -529,7 +530,7 @@ func (r *downloadRequest) respondFromRemoteFile(w http.ResponseWriter, basePath // Given a matrix server name, attempt to discover URLs to contact the server // on. -func getMatrixUrls(serverName types.ServerName) []string { +func getMatrixUrls(serverName gomatrixserverlib.ServerName) []string { _, srvs, err := net.LookupSRV("matrix", "tcp", string(serverName)) if err != nil { return []string{"https://" + string(serverName) + ":8448"} From 2fca4bbd65c642dd46a68371efdbed76823fcd27 Mon Sep 17 00:00:00 2001 From: Robert Swain Date: Thu, 18 May 2017 11:58:41 +0200 Subject: [PATCH 059/108] mediaapi/config: Fix max_file_size_bytes YAML tag --- src/github.com/matrix-org/dendrite/mediaapi/config/config.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/github.com/matrix-org/dendrite/mediaapi/config/config.go b/src/github.com/matrix-org/dendrite/mediaapi/config/config.go index 86b5c75ba..0cfb58401 100644 --- a/src/github.com/matrix-org/dendrite/mediaapi/config/config.go +++ b/src/github.com/matrix-org/dendrite/mediaapi/config/config.go @@ -27,7 +27,7 @@ type MediaAPI struct { BasePath types.Path `yaml:"base_path"` // The maximum file size in bytes that is allowed to be stored on this server. // Note that remote files larger than this can still be proxied to a client, they will just not be cached. - MaxFileSizeBytes types.ContentLength `yaml:"base_path"` + MaxFileSizeBytes types.ContentLength `yaml:"max_file_size_bytes"` // The postgres connection config for connecting to the database e.g a postgres:// URI DataSource string `yaml:"database"` } From 28ef35d36a1d569db7daf4a3c1deba914e5e5464 Mon Sep 17 00:00:00 2001 From: Robert Swain Date: Thu, 18 May 2017 12:09:33 +0200 Subject: [PATCH 060/108] mediaapi/storage: Rework GetMediaMetadata API to return new MediaMetadata --- .../matrix-org/dendrite/mediaapi/storage/storage.go | 11 ++--------- .../matrix-org/dendrite/mediaapi/writers/download.go | 6 ++++-- .../matrix-org/dendrite/mediaapi/writers/upload.go | 3 ++- 3 files changed, 8 insertions(+), 12 deletions(-) diff --git a/src/github.com/matrix-org/dendrite/mediaapi/storage/storage.go b/src/github.com/matrix-org/dendrite/mediaapi/storage/storage.go index 17b770cea..c5d5b68a6 100644 --- a/src/github.com/matrix-org/dendrite/mediaapi/storage/storage.go +++ b/src/github.com/matrix-org/dendrite/mediaapi/storage/storage.go @@ -48,13 +48,6 @@ func (d *Database) StoreMediaMetadata(mediaMetadata *types.MediaMetadata) error } // GetMediaMetadata possibly selects the metadata about previously uploaded media from the database. 
-func (d *Database) GetMediaMetadata(mediaID types.MediaID, mediaOrigin gomatrixserverlib.ServerName, mediaMetadata *types.MediaMetadata) error { - metadata, err := d.statements.selectMedia(mediaID, mediaOrigin) - mediaMetadata.ContentType = metadata.ContentType - mediaMetadata.ContentDisposition = metadata.ContentDisposition - mediaMetadata.ContentLength = metadata.ContentLength - mediaMetadata.CreationTimestamp = metadata.CreationTimestamp - mediaMetadata.UploadName = metadata.UploadName - mediaMetadata.UserID = metadata.UserID - return err +func (d *Database) GetMediaMetadata(mediaID types.MediaID, mediaOrigin gomatrixserverlib.ServerName) (*types.MediaMetadata, error) { + return d.statements.selectMedia(mediaID, mediaOrigin) } diff --git a/src/github.com/matrix-org/dendrite/mediaapi/writers/download.go b/src/github.com/matrix-org/dendrite/mediaapi/writers/download.go index e64350ef0..02c256d4a 100644 --- a/src/github.com/matrix-org/dendrite/mediaapi/writers/download.go +++ b/src/github.com/matrix-org/dendrite/mediaapi/writers/download.go @@ -115,10 +115,11 @@ func Download(w http.ResponseWriter, req *http.Request, origin gomatrixserverlib } // check if we have a record of the media in our database - err := db.GetMediaMetadata(r.MediaMetadata.MediaID, r.MediaMetadata.Origin, r.MediaMetadata) + mediaMetadata, err := db.GetMediaMetadata(r.MediaMetadata.MediaID, r.MediaMetadata.Origin) if err == nil { // If we have a record, we can respond from the local file + r.MediaMetadata = mediaMetadata r.respondFromLocalFile(w, cfg.BasePath) return } else if err == sql.ErrNoRows && r.MediaMetadata.Origin != cfg.ServerName { @@ -131,9 +132,10 @@ func Download(w http.ResponseWriter, req *http.Request, origin gomatrixserverlib for tries := 0; ; tries++ { activeRemoteRequests.Lock() - err = db.GetMediaMetadata(r.MediaMetadata.MediaID, r.MediaMetadata.Origin, r.MediaMetadata) + mediaMetadata, err = db.GetMediaMetadata(r.MediaMetadata.MediaID, r.MediaMetadata.Origin) if err == nil { // If we have a record, we can respond from the local file + r.MediaMetadata = mediaMetadata r.respondFromLocalFile(w, cfg.BasePath) activeRemoteRequests.Unlock() return diff --git a/src/github.com/matrix-org/dendrite/mediaapi/writers/upload.go b/src/github.com/matrix-org/dendrite/mediaapi/writers/upload.go index a5085d595..ae423d348 100644 --- a/src/github.com/matrix-org/dendrite/mediaapi/writers/upload.go +++ b/src/github.com/matrix-org/dendrite/mediaapi/writers/upload.go @@ -194,8 +194,9 @@ func Upload(req *http.Request, cfg *config.MediaAPI, db *storage.Database) util. 
}).Info("File uploaded") // check if we already have a record of the media in our database and if so, we can remove the temporary directory - err = db.GetMediaMetadata(r.MediaMetadata.MediaID, r.MediaMetadata.Origin, &types.MediaMetadata{}) + mediaMetadata, err := db.GetMediaMetadata(r.MediaMetadata.MediaID, r.MediaMetadata.Origin) if err == nil { + r.MediaMetadata = mediaMetadata removeDir(tmpDir, logger) return util.JSONResponse{ Code: 200, From 8085c1f8635c54352da62f80b13195e5c881fa4b Mon Sep 17 00:00:00 2001 From: Robert Swain Date: Thu, 18 May 2017 12:19:03 +0200 Subject: [PATCH 061/108] mediaapi/types: Clarify what is ActiveRemoteRequests.Set's key --- src/github.com/matrix-org/dendrite/mediaapi/types/types.go | 1 + 1 file changed, 1 insertion(+) diff --git a/src/github.com/matrix-org/dendrite/mediaapi/types/types.go b/src/github.com/matrix-org/dendrite/mediaapi/types/types.go index 2e17ddad1..0da5b1017 100644 --- a/src/github.com/matrix-org/dendrite/mediaapi/types/types.go +++ b/src/github.com/matrix-org/dendrite/mediaapi/types/types.go @@ -63,5 +63,6 @@ type MediaMetadata struct { // It is used for ensuring multiple requests for the same file do not clobber each other. type ActiveRemoteRequests struct { sync.Mutex + // The string key is an mxc:// URL Set map[string]*sync.Cond } From ec0d584fe702ae4b03098d7fb253228af2874bfd Mon Sep 17 00:00:00 2001 From: Robert Swain Date: Thu, 18 May 2017 12:23:17 +0200 Subject: [PATCH 062/108] cmd/dendrite-media-api-server: Log format string with Infof not Info --- .../matrix-org/dendrite/cmd/dendrite-media-api-server/main.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/github.com/matrix-org/dendrite/cmd/dendrite-media-api-server/main.go b/src/github.com/matrix-org/dendrite/cmd/dendrite-media-api-server/main.go index 04c395454..62d9c83e8 100644 --- a/src/github.com/matrix-org/dendrite/cmd/dendrite-media-api-server/main.go +++ b/src/github.com/matrix-org/dendrite/cmd/dendrite-media-api-server/main.go @@ -53,7 +53,7 @@ func main() { maxFileSizeBytes, err := strconv.ParseInt(os.Getenv("MAX_FILE_SIZE_BYTES"), 10, 64) if err != nil { maxFileSizeBytes = 10 * 1024 * 1024 - log.Info("Failed to parse MAX_FILE_SIZE_BYTES. Defaulting to %v bytes.", maxFileSizeBytes) + log.Infof("Failed to parse MAX_FILE_SIZE_BYTES. 
Defaulting to %v bytes.", maxFileSizeBytes) } cfg := &config.MediaAPI{ From 04c4a2d05a69f1e079cc8952426f7faa28291556 Mon Sep 17 00:00:00 2001 From: Robert Swain Date: Thu, 18 May 2017 15:50:09 +0200 Subject: [PATCH 063/108] cmd/dendrite-media-api-server: Move os.Getenv() for consistency --- .../dendrite/cmd/dendrite-media-api-server/main.go | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/src/github.com/matrix-org/dendrite/cmd/dendrite-media-api-server/main.go b/src/github.com/matrix-org/dendrite/cmd/dendrite-media-api-server/main.go index 62d9c83e8..e29a96a14 100644 --- a/src/github.com/matrix-org/dendrite/cmd/dendrite-media-api-server/main.go +++ b/src/github.com/matrix-org/dendrite/cmd/dendrite-media-api-server/main.go @@ -30,11 +30,12 @@ import ( ) var ( - bindAddr = os.Getenv("BIND_ADDRESS") - dataSource = os.Getenv("DATABASE") - logDir = os.Getenv("LOG_DIR") - serverName = os.Getenv("SERVER_NAME") - basePath = os.Getenv("BASE_PATH") + bindAddr = os.Getenv("BIND_ADDRESS") + dataSource = os.Getenv("DATABASE") + logDir = os.Getenv("LOG_DIR") + serverName = os.Getenv("SERVER_NAME") + basePath = os.Getenv("BASE_PATH") + maxFileSizeBytesString = os.Getenv("MAX_FILE_SIZE_BYTES") ) func main() { @@ -50,7 +51,7 @@ func main() { if serverName == "" { serverName = "localhost" } - maxFileSizeBytes, err := strconv.ParseInt(os.Getenv("MAX_FILE_SIZE_BYTES"), 10, 64) + maxFileSizeBytes, err := strconv.ParseInt(maxFileSizeBytesString, 10, 64) if err != nil { maxFileSizeBytes = 10 * 1024 * 1024 log.Infof("Failed to parse MAX_FILE_SIZE_BYTES. Defaulting to %v bytes.", maxFileSizeBytes) From 1f2ac60bee5dab11621d986ae463d9165c4c26ba Mon Sep 17 00:00:00 2001 From: Robert Swain Date: Thu, 18 May 2017 15:53:48 +0200 Subject: [PATCH 064/108] mediaapi/routing: Sync make() to makeAPI() as in clientapi --- .../matrix-org/dendrite/mediaapi/routing/routing.go | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/src/github.com/matrix-org/dendrite/mediaapi/routing/routing.go b/src/github.com/matrix-org/dendrite/mediaapi/routing/routing.go index 9b9c1d55a..ae4275bb8 100644 --- a/src/github.com/matrix-org/dendrite/mediaapi/routing/routing.go +++ b/src/github.com/matrix-org/dendrite/mediaapi/routing/routing.go @@ -35,9 +35,9 @@ const pathPrefixR0 = "/_matrix/media/v1" func Setup(servMux *http.ServeMux, httpClient *http.Client, cfg *config.MediaAPI, db *storage.Database) { apiMux := mux.NewRouter() r0mux := apiMux.PathPrefix(pathPrefixR0).Subrouter() - r0mux.Handle("/upload", make("upload", util.NewJSONRequestHandler(func(req *http.Request) util.JSONResponse { + r0mux.Handle("/upload", makeAPI("upload", func(req *http.Request) util.JSONResponse { return writers.Upload(req, cfg, db) - }))) + })) activeRemoteRequests := &types.ActiveRemoteRequests{ Set: map[string]*sync.Cond{}, @@ -60,7 +60,8 @@ func Setup(servMux *http.ServeMux, httpClient *http.Client, cfg *config.MediaAPI servMux.Handle("/api/", http.StripPrefix("/api", apiMux)) } -// make a util.JSONRequestHandler into an http.Handler -func make(metricsName string, h util.JSONRequestHandler) http.Handler { +// make a util.JSONRequestHandler function into an http.Handler. 
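For reference, the ActiveRemoteRequests map documented in patch 061 above exists to stop concurrent downloads of the same remote file from clobbering each other. Stripped to its essentials, that wait-or-fetch pattern looks roughly like the following sketch (hypothetical names; the real download.go also bounds the number of retries):

    package mediasketch

    import "sync"

    // activeRequests tracks in-flight remote fetches, keyed by mxc:// URL.
    type activeRequests struct {
        sync.Mutex
        set map[string]*sync.Cond
    }

    // fetchOnce lets exactly one caller per mxc URL perform the fetch; the others
    // wait on a condition variable and re-check the local cache once woken.
    func (a *activeRequests) fetchOnce(mxcURL string, inCache func() bool, fetch func()) {
        a.Lock()
        for {
            if inCache() { // e.g. a database lookup; called with the lock held here
                a.Unlock()
                return
            }
            cond, inFlight := a.set[mxcURL]
            if !inFlight {
                break // no one else is fetching this URL, so we will
            }
            cond.Wait() // releases the lock while waiting, reacquires it on wake-up
        }
        cond := sync.NewCond(&a.Mutex)
        a.set[mxcURL] = cond
        a.Unlock()

        fetch()

        a.Lock()
        delete(a.set, mxcURL)
        cond.Broadcast() // wake the waiters so they can re-check the cache
        a.Unlock()
    }

A caller would construct it with the map initialised, e.g. &activeRequests{set: map[string]*sync.Cond{}}, matching the Set: map[string]*sync.Cond{} initialisation in routing.go above.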
+func makeAPI(metricsName string, f func(*http.Request) util.JSONResponse) http.Handler { + h := util.NewJSONRequestHandler(f) return prometheus.InstrumentHandler(metricsName, util.MakeJSONAPI(h)) } From 2e795ed8aa5ce50807d739bd22db7dfb575bbd08 Mon Sep 17 00:00:00 2001 From: Robert Swain Date: Thu, 18 May 2017 15:57:07 +0200 Subject: [PATCH 065/108] mediaapi/storage: Improve GetMediaMetadata description --- .../matrix-org/dendrite/mediaapi/storage/storage.go | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/github.com/matrix-org/dendrite/mediaapi/storage/storage.go b/src/github.com/matrix-org/dendrite/mediaapi/storage/storage.go index c5d5b68a6..072ec4f8f 100644 --- a/src/github.com/matrix-org/dendrite/mediaapi/storage/storage.go +++ b/src/github.com/matrix-org/dendrite/mediaapi/storage/storage.go @@ -47,7 +47,9 @@ func (d *Database) StoreMediaMetadata(mediaMetadata *types.MediaMetadata) error return d.statements.insertMedia(mediaMetadata) } -// GetMediaMetadata possibly selects the metadata about previously uploaded media from the database. +// GetMediaMetadata returns metadata about media stored on this server. The media could +// have been uploaded to this server or fetched from another server and cached here. +// Returns sql.ErrNoRows if there is no metadata associated with this media. func (d *Database) GetMediaMetadata(mediaID types.MediaID, mediaOrigin gomatrixserverlib.ServerName) (*types.MediaMetadata, error) { return d.statements.selectMedia(mediaID, mediaOrigin) } From 9fc5abdb3f9b644ca0aaece65b30cead384c8037 Mon Sep 17 00:00:00 2001 From: Robert Swain Date: Thu, 18 May 2017 16:01:06 +0200 Subject: [PATCH 066/108] mediaapi/writers: Rename utils.go to fileutils.go Better reflects the content of the file. --- .../dendrite/mediaapi/writers/{utils.go => fileutils.go} | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename src/github.com/matrix-org/dendrite/mediaapi/writers/{utils.go => fileutils.go} (100%) diff --git a/src/github.com/matrix-org/dendrite/mediaapi/writers/utils.go b/src/github.com/matrix-org/dendrite/mediaapi/writers/fileutils.go similarity index 100% rename from src/github.com/matrix-org/dendrite/mediaapi/writers/utils.go rename to src/github.com/matrix-org/dendrite/mediaapi/writers/fileutils.go From f5422787a18e49ab189c847cf560da3313950354 Mon Sep 17 00:00:00 2001 From: Robert Swain Date: Thu, 18 May 2017 16:02:43 +0200 Subject: [PATCH 067/108] mediaapi/writers: Move single-value error return assignment into if --- .../matrix-org/dendrite/mediaapi/writers/fileutils.go | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/github.com/matrix-org/dendrite/mediaapi/writers/fileutils.go b/src/github.com/matrix-org/dendrite/mediaapi/writers/fileutils.go index 9317404b6..e2d3daf6f 100644 --- a/src/github.com/matrix-org/dendrite/mediaapi/writers/fileutils.go +++ b/src/github.com/matrix-org/dendrite/mediaapi/writers/fileutils.go @@ -30,8 +30,7 @@ import ( // createTempDir creates a tmp/ directory within baseDirectory and returns its path func createTempDir(baseDirectory types.Path) (types.Path, error) { baseTmpDir := path.Join(string(baseDirectory), "tmp") - err := os.MkdirAll(baseTmpDir, 0770) - if err != nil { + if err := os.MkdirAll(baseTmpDir, 0770); err != nil { log.Printf("Failed to create base temp dir: %v\n", err) return "", err } From 995e1f2c993ea98382fb3cb137610f3ff511ce37 Mon Sep 17 00:00:00 2001 From: Robert Swain Date: Thu, 18 May 2017 17:25:12 +0200 Subject: [PATCH 068/108] cmd/dendrite-media-api-server: Make base 
path absolute --- .../dendrite/cmd/dendrite-media-api-server/main.go | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/src/github.com/matrix-org/dendrite/cmd/dendrite-media-api-server/main.go b/src/github.com/matrix-org/dendrite/cmd/dendrite-media-api-server/main.go index e29a96a14..7f43dc9c3 100644 --- a/src/github.com/matrix-org/dendrite/cmd/dendrite-media-api-server/main.go +++ b/src/github.com/matrix-org/dendrite/cmd/dendrite-media-api-server/main.go @@ -17,6 +17,7 @@ package main import ( "net/http" "os" + "path/filepath" "strconv" "github.com/matrix-org/dendrite/common" @@ -47,6 +48,10 @@ func main() { if basePath == "" { log.Panic("No BASE_PATH environment variable found.") } + absBasePath, err := filepath.Abs(basePath) + if err != nil { + log.Panicf("BASE_PATH is invalid (must be able to make absolute): %v\n", err) + } if serverName == "" { serverName = "localhost" @@ -59,7 +64,7 @@ func main() { cfg := &config.MediaAPI{ ServerName: gomatrixserverlib.ServerName(serverName), - BasePath: types.Path(basePath), + BasePath: types.Path(absBasePath), MaxFileSizeBytes: types.ContentLength(maxFileSizeBytes), DataSource: dataSource, } From 10a2b2f8e6c54ddba2f7aa665bd1eebcd6d82bae Mon Sep 17 00:00:00 2001 From: Robert Swain Date: Thu, 18 May 2017 17:37:32 +0200 Subject: [PATCH 069/108] mediaapi: Also rename all basePath variables to absBasePath for clarity --- .../cmd/dendrite-media-api-server/main.go | 2 +- .../dendrite/mediaapi/config/config.go | 4 ++-- .../dendrite/mediaapi/writers/download.go | 22 +++++++++---------- .../dendrite/mediaapi/writers/fileutils.go | 8 +++---- .../dendrite/mediaapi/writers/upload.go | 4 ++-- 5 files changed, 20 insertions(+), 20 deletions(-) diff --git a/src/github.com/matrix-org/dendrite/cmd/dendrite-media-api-server/main.go b/src/github.com/matrix-org/dendrite/cmd/dendrite-media-api-server/main.go index 7f43dc9c3..1d827eba2 100644 --- a/src/github.com/matrix-org/dendrite/cmd/dendrite-media-api-server/main.go +++ b/src/github.com/matrix-org/dendrite/cmd/dendrite-media-api-server/main.go @@ -64,7 +64,7 @@ func main() { cfg := &config.MediaAPI{ ServerName: gomatrixserverlib.ServerName(serverName), - BasePath: types.Path(absBasePath), + AbsBasePath: types.Path(absBasePath), MaxFileSizeBytes: types.ContentLength(maxFileSizeBytes), DataSource: dataSource, } diff --git a/src/github.com/matrix-org/dendrite/mediaapi/config/config.go b/src/github.com/matrix-org/dendrite/mediaapi/config/config.go index 0cfb58401..c7346d313 100644 --- a/src/github.com/matrix-org/dendrite/mediaapi/config/config.go +++ b/src/github.com/matrix-org/dendrite/mediaapi/config/config.go @@ -23,8 +23,8 @@ import ( type MediaAPI struct { // The name of the server. This is usually the domain name, e.g 'matrix.org', 'localhost'. ServerName gomatrixserverlib.ServerName `yaml:"server_name"` - // The base path to where media files will be stored. - BasePath types.Path `yaml:"base_path"` + // The absolute base path to where media files will be stored. + AbsBasePath types.Path `yaml:"abs_base_path"` // The maximum file size in bytes that is allowed to be stored on this server. // Note that remote files larger than this can still be proxied to a client, they will just not be cached. 
MaxFileSizeBytes types.ContentLength `yaml:"max_file_size_bytes"` diff --git a/src/github.com/matrix-org/dendrite/mediaapi/writers/download.go b/src/github.com/matrix-org/dendrite/mediaapi/writers/download.go index 02c256d4a..667849220 100644 --- a/src/github.com/matrix-org/dendrite/mediaapi/writers/download.go +++ b/src/github.com/matrix-org/dendrite/mediaapi/writers/download.go @@ -120,7 +120,7 @@ func Download(w http.ResponseWriter, req *http.Request, origin gomatrixserverlib if err == nil { // If we have a record, we can respond from the local file r.MediaMetadata = mediaMetadata - r.respondFromLocalFile(w, cfg.BasePath) + r.respondFromLocalFile(w, cfg.AbsBasePath) return } else if err == sql.ErrNoRows && r.MediaMetadata.Origin != cfg.ServerName { // If we do not have a record and the origin is remote, we need to fetch it and respond with that file @@ -136,7 +136,7 @@ func Download(w http.ResponseWriter, req *http.Request, origin gomatrixserverlib if err == nil { // If we have a record, we can respond from the local file r.MediaMetadata = mediaMetadata - r.respondFromLocalFile(w, cfg.BasePath) + r.respondFromLocalFile(w, cfg.AbsBasePath) activeRemoteRequests.Unlock() return } @@ -166,7 +166,7 @@ func Download(w http.ResponseWriter, req *http.Request, origin gomatrixserverlib } } - r.respondFromRemoteFile(w, cfg.BasePath, cfg.MaxFileSizeBytes, db, activeRemoteRequests) + r.respondFromRemoteFile(w, cfg.AbsBasePath, cfg.MaxFileSizeBytes, db, activeRemoteRequests) } else { // If we do not have a record and the origin is local, or if we have another error from the database, the file is not found r.Logger.Warnln("Failed to look up file in database:", err) @@ -177,7 +177,7 @@ func Download(w http.ResponseWriter, req *http.Request, origin gomatrixserverlib } } -func (r *downloadRequest) respondFromLocalFile(w http.ResponseWriter, basePath types.Path) { +func (r *downloadRequest) respondFromLocalFile(w http.ResponseWriter, absBasePath types.Path) { r.Logger.WithFields(log.Fields{ "MediaID": r.MediaMetadata.MediaID, "Origin": r.MediaMetadata.Origin, @@ -187,7 +187,7 @@ func (r *downloadRequest) respondFromLocalFile(w http.ResponseWriter, basePath t "Content-Disposition": r.MediaMetadata.ContentDisposition, }).Infof("Downloading file") - filePath := getPathFromMediaMetadata(r.MediaMetadata, basePath) + filePath := getPathFromMediaMetadata(r.MediaMetadata, absBasePath) file, err := os.Open(filePath) if err != nil { // FIXME: Remove erroneous file from database? 
@@ -351,7 +351,7 @@ func completeRemoteRequest(activeRemoteRequests *types.ActiveRemoteRequests, mxc activeRemoteRequests.Unlock() } -func (r *downloadRequest) commitFileAndMetadata(tmpDir types.Path, basePath types.Path, activeRemoteRequests *types.ActiveRemoteRequests, db *storage.Database, mxcURL string) bool { +func (r *downloadRequest) commitFileAndMetadata(tmpDir types.Path, absBasePath types.Path, activeRemoteRequests *types.ActiveRemoteRequests, db *storage.Database, mxcURL string) bool { updateActiveRemoteRequests := true r.Logger.WithFields(log.Fields{ @@ -366,7 +366,7 @@ func (r *downloadRequest) commitFileAndMetadata(tmpDir types.Path, basePath type // The database is the source of truth so we need to have moved the file first err := moveFile( types.Path(path.Join(string(tmpDir), "content")), - types.Path(getPathFromMediaMetadata(r.MediaMetadata, basePath)), + types.Path(getPathFromMediaMetadata(r.MediaMetadata, absBasePath)), ) if err != nil { tmpDirErr := os.RemoveAll(string(tmpDir)) @@ -387,7 +387,7 @@ func (r *downloadRequest) commitFileAndMetadata(tmpDir types.Path, basePath type // if written to disk, add to db err = db.StoreMediaMetadata(r.MediaMetadata) if err != nil { - finalDir := path.Dir(getPathFromMediaMetadata(r.MediaMetadata, basePath)) + finalDir := path.Dir(getPathFromMediaMetadata(r.MediaMetadata, absBasePath)) finalDirErr := os.RemoveAll(finalDir) if finalDirErr != nil { r.Logger.Warnf("Failed to remove finalDir (%v): %q\n", finalDir, finalDirErr) @@ -403,7 +403,7 @@ func (r *downloadRequest) commitFileAndMetadata(tmpDir types.Path, basePath type return updateActiveRemoteRequests } -func (r *downloadRequest) respondFromRemoteFile(w http.ResponseWriter, basePath types.Path, maxFileSizeBytes types.ContentLength, db *storage.Database, activeRemoteRequests *types.ActiveRemoteRequests) { +func (r *downloadRequest) respondFromRemoteFile(w http.ResponseWriter, absBasePath types.Path, maxFileSizeBytes types.ContentLength, db *storage.Database, activeRemoteRequests *types.ActiveRemoteRequests) { r.Logger.WithFields(log.Fields{ "MediaID": r.MediaMetadata.MediaID, "Origin": r.MediaMetadata.Origin, @@ -459,7 +459,7 @@ func (r *downloadRequest) respondFromRemoteFile(w http.ResponseWriter, basePath w.Header().Set("Content-Security-Policy", contentSecurityPolicy) // create the temporary file writer - tmpFileWriter, tmpFile, tmpDir, errorResponse := createTempFileWriter(basePath, r.Logger) + tmpFileWriter, tmpFile, tmpDir, errorResponse := createTempFileWriter(absBasePath, r.Logger) if errorResponse != nil { r.jsonErrorResponse(w, *errorResponse) return @@ -516,7 +516,7 @@ func (r *downloadRequest) respondFromRemoteFile(w http.ResponseWriter, basePath r.MediaMetadata.ContentLength = types.ContentLength(bytesWritten) r.MediaMetadata.UserID = types.MatrixUserID("@:" + string(r.MediaMetadata.Origin)) - updateActiveRemoteRequests = r.commitFileAndMetadata(tmpDir, basePath, activeRemoteRequests, db, mxcURL) + updateActiveRemoteRequests = r.commitFileAndMetadata(tmpDir, absBasePath, activeRemoteRequests, db, mxcURL) // TODO: generate thumbnails diff --git a/src/github.com/matrix-org/dendrite/mediaapi/writers/fileutils.go b/src/github.com/matrix-org/dendrite/mediaapi/writers/fileutils.go index e2d3daf6f..7045df06a 100644 --- a/src/github.com/matrix-org/dendrite/mediaapi/writers/fileutils.go +++ b/src/github.com/matrix-org/dendrite/mediaapi/writers/fileutils.go @@ -55,8 +55,8 @@ func createFileWriter(directory types.Path, filename types.Filename) (*bufio.Wri return 
bufio.NewWriter(file), file, nil } -func createTempFileWriter(basePath types.Path, logger *log.Entry) (*bufio.Writer, *os.File, types.Path, *util.JSONResponse) { - tmpDir, err := createTempDir(basePath) +func createTempFileWriter(absBasePath types.Path, logger *log.Entry) (*bufio.Writer, *os.File, types.Path, *util.JSONResponse) { + tmpDir, err := createTempDir(absBasePath) if err != nil { logger.Infof("Failed to create temp dir %q\n", err) return nil, nil, "", &util.JSONResponse{ @@ -75,9 +75,9 @@ func createTempFileWriter(basePath types.Path, logger *log.Entry) (*bufio.Writer return writer, tmpFile, tmpDir, nil } -func getPathFromMediaMetadata(m *types.MediaMetadata, basePath types.Path) string { +func getPathFromMediaMetadata(m *types.MediaMetadata, absBasePath types.Path) string { return path.Join( - string(basePath), + string(absBasePath), string(m.Origin), string(m.MediaID[:3]), string(m.MediaID[3:]), diff --git a/src/github.com/matrix-org/dendrite/mediaapi/writers/upload.go b/src/github.com/matrix-org/dendrite/mediaapi/writers/upload.go index ae423d348..668c1e5f9 100644 --- a/src/github.com/matrix-org/dendrite/mediaapi/writers/upload.go +++ b/src/github.com/matrix-org/dendrite/mediaapi/writers/upload.go @@ -153,7 +153,7 @@ func Upload(req *http.Request, cfg *config.MediaAPI, db *storage.Database) util. "Content-Disposition": r.MediaMetadata.ContentDisposition, }).Info("Uploading file") - writer, file, tmpDir, errorResponse := createTempFileWriter(cfg.BasePath, logger) + writer, file, tmpDir, errorResponse := createTempFileWriter(cfg.AbsBasePath, logger) if errorResponse != nil { return *errorResponse } @@ -210,7 +210,7 @@ func Upload(req *http.Request, cfg *config.MediaAPI, db *storage.Database) util. // TODO: generate thumbnails - finalPath := getPathFromMediaMetadata(r.MediaMetadata, cfg.BasePath) + finalPath := getPathFromMediaMetadata(r.MediaMetadata, cfg.AbsBasePath) err = moveFile( types.Path(path.Join(string(tmpDir), "content")), From 00e8fed3a704610f5c85a0dc53ac0537783d771f Mon Sep 17 00:00:00 2001 From: Robert Swain Date: Thu, 18 May 2017 17:39:30 +0200 Subject: [PATCH 070/108] mediaapi/writers: Add validation and error handling to getPathFromMediaMetadata --- .../dendrite/mediaapi/writers/download.go | 27 ++++++++++-- .../dendrite/mediaapi/writers/fileutils.go | 41 ++++++++++++++++--- .../dendrite/mediaapi/writers/upload.go | 10 ++++- 3 files changed, 68 insertions(+), 10 deletions(-) diff --git a/src/github.com/matrix-org/dendrite/mediaapi/writers/download.go b/src/github.com/matrix-org/dendrite/mediaapi/writers/download.go index 667849220..c9c9be441 100644 --- a/src/github.com/matrix-org/dendrite/mediaapi/writers/download.go +++ b/src/github.com/matrix-org/dendrite/mediaapi/writers/download.go @@ -187,7 +187,16 @@ func (r *downloadRequest) respondFromLocalFile(w http.ResponseWriter, absBasePat "Content-Disposition": r.MediaMetadata.ContentDisposition, }).Infof("Downloading file") - filePath := getPathFromMediaMetadata(r.MediaMetadata, absBasePath) + filePath, err := getPathFromMediaMetadata(r.MediaMetadata, absBasePath) + if err != nil { + // FIXME: Remove erroneous file from database? + r.Logger.Warnln("Failed to get file path from metadata:", err) + r.jsonErrorResponse(w, util.JSONResponse{ + Code: 404, + JSON: jsonerror.NotFound(fmt.Sprintf("File with media ID %q does not exist", r.MediaMetadata.MediaID)), + }) + return + } file, err := os.Open(filePath) if err != nil { // FIXME: Remove erroneous file from database? 
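The error now checked at these call sites comes from the path validation added to getPathFromMediaMetadata in the next hunk: join the parts, resolve to an absolute path, then require the absolute base path as a prefix. That check can be exercised on its own; the sketch below uses illustrative names and is not the patch's exact code:

    package mediasketch

    import (
        "fmt"
        "path/filepath"
        "strings"
    )

    // safeJoin joins parts beneath absBase and rejects any result that escapes it,
    // for example via ".." components smuggled into an origin or media ID.
    func safeJoin(absBase string, parts ...string) (string, error) {
        p, err := filepath.Abs(filepath.Join(append([]string{absBase}, parts...)...))
        if err != nil {
            return "", err
        }
        if !strings.HasPrefix(p, absBase) {
            return "", fmt.Errorf("path %q escapes base directory %q", p, absBase)
        }
        return p, nil
    }

With this helper, safeJoin("/var/dendrite/media", "remote.example", "..", "..", "etc", "passwd") fails the prefix test, while safeJoin("/var/dendrite/media", "remote.example", "qwe", "rty") resolves to /var/dendrite/media/remote.example/qwe/rty.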
@@ -364,9 +373,19 @@ func (r *downloadRequest) commitFileAndMetadata(tmpDir types.Path, absBasePath t }).Infof("Storing file metadata to media repository database") // The database is the source of truth so we need to have moved the file first - err := moveFile( + finalPath, err := getPathFromMediaMetadata(r.MediaMetadata, absBasePath) + if err != nil { + r.Logger.Warnf("Failed to get file path from metadata: %q\n", err) + tmpDirErr := os.RemoveAll(string(tmpDir)) + if tmpDirErr != nil { + r.Logger.Warnf("Failed to remove tmpDir (%v): %q\n", tmpDir, tmpDirErr) + } + return updateActiveRemoteRequests + } + + err = moveFile( types.Path(path.Join(string(tmpDir), "content")), - types.Path(getPathFromMediaMetadata(r.MediaMetadata, absBasePath)), + types.Path(finalPath), ) if err != nil { tmpDirErr := os.RemoveAll(string(tmpDir)) @@ -387,7 +406,7 @@ func (r *downloadRequest) commitFileAndMetadata(tmpDir types.Path, absBasePath t // if written to disk, add to db err = db.StoreMediaMetadata(r.MediaMetadata) if err != nil { - finalDir := path.Dir(getPathFromMediaMetadata(r.MediaMetadata, absBasePath)) + finalDir := path.Dir(finalPath) finalDirErr := os.RemoveAll(finalDir) if finalDirErr != nil { r.Logger.Warnf("Failed to remove finalDir (%v): %q\n", finalDir, finalDirErr) diff --git a/src/github.com/matrix-org/dendrite/mediaapi/writers/fileutils.go b/src/github.com/matrix-org/dendrite/mediaapi/writers/fileutils.go index 7045df06a..d931707b0 100644 --- a/src/github.com/matrix-org/dendrite/mediaapi/writers/fileutils.go +++ b/src/github.com/matrix-org/dendrite/mediaapi/writers/fileutils.go @@ -20,6 +20,8 @@ import ( "io/ioutil" "os" "path" + "path/filepath" + "strings" log "github.com/Sirupsen/logrus" "github.com/matrix-org/dendrite/clientapi/jsonerror" @@ -75,13 +77,42 @@ func createTempFileWriter(absBasePath types.Path, logger *log.Entry) (*bufio.Wri return writer, tmpFile, tmpDir, nil } -func getPathFromMediaMetadata(m *types.MediaMetadata, absBasePath types.Path) string { - return path.Join( +// getPathFromMediaMetadata validates and constructs the on-disk path to the media +// based on its origin and mediaID +// If a mediaID is too short, which could happen for other homeserver implementations, +// place it into a short-id subdirectory of the origin directory +// If the mediaID is long enough, we split it in two using one part as a subdirectory +// name and the other part as the file name. This is to allow storage of more files due +// to filesystem limitations on the number of files in a directory. For example, if +// mediaID is 'qwerty', we create subdirectory called 'qwe' and place the file in 'qwe' +// and call it 'rty'. 
+func getPathFromMediaMetadata(m *types.MediaMetadata, absBasePath types.Path) (string, error) { + var subDir string + var fileName string + + if len(m.MediaID) > 3 { + subDir = string(m.MediaID[:3]) + fileName = string(m.MediaID[3:]) + } else { + subDir = "short-id" + fileName = string(m.MediaID) + } + + filePath, err := filepath.Abs(path.Join( string(absBasePath), string(m.Origin), - string(m.MediaID[:3]), - string(m.MediaID[3:]), - ) + subDir, + fileName, + )) + + // check if the absolute absBasePath is a prefix of the absolute filePath + // if so, no directory escape has occurred and the filePath is valid + // Note: absBasePath is already absolute + if err != nil || strings.HasPrefix(filePath, string(absBasePath)) == false { + return "", fmt.Errorf("Invalid filePath (not within absBasePath %v): %v", absBasePath, filePath) + } + + return filePath, nil } // moveFile attempts to move the file src to dst diff --git a/src/github.com/matrix-org/dendrite/mediaapi/writers/upload.go b/src/github.com/matrix-org/dendrite/mediaapi/writers/upload.go index 668c1e5f9..44439c850 100644 --- a/src/github.com/matrix-org/dendrite/mediaapi/writers/upload.go +++ b/src/github.com/matrix-org/dendrite/mediaapi/writers/upload.go @@ -210,7 +210,15 @@ func Upload(req *http.Request, cfg *config.MediaAPI, db *storage.Database) util. // TODO: generate thumbnails - finalPath := getPathFromMediaMetadata(r.MediaMetadata, cfg.AbsBasePath) + finalPath, err := getPathFromMediaMetadata(r.MediaMetadata, cfg.AbsBasePath) + if err != nil { + logger.Warnf("Failed to get file path from metadata: %q\n", err) + removeDir(tmpDir, logger) + return util.JSONResponse{ + Code: 400, + JSON: jsonerror.Unknown(fmt.Sprintf("Failed to upload")), + } + } err = moveFile( types.Path(path.Join(string(tmpDir), "content")), From 7af45e466466c317862b59ca63f0d9379dae7b16 Mon Sep 17 00:00:00 2001 From: Robert Swain Date: Thu, 18 May 2017 17:56:19 +0200 Subject: [PATCH 071/108] mediaapi/writers/upload: Refactor Upload() into three new functions --- .../dendrite/mediaapi/writers/upload.go | 179 +++++++++++------- 1 file changed, 108 insertions(+), 71 deletions(-) diff --git a/src/github.com/matrix-org/dendrite/mediaapi/writers/upload.go b/src/github.com/matrix-org/dendrite/mediaapi/writers/upload.go index 44439c850..79e5707e9 100644 --- a/src/github.com/matrix-org/dendrite/mediaapi/writers/upload.go +++ b/src/github.com/matrix-org/dendrite/mediaapi/writers/upload.go @@ -101,17 +101,12 @@ func removeDir(dir types.Path, logger *log.Entry) { } } -// Upload implements /upload -// -// This endpoint involves uploading potentially significant amounts of data to the homeserver. -// This implementation supports a configurable maximum file size limit in bytes. If a user tries to upload more than this, they will receive an error that their upload is too large. -// Uploaded files are processed piece-wise to avoid DoS attacks which would starve the server of memory. -// TODO: Requests time out if they have not received any data within the configured timeout period. -func Upload(req *http.Request, cfg *config.MediaAPI, db *storage.Database) util.JSONResponse { - logger := util.GetLogger(req.Context()) - +// parseAndValidateRequest parses the incoming upload request to validate and extract +// all the metadata about the media being uploaded. 
Returns either an uploadRequest or +// an error formatted as a util.JSONResponse +func parseAndValidateRequest(req *http.Request, cfg *config.MediaAPI) (*uploadRequest, *util.JSONResponse) { if req.Method != "POST" { - return util.JSONResponse{ + return nil, &util.JSONResponse{ Code: 400, JSON: jsonerror.Unknown("HTTP request method must be POST."), } @@ -121,7 +116,7 @@ func Upload(req *http.Request, cfg *config.MediaAPI, db *storage.Database) util. // just accepts a user id for auth userID, resErr := auth.VerifyAccessToken(req) if resErr != nil { - return *resErr + return nil, resErr } r := &uploadRequest{ @@ -136,7 +131,7 @@ func Upload(req *http.Request, cfg *config.MediaAPI, db *storage.Database) util. } if resErr = r.Validate(cfg.MaxFileSizeBytes); resErr != nil { - return *resErr + return nil, resErr } if len(r.MediaMetadata.UploadName) > 0 { @@ -145,6 +140,101 @@ func Upload(req *http.Request, cfg *config.MediaAPI, db *storage.Database) util. ) } + return r, nil +} + +// writeFileWithLimitAndHash reads data from an io.Reader and writes it to a temporary +// file named 'content' in the returned temporary directory. It only reads up to a limit of +// cfg.MaxFileSizeBytes from the io.Reader. The data written is hashed and the hashsum is +// returned. If any errors occur, a util.JSONResponse error is returned. +func writeFileWithLimitAndHash(r io.Reader, cfg *config.MediaAPI, logger *log.Entry, contentLength types.ContentLength) ([]byte, types.Path, *util.JSONResponse) { + writer, file, tmpDir, errorResponse := createTempFileWriter(cfg.AbsBasePath, logger) + if errorResponse != nil { + return nil, "", errorResponse + } + defer file.Close() + + // The limited reader restricts how many bytes are read from the body to the specified maximum bytes + // Note: the golang HTTP server closes the request body + limitedBody := io.LimitReader(r, int64(cfg.MaxFileSizeBytes)) + hasher := sha256.New() + reader := io.TeeReader(limitedBody, hasher) + + bytesWritten, err := io.Copy(writer, reader) + if err != nil { + logger.Warnf("Failed to copy %q\n", err) + removeDir(tmpDir, logger) + return nil, "", &util.JSONResponse{ + Code: 400, + JSON: jsonerror.Unknown(fmt.Sprintf("Failed to upload")), + } + } + + writer.Flush() + + if bytesWritten != int64(contentLength) { + logger.Warnf("Bytes uploaded (%v) != claimed Content-Length (%v)", bytesWritten, contentLength) + } + + return hasher.Sum(nil), tmpDir, nil +} + +// storeFileAndMetadata first moves a temporary file named content from tmpDir to its +// final path (see getPathFromMediaMetadata for details.) Once the file is moved, the +// metadata about the file is written into the media repository database. +// In case of any error, appropriate files and directories are cleaned up a +// util.JSONResponse error is returned. 
+func storeFileAndMetadata(tmpDir types.Path, absBasePath types.Path, mediaMetadata *types.MediaMetadata, db *storage.Database, logger *log.Entry) *util.JSONResponse { + finalPath, err := getPathFromMediaMetadata(mediaMetadata, absBasePath) + if err != nil { + logger.Warnf("Failed to get file path from metadata: %q\n", err) + removeDir(tmpDir, logger) + return &util.JSONResponse{ + Code: 400, + JSON: jsonerror.Unknown(fmt.Sprintf("Failed to upload")), + } + } + + err = moveFile( + types.Path(path.Join(string(tmpDir), "content")), + types.Path(finalPath), + ) + if err != nil { + logger.Warnf("Failed to move file to final destination: %q\n", err) + removeDir(tmpDir, logger) + return &util.JSONResponse{ + Code: 400, + JSON: jsonerror.Unknown(fmt.Sprintf("Failed to upload")), + } + } + + err = db.StoreMediaMetadata(mediaMetadata) + if err != nil { + logger.Warnf("Failed to store metadata: %q\n", err) + removeDir(types.Path(path.Dir(finalPath)), logger) + return &util.JSONResponse{ + Code: 400, + JSON: jsonerror.Unknown(fmt.Sprintf("Failed to upload")), + } + } + + return nil +} + +// Upload implements /upload +// +// This endpoint involves uploading potentially significant amounts of data to the homeserver. +// This implementation supports a configurable maximum file size limit in bytes. If a user tries to upload more than this, they will receive an error that their upload is too large. +// Uploaded files are processed piece-wise to avoid DoS attacks which would starve the server of memory. +// TODO: Requests time out if they have not received any data within the configured timeout period. +func Upload(req *http.Request, cfg *config.MediaAPI, db *storage.Database) util.JSONResponse { + logger := util.GetLogger(req.Context()) + + r, resErr := parseAndValidateRequest(req, cfg) + if resErr != nil { + return *resErr + } + logger.WithFields(log.Fields{ "Origin": r.MediaMetadata.Origin, "UploadName": r.MediaMetadata.UploadName, @@ -153,35 +243,10 @@ func Upload(req *http.Request, cfg *config.MediaAPI, db *storage.Database) util. "Content-Disposition": r.MediaMetadata.ContentDisposition, }).Info("Uploading file") - writer, file, tmpDir, errorResponse := createTempFileWriter(cfg.AbsBasePath, logger) - if errorResponse != nil { - return *errorResponse + hash, tmpDir, resErr := writeFileWithLimitAndHash(req.Body, cfg, logger, r.MediaMetadata.ContentLength) + if resErr != nil { + return *resErr } - defer file.Close() - - // The limited reader restricts how many bytes are read from the body to the specified maximum bytes - // Note: the golang HTTP server closes the request body - limitedBody := io.LimitReader(req.Body, int64(cfg.MaxFileSizeBytes)) - hasher := sha256.New() - reader := io.TeeReader(limitedBody, hasher) - - bytesWritten, err := io.Copy(writer, reader) - if err != nil { - logger.Warnf("Failed to copy %q\n", err) - removeDir(tmpDir, logger) - return util.JSONResponse{ - Code: 400, - JSON: jsonerror.Unknown(fmt.Sprintf("Failed to upload")), - } - } - - writer.Flush() - - if bytesWritten != int64(r.MediaMetadata.ContentLength) { - logger.Warnf("Bytes uploaded (%v) != claimed Content-Length (%v)", bytesWritten, r.MediaMetadata.ContentLength) - } - - hash := hasher.Sum(nil) r.MediaMetadata.MediaID = types.MediaID(base64.URLEncoding.EncodeToString(hash[:])) logger.WithFields(log.Fields{ @@ -210,37 +275,9 @@ func Upload(req *http.Request, cfg *config.MediaAPI, db *storage.Database) util. 
// TODO: generate thumbnails - finalPath, err := getPathFromMediaMetadata(r.MediaMetadata, cfg.AbsBasePath) - if err != nil { - logger.Warnf("Failed to get file path from metadata: %q\n", err) - removeDir(tmpDir, logger) - return util.JSONResponse{ - Code: 400, - JSON: jsonerror.Unknown(fmt.Sprintf("Failed to upload")), - } - } - - err = moveFile( - types.Path(path.Join(string(tmpDir), "content")), - types.Path(finalPath), - ) - if err != nil { - logger.Warnf("Failed to move file to final destination: %q\n", err) - removeDir(tmpDir, logger) - return util.JSONResponse{ - Code: 400, - JSON: jsonerror.Unknown(fmt.Sprintf("Failed to upload")), - } - } - - err = db.StoreMediaMetadata(r.MediaMetadata) - if err != nil { - logger.Warnf("Failed to store metadata: %q\n", err) - removeDir(types.Path(path.Dir(finalPath)), logger) - return util.JSONResponse{ - Code: 400, - JSON: jsonerror.Unknown(fmt.Sprintf("Failed to upload")), - } + resErr = storeFileAndMetadata(tmpDir, cfg.AbsBasePath, r.MediaMetadata, db, logger) + if resErr != nil { + return *resErr } return util.JSONResponse{ From 5dd90fbff3669cf21bee18a20c3d4500c8177feb Mon Sep 17 00:00:00 2001 From: Robert Swain Date: Thu, 18 May 2017 18:00:56 +0200 Subject: [PATCH 072/108] mediaapi/writers/fileutils: Make note of further file path validation todo --- .../matrix-org/dendrite/mediaapi/writers/fileutils.go | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/src/github.com/matrix-org/dendrite/mediaapi/writers/fileutils.go b/src/github.com/matrix-org/dendrite/mediaapi/writers/fileutils.go index d931707b0..9a72b378f 100644 --- a/src/github.com/matrix-org/dendrite/mediaapi/writers/fileutils.go +++ b/src/github.com/matrix-org/dendrite/mediaapi/writers/fileutils.go @@ -105,6 +105,11 @@ func getPathFromMediaMetadata(m *types.MediaMetadata, absBasePath types.Path) (s fileName, )) + // FIXME: + // - validate origin + // - sanitize mediaID (e.g. 
'/' characters and such) + // - validate length of origin and mediaID according to common filesystem limitations + // check if the absolute absBasePath is a prefix of the absolute filePath // if so, no directory escape has occurred and the filePath is valid // Note: absBasePath is already absolute From cdd4222e458378dc2fac12c200cd23e0f75722a8 Mon Sep 17 00:00:00 2001 From: Robert Swain Date: Fri, 19 May 2017 10:46:50 +0200 Subject: [PATCH 073/108] mediaapi/writers/fileutils: Return errors to log using request context --- .../dendrite/mediaapi/writers/download.go | 1 + .../dendrite/mediaapi/writers/fileutils.go | 19 +++++++------------ 2 files changed, 8 insertions(+), 12 deletions(-) diff --git a/src/github.com/matrix-org/dendrite/mediaapi/writers/download.go b/src/github.com/matrix-org/dendrite/mediaapi/writers/download.go index c9c9be441..fb2e5c95b 100644 --- a/src/github.com/matrix-org/dendrite/mediaapi/writers/download.go +++ b/src/github.com/matrix-org/dendrite/mediaapi/writers/download.go @@ -388,6 +388,7 @@ func (r *downloadRequest) commitFileAndMetadata(tmpDir types.Path, absBasePath t types.Path(finalPath), ) if err != nil { + r.Logger.Warnf("Failed to move file to final destination: %q\n", err) tmpDirErr := os.RemoveAll(string(tmpDir)) if tmpDirErr != nil { r.Logger.Warnf("Failed to remove tmpDir (%v): %q\n", tmpDir, tmpDirErr) diff --git a/src/github.com/matrix-org/dendrite/mediaapi/writers/fileutils.go b/src/github.com/matrix-org/dendrite/mediaapi/writers/fileutils.go index 9a72b378f..69f7dd83c 100644 --- a/src/github.com/matrix-org/dendrite/mediaapi/writers/fileutils.go +++ b/src/github.com/matrix-org/dendrite/mediaapi/writers/fileutils.go @@ -33,13 +33,11 @@ import ( func createTempDir(baseDirectory types.Path) (types.Path, error) { baseTmpDir := path.Join(string(baseDirectory), "tmp") if err := os.MkdirAll(baseTmpDir, 0770); err != nil { - log.Printf("Failed to create base temp dir: %v\n", err) - return "", err + return "", fmt.Errorf("Failed to create base temp dir: %v", err) } tmpDir, err := ioutil.TempDir(baseTmpDir, "") if err != nil { - log.Printf("Failed to create temp dir: %v\n", err) - return "", err + return "", fmt.Errorf("Failed to create temp dir: %v", err) } return types.Path(tmpDir), nil } @@ -50,8 +48,7 @@ func createFileWriter(directory types.Path, filename types.Filename) (*bufio.Wri filePath := path.Join(string(directory), string(filename)) file, err := os.Create(filePath) if err != nil { - log.Printf("Failed to create file: %v\n", err) - return nil, nil, err + return nil, nil, fmt.Errorf("Failed to create file: %v", err) } return bufio.NewWriter(file), file, nil @@ -60,7 +57,7 @@ func createFileWriter(directory types.Path, filename types.Filename) (*bufio.Wri func createTempFileWriter(absBasePath types.Path, logger *log.Entry) (*bufio.Writer, *os.File, types.Path, *util.JSONResponse) { tmpDir, err := createTempDir(absBasePath) if err != nil { - logger.Infof("Failed to create temp dir %q\n", err) + logger.Warnf("Failed to create temp dir: %q\n", err) return nil, nil, "", &util.JSONResponse{ Code: 400, JSON: jsonerror.Unknown(fmt.Sprintf("Failed to upload")), @@ -68,7 +65,7 @@ func createTempFileWriter(absBasePath types.Path, logger *log.Entry) (*bufio.Wri } writer, tmpFile, err := createFileWriter(tmpDir, "content") if err != nil { - logger.Infof("Failed to create file writer %q\n", err) + logger.Warnf("Failed to create file writer: %q\n", err) return nil, nil, "", &util.JSONResponse{ Code: 400, JSON: jsonerror.Unknown(fmt.Sprintf("Failed to upload")), @@ 
-126,13 +123,11 @@ func moveFile(src types.Path, dst types.Path) error { err := os.MkdirAll(dstDir, 0770) if err != nil { - log.Printf("Failed to make directory: %q", err) - return err + return fmt.Errorf("Failed to make directory: %q", err) } err = os.Rename(string(src), string(dst)) if err != nil { - log.Printf("Failed to move directory: %q", err) - return err + return fmt.Errorf("Failed to move directory: %q", err) } return nil } From 3e5ac85ce1ca0ced4e639655f11107454c0cd4ce Mon Sep 17 00:00:00 2001 From: Robert Swain Date: Fri, 19 May 2017 10:53:47 +0200 Subject: [PATCH 074/108] mediaapi/writers/upload: Clarify TODO comment --- src/github.com/matrix-org/dendrite/mediaapi/writers/upload.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/github.com/matrix-org/dendrite/mediaapi/writers/upload.go b/src/github.com/matrix-org/dendrite/mediaapi/writers/upload.go index 79e5707e9..943fbb659 100644 --- a/src/github.com/matrix-org/dendrite/mediaapi/writers/upload.go +++ b/src/github.com/matrix-org/dendrite/mediaapi/writers/upload.go @@ -226,7 +226,7 @@ func storeFileAndMetadata(tmpDir types.Path, absBasePath types.Path, mediaMetada // This endpoint involves uploading potentially significant amounts of data to the homeserver. // This implementation supports a configurable maximum file size limit in bytes. If a user tries to upload more than this, they will receive an error that their upload is too large. // Uploaded files are processed piece-wise to avoid DoS attacks which would starve the server of memory. -// TODO: Requests time out if they have not received any data within the configured timeout period. +// TODO: We should time out requests if they have not received any data within a configured timeout period. func Upload(req *http.Request, cfg *config.MediaAPI, db *storage.Database) util.JSONResponse { logger := util.GetLogger(req.Context()) From f7d11f87c1d53bfbeb0f762bc7816c408c7be2a4 Mon Sep 17 00:00:00 2001 From: Robert Swain Date: Fri, 19 May 2017 10:59:12 +0200 Subject: [PATCH 075/108] mediaapi/writers/upload: Add comment about why we hash the file data --- .../matrix-org/dendrite/mediaapi/writers/upload.go | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/src/github.com/matrix-org/dendrite/mediaapi/writers/upload.go b/src/github.com/matrix-org/dendrite/mediaapi/writers/upload.go index 943fbb659..05db45630 100644 --- a/src/github.com/matrix-org/dendrite/mediaapi/writers/upload.go +++ b/src/github.com/matrix-org/dendrite/mediaapi/writers/upload.go @@ -157,6 +157,10 @@ func writeFileWithLimitAndHash(r io.Reader, cfg *config.MediaAPI, logger *log.En // The limited reader restricts how many bytes are read from the body to the specified maximum bytes // Note: the golang HTTP server closes the request body limitedBody := io.LimitReader(r, int64(cfg.MaxFileSizeBytes)) + // The file data is hashed and the hash is returned. The hash is useful as a + // method of deduplicating files to save storage, as well as a way to conduct + // integrity checks on the file data in the repository. The hash gets used as + // the MediaID. hasher := sha256.New() reader := io.TeeReader(limitedBody, hasher) @@ -243,6 +247,9 @@ func Upload(req *http.Request, cfg *config.MediaAPI, db *storage.Database) util. "Content-Disposition": r.MediaMetadata.ContentDisposition, }).Info("Uploading file") + // The file data is hashed and the hash is used as the MediaID. 
The hash is useful as a + // method of deduplicating files to save storage, as well as a way to conduct + // integrity checks on the file data in the repository. hash, tmpDir, resErr := writeFileWithLimitAndHash(req.Body, cfg, logger, r.MediaMetadata.ContentLength) if resErr != nil { return *resErr From 12b0cdde06d5dde2a8ef38425e5062c366f387aa Mon Sep 17 00:00:00 2001 From: Robert Swain Date: Fri, 19 May 2017 11:01:44 +0200 Subject: [PATCH 076/108] mediaapi/writers/upload: Explain the use of TeeReader --- src/github.com/matrix-org/dendrite/mediaapi/writers/upload.go | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/github.com/matrix-org/dendrite/mediaapi/writers/upload.go b/src/github.com/matrix-org/dendrite/mediaapi/writers/upload.go index 05db45630..a5f5894cd 100644 --- a/src/github.com/matrix-org/dendrite/mediaapi/writers/upload.go +++ b/src/github.com/matrix-org/dendrite/mediaapi/writers/upload.go @@ -162,6 +162,8 @@ func writeFileWithLimitAndHash(r io.Reader, cfg *config.MediaAPI, logger *log.En // integrity checks on the file data in the repository. The hash gets used as // the MediaID. hasher := sha256.New() + // A TeeReader is used to allow us to read from the limitedBody and simultaneously + // write to the hasher here and to the http.ResponseWriter via the io.Copy call below. reader := io.TeeReader(limitedBody, hasher) bytesWritten, err := io.Copy(writer, reader) From 5d5f15650070b92f66af35b4a2532098b56766fc Mon Sep 17 00:00:00 2001 From: Robert Swain Date: Fri, 19 May 2017 11:34:40 +0200 Subject: [PATCH 077/108] mediaapi/writers/fileutils: Rework file path layout From experience with synapse, splitting the files into subdirectories based on the beginnings of the filenames helps with browsability. As we are using MediaIDs that are base64-encoded, each character has 64 possibilities, which is a nice upper limit on the number of subdirectories in a directory in terms of browsing. We have two levels of single character directories for added convenience, creating up to 4096 buckets. --- .../dendrite/mediaapi/writers/fileutils.go | 41 ++++++++++++------- 1 file changed, 27 insertions(+), 14 deletions(-) diff --git a/src/github.com/matrix-org/dendrite/mediaapi/writers/fileutils.go b/src/github.com/matrix-org/dendrite/mediaapi/writers/fileutils.go index 69f7dd83c..1a519e656 100644 --- a/src/github.com/matrix-org/dendrite/mediaapi/writers/fileutils.go +++ b/src/github.com/matrix-org/dendrite/mediaapi/writers/fileutils.go @@ -78,29 +78,42 @@ func createTempFileWriter(absBasePath types.Path, logger *log.Entry) (*bufio.Wri // based on its origin and mediaID // If a mediaID is too short, which could happen for other homeserver implementations, // place it into a short-id subdirectory of the origin directory -// If the mediaID is long enough, we split it in two using one part as a subdirectory -// name and the other part as the file name. This is to allow storage of more files due -// to filesystem limitations on the number of files in a directory. For example, if -// mediaID is 'qwerty', we create subdirectory called 'qwe' and place the file in 'qwe' -// and call it 'rty'. +// If the mediaID is long enough, we split it into pieces, creating up to 2 subdirectories +// for more manageable browsing and use the remainder as the file name. For example, if +// mediaID is 'qwerty', we create subdirectories 'q', 'w' within 'q' and place the file +// in 'q/w' calling it 'erty'. 
If the mediaID is shorter than 3 characters, the last +// character is the file name and the preceding character, if any, is a subdirectory name. func getPathFromMediaMetadata(m *types.MediaMetadata, absBasePath types.Path) (string, error) { - var subDir string - var fileName string + var subPath, fileName string - if len(m.MediaID) > 3 { - subDir = string(m.MediaID[:3]) - fileName = string(m.MediaID[3:]) - } else { - subDir = "short-id" + mediaIDLen := len(m.MediaID) + + switch { + case mediaIDLen < 1: + return "", fmt.Errorf("Invalid filePath (MediaID too short): %q", m.MediaID) + case mediaIDLen < 2: + subPath = "" fileName = string(m.MediaID) + case mediaIDLen < 3: + subPath = string(m.MediaID[0:1]) + fileName = string(m.MediaID[1:]) + default: + subPath = path.Join( + string(m.MediaID[0:1]), + string(m.MediaID[1:2]), + ) + fileName = string(m.MediaID[2:]) } filePath, err := filepath.Abs(path.Join( string(absBasePath), string(m.Origin), - subDir, + subPath, fileName, )) + if err != nil { + return "", fmt.Errorf("Unable to construct filePath: %q", err) + } // FIXME: // - validate origin @@ -110,7 +123,7 @@ func getPathFromMediaMetadata(m *types.MediaMetadata, absBasePath types.Path) (s // check if the absolute absBasePath is a prefix of the absolute filePath // if so, no directory escape has occurred and the filePath is valid // Note: absBasePath is already absolute - if err != nil || strings.HasPrefix(filePath, string(absBasePath)) == false { + if strings.HasPrefix(filePath, string(absBasePath)) == false { return "", fmt.Errorf("Invalid filePath (not within absBasePath %v): %v", absBasePath, filePath) } From 1242fdba22789ea276a3ccc8fd01a6cc6697f466 Mon Sep 17 00:00:00 2001 From: Robert Swain Date: Fri, 19 May 2017 12:21:10 +0200 Subject: [PATCH 078/108] mediaapi: Improve logging throughout, leveraging logrus features --- .../cmd/dendrite-media-api-server/main.go | 15 ++-- .../dendrite/mediaapi/writers/download.go | 70 ++++++++++++------- .../dendrite/mediaapi/writers/fileutils.go | 4 +- .../dendrite/mediaapi/writers/upload.go | 17 +++-- 4 files changed, 68 insertions(+), 38 deletions(-) diff --git a/src/github.com/matrix-org/dendrite/cmd/dendrite-media-api-server/main.go b/src/github.com/matrix-org/dendrite/cmd/dendrite-media-api-server/main.go index 1d827eba2..d7191508a 100644 --- a/src/github.com/matrix-org/dendrite/cmd/dendrite-media-api-server/main.go +++ b/src/github.com/matrix-org/dendrite/cmd/dendrite-media-api-server/main.go @@ -50,7 +50,7 @@ func main() { } absBasePath, err := filepath.Abs(basePath) if err != nil { - log.Panicf("BASE_PATH is invalid (must be able to make absolute): %v\n", err) + log.WithError(err).WithField("BASE_PATH", basePath).Panic("BASE_PATH is invalid (must be able to make absolute)") } if serverName == "" { @@ -59,7 +59,7 @@ func main() { maxFileSizeBytes, err := strconv.ParseInt(maxFileSizeBytesString, 10, 64) if err != nil { maxFileSizeBytes = 10 * 1024 * 1024 - log.Infof("Failed to parse MAX_FILE_SIZE_BYTES. Defaulting to %v bytes.", maxFileSizeBytes) + log.WithError(err).WithField("MAX_FILE_SIZE_BYTES", maxFileSizeBytesString).Warnf("Failed to parse MAX_FILE_SIZE_BYTES. 
Defaulting to %v bytes.", maxFileSizeBytes) } cfg := &config.MediaAPI{ @@ -71,10 +71,17 @@ func main() { db, err := storage.Open(cfg.DataSource) if err != nil { - log.Panicln("Failed to open database:", err) + log.WithError(err).Panic("Failed to open database") } - log.Info("Starting mediaapi") + log.WithFields(log.Fields{ + "BASE_PATH": absBasePath, + "BIND_ADDRESS": bindAddr, + "DATABASE": dataSource, + "LOG_DIR": logDir, + "MAX_FILE_SIZE_BYTES": maxFileSizeBytes, + "SERVER_NAME": serverName, + }).Info("Starting mediaapi") routing.Setup(http.DefaultServeMux, http.DefaultClient, cfg, db) log.Fatal(http.ListenAndServe(bindAddr, nil)) diff --git a/src/github.com/matrix-org/dendrite/mediaapi/writers/download.go b/src/github.com/matrix-org/dendrite/mediaapi/writers/download.go index fb2e5c95b..54bb3d5b1 100644 --- a/src/github.com/matrix-org/dendrite/mediaapi/writers/download.go +++ b/src/github.com/matrix-org/dendrite/mediaapi/writers/download.go @@ -145,7 +145,7 @@ func Download(w http.ResponseWriter, req *http.Request, origin gomatrixserverlib r.Logger.WithFields(log.Fields{ "MediaID": r.MediaMetadata.MediaID, "Origin": r.MediaMetadata.Origin, - }).Warnf("Other goroutines are trying to download the remote file and failing.") + }).Warn("Other goroutines are trying to download the remote file and failing.") r.jsonErrorResponse(w, util.JSONResponse{ Code: 500, JSON: jsonerror.Unknown(fmt.Sprintf("File with media ID %q could not be downloaded from %q", r.MediaMetadata.MediaID, r.MediaMetadata.Origin)), @@ -156,7 +156,7 @@ func Download(w http.ResponseWriter, req *http.Request, origin gomatrixserverlib r.Logger.WithFields(log.Fields{ "Origin": r.MediaMetadata.Origin, "MediaID": r.MediaMetadata.MediaID, - }).Infof("Waiting for another goroutine to fetch the remote file.") + }).Info("Waiting for another goroutine to fetch the remote file.") activeRemoteRequestCondition.Wait() activeRemoteRequests.Unlock() } else { @@ -169,7 +169,7 @@ func Download(w http.ResponseWriter, req *http.Request, origin gomatrixserverlib r.respondFromRemoteFile(w, cfg.AbsBasePath, cfg.MaxFileSizeBytes, db, activeRemoteRequests) } else { // If we do not have a record and the origin is local, or if we have another error from the database, the file is not found - r.Logger.Warnln("Failed to look up file in database:", err) + r.Logger.WithError(err).Warn("Failed to look up file in database") r.jsonErrorResponse(w, util.JSONResponse{ Code: 404, JSON: jsonerror.NotFound(fmt.Sprintf("File with media ID %q does not exist", r.MediaMetadata.MediaID)), @@ -190,7 +190,7 @@ func (r *downloadRequest) respondFromLocalFile(w http.ResponseWriter, absBasePat filePath, err := getPathFromMediaMetadata(r.MediaMetadata, absBasePath) if err != nil { // FIXME: Remove erroneous file from database? - r.Logger.Warnln("Failed to get file path from metadata:", err) + r.Logger.WithError(err).Warn("Failed to get file path from metadata") r.jsonErrorResponse(w, util.JSONResponse{ Code: 404, JSON: jsonerror.NotFound(fmt.Sprintf("File with media ID %q does not exist", r.MediaMetadata.MediaID)), @@ -200,7 +200,7 @@ func (r *downloadRequest) respondFromLocalFile(w http.ResponseWriter, absBasePat file, err := os.Open(filePath) if err != nil { // FIXME: Remove erroneous file from database? 
- r.Logger.Warnln("Failed to open file:", err) + r.Logger.WithError(err).Warn("Failed to open file") r.jsonErrorResponse(w, util.JSONResponse{ Code: 404, JSON: jsonerror.NotFound(fmt.Sprintf("File with media ID %q does not exist", r.MediaMetadata.MediaID)), @@ -211,7 +211,7 @@ func (r *downloadRequest) respondFromLocalFile(w http.ResponseWriter, absBasePat stat, err := file.Stat() if err != nil { // FIXME: Remove erroneous file from database? - r.Logger.Warnln("Failed to stat file:", err) + r.Logger.WithError(err).Warn("Failed to stat file") r.jsonErrorResponse(w, util.JSONResponse{ Code: 404, JSON: jsonerror.NotFound(fmt.Sprintf("File with media ID %q does not exist", r.MediaMetadata.MediaID)), @@ -220,7 +220,10 @@ func (r *downloadRequest) respondFromLocalFile(w http.ResponseWriter, absBasePat } if r.MediaMetadata.ContentLength > 0 && int64(r.MediaMetadata.ContentLength) != stat.Size() { - r.Logger.Warnf("File size in database (%v) and on disk (%v) differ.", r.MediaMetadata.ContentLength, stat.Size()) + r.Logger.WithFields(log.Fields{ + "contentLength": r.MediaMetadata.ContentLength, + "fileSize": stat.Size(), + }).Warn("Content-Length in database and on-disk file size differ.") // FIXME: Remove erroneous file from database? } @@ -234,7 +237,7 @@ func (r *downloadRequest) respondFromLocalFile(w http.ResponseWriter, absBasePat w.Header().Set("Content-Security-Policy", contentSecurityPolicy) if bytesResponded, err := io.Copy(w, file); err != nil { - r.Logger.Warnf("Failed to copy from cache %v\n", err) + r.Logger.WithError(err).Warn("Failed to copy from cache") if bytesResponded == 0 { r.jsonErrorResponse(w, util.JSONResponse{ Code: 500, @@ -249,7 +252,7 @@ func (r *downloadRequest) respondFromLocalFile(w http.ResponseWriter, absBasePat func (r *downloadRequest) createRemoteRequest() (*http.Response, *util.JSONResponse) { urls := getMatrixUrls(r.MediaMetadata.Origin) - r.Logger.Printf("Connecting to remote %q\n", urls[0]) + r.Logger.WithField("URL", urls[0]).Info("Connecting to remote") remoteReqAddr := urls[0] + "/_matrix/media/v1/download/" + string(r.MediaMetadata.Origin) + "/" + string(r.MediaMetadata.MediaID) remoteReq, err := http.NewRequest("GET", remoteReqAddr, nil) @@ -272,9 +275,17 @@ func (r *downloadRequest) createRemoteRequest() (*http.Response, *util.JSONRespo } if resp.StatusCode != 200 { - r.Logger.Printf("Server responded with %d\n", resp.StatusCode) + r.Logger.WithFields(log.Fields{ + "Origin": r.MediaMetadata.Origin, + "MediaID": r.MediaMetadata.MediaID, + "StatusCode": resp.StatusCode, + }).Info("Received error response") if resp.StatusCode == 404 { - r.Logger.Warnln("Remote server says file does not exist") + r.Logger.WithFields(log.Fields{ + "Origin": r.MediaMetadata.Origin, + "MediaID": r.MediaMetadata.MediaID, + "StatusCode": resp.StatusCode, + }).Warn("Remote server says file does not exist") return nil, &util.JSONResponse{ Code: 404, JSON: jsonerror.NotFound(fmt.Sprintf("File with media ID %q does not exist", r.MediaMetadata.MediaID)), @@ -339,15 +350,24 @@ func copyToActiveAndPassive(r io.Reader, wActive io.Writer, wPassive io.Writer, } func (r *downloadRequest) closeConnection(w http.ResponseWriter) { - r.Logger.Println("Attempting to close the connection.") + r.Logger.WithFields(log.Fields{ + "Origin": r.MediaMetadata.Origin, + "MediaID": r.MediaMetadata.MediaID, + }).Info("Attempting to close the connection.") hijacker, ok := w.(http.Hijacker) if ok { connection, _, hijackErr := hijacker.Hijack() if hijackErr == nil { - r.Logger.Println("Closing") + 
r.Logger.WithFields(log.Fields{ + "Origin": r.MediaMetadata.Origin, + "MediaID": r.MediaMetadata.MediaID, + }).Info("Closing") connection.Close() } else { - r.Logger.Printf("Error trying to hijack: %v", hijackErr) + r.Logger.WithError(hijackErr).WithFields(log.Fields{ + "Origin": r.MediaMetadata.Origin, + "MediaID": r.MediaMetadata.MediaID, + }).Warn("Error trying to hijack and close connection") } } } @@ -370,15 +390,15 @@ func (r *downloadRequest) commitFileAndMetadata(tmpDir types.Path, absBasePath t "Content-Length": r.MediaMetadata.ContentLength, "Content-Type": r.MediaMetadata.ContentType, "Content-Disposition": r.MediaMetadata.ContentDisposition, - }).Infof("Storing file metadata to media repository database") + }).Info("Storing file metadata to media repository database") // The database is the source of truth so we need to have moved the file first finalPath, err := getPathFromMediaMetadata(r.MediaMetadata, absBasePath) if err != nil { - r.Logger.Warnf("Failed to get file path from metadata: %q\n", err) + r.Logger.WithError(err).Warn("Failed to get file path from metadata") tmpDirErr := os.RemoveAll(string(tmpDir)) if tmpDirErr != nil { - r.Logger.Warnf("Failed to remove tmpDir (%v): %q\n", tmpDir, tmpDirErr) + r.Logger.WithError(tmpDirErr).WithField("dir", tmpDir).Warn("Failed to remove tmpDir") } return updateActiveRemoteRequests } @@ -388,10 +408,10 @@ func (r *downloadRequest) commitFileAndMetadata(tmpDir types.Path, absBasePath t types.Path(finalPath), ) if err != nil { - r.Logger.Warnf("Failed to move file to final destination: %q\n", err) + r.Logger.WithError(err).WithField("dst", finalPath).Warn("Failed to move file to final destination") tmpDirErr := os.RemoveAll(string(tmpDir)) if tmpDirErr != nil { - r.Logger.Warnf("Failed to remove tmpDir (%v): %q\n", tmpDir, tmpDirErr) + r.Logger.WithError(tmpDirErr).WithField("dir", tmpDir).Warn("Failed to remove tmpDir") } return updateActiveRemoteRequests } @@ -410,7 +430,7 @@ func (r *downloadRequest) commitFileAndMetadata(tmpDir types.Path, absBasePath t finalDir := path.Dir(finalPath) finalDirErr := os.RemoveAll(finalDir) if finalDirErr != nil { - r.Logger.Warnf("Failed to remove finalDir (%v): %q\n", finalDir, finalDirErr) + r.Logger.WithError(finalDirErr).WithField("dir", finalDir).Warn("Failed to remove finalDir") } completeRemoteRequest(activeRemoteRequests, mxcURL) return updateActiveRemoteRequests @@ -418,15 +438,15 @@ func (r *downloadRequest) commitFileAndMetadata(tmpDir types.Path, absBasePath t r.Logger.WithFields(log.Fields{ "Origin": r.MediaMetadata.Origin, "MediaID": r.MediaMetadata.MediaID, - }).Infof("Signalling other goroutines waiting for us to fetch the file.") + }).Info("Signalling other goroutines waiting for us to fetch the file.") completeRemoteRequest(activeRemoteRequests, mxcURL) return updateActiveRemoteRequests } func (r *downloadRequest) respondFromRemoteFile(w http.ResponseWriter, absBasePath types.Path, maxFileSizeBytes types.ContentLength, db *storage.Database, activeRemoteRequests *types.ActiveRemoteRequests) { r.Logger.WithFields(log.Fields{ - "MediaID": r.MediaMetadata.MediaID, "Origin": r.MediaMetadata.Origin, + "MediaID": r.MediaMetadata.MediaID, }).Infof("Fetching remote file") mxcURL := "mxc://" + string(r.MediaMetadata.Origin) + "/" + string(r.MediaMetadata.MediaID) @@ -455,7 +475,7 @@ func (r *downloadRequest) respondFromRemoteFile(w http.ResponseWriter, absBasePa // get metadata from request and set metadata on response contentLength, err := 
strconv.ParseInt(resp.Header.Get("Content-Length"), 10, 64) if err != nil { - r.Logger.Warn("Failed to parse content length") + r.Logger.WithError(err).Warn("Failed to parse content length") } r.MediaMetadata.ContentLength = types.ContentLength(contentLength) @@ -505,10 +525,10 @@ func (r *downloadRequest) respondFromRemoteFile(w http.ResponseWriter, absBasePa if fetchError == errFileIsTooLarge { logFields["MaxFileSizeBytes"] = maxFileSizeBytes } - r.Logger.WithFields(logFields).Warnln(fetchError) + r.Logger.WithError(fetchError).WithFields(logFields).Warn("Error while fetching file") tmpDirErr := os.RemoveAll(string(tmpDir)) if tmpDirErr != nil { - r.Logger.Warnf("Failed to remove tmpDir (%v): %q\n", tmpDir, tmpDirErr) + r.Logger.WithError(tmpDirErr).WithField("dir", tmpDir).Warn("Failed to remove tmpDir") } // Note: if we have responded with any data in the body at all then we have already sent 200 OK and we can only abort at this point if bytesResponded < 1 { diff --git a/src/github.com/matrix-org/dendrite/mediaapi/writers/fileutils.go b/src/github.com/matrix-org/dendrite/mediaapi/writers/fileutils.go index 1a519e656..2b6bf16be 100644 --- a/src/github.com/matrix-org/dendrite/mediaapi/writers/fileutils.go +++ b/src/github.com/matrix-org/dendrite/mediaapi/writers/fileutils.go @@ -57,7 +57,7 @@ func createFileWriter(directory types.Path, filename types.Filename) (*bufio.Wri func createTempFileWriter(absBasePath types.Path, logger *log.Entry) (*bufio.Writer, *os.File, types.Path, *util.JSONResponse) { tmpDir, err := createTempDir(absBasePath) if err != nil { - logger.Warnf("Failed to create temp dir: %q\n", err) + logger.WithError(err).WithField("dir", tmpDir).Warn("Failed to create temp dir") return nil, nil, "", &util.JSONResponse{ Code: 400, JSON: jsonerror.Unknown(fmt.Sprintf("Failed to upload")), @@ -65,7 +65,7 @@ func createTempFileWriter(absBasePath types.Path, logger *log.Entry) (*bufio.Wri } writer, tmpFile, err := createFileWriter(tmpDir, "content") if err != nil { - logger.Warnf("Failed to create file writer: %q\n", err) + logger.WithError(err).Warn("Failed to create file writer") return nil, nil, "", &util.JSONResponse{ Code: 400, JSON: jsonerror.Unknown(fmt.Sprintf("Failed to upload")), diff --git a/src/github.com/matrix-org/dendrite/mediaapi/writers/upload.go b/src/github.com/matrix-org/dendrite/mediaapi/writers/upload.go index a5f5894cd..38e7c8a61 100644 --- a/src/github.com/matrix-org/dendrite/mediaapi/writers/upload.go +++ b/src/github.com/matrix-org/dendrite/mediaapi/writers/upload.go @@ -97,7 +97,7 @@ type uploadResponse struct { func removeDir(dir types.Path, logger *log.Entry) { dirErr := os.RemoveAll(string(dir)) if dirErr != nil { - logger.Warnf("Failed to remove directory (%v): %q\n", dir, dirErr) + logger.WithError(dirErr).WithField("dir", dir).Warn("Failed to remove directory") } } @@ -168,7 +168,7 @@ func writeFileWithLimitAndHash(r io.Reader, cfg *config.MediaAPI, logger *log.En bytesWritten, err := io.Copy(writer, reader) if err != nil { - logger.Warnf("Failed to copy %q\n", err) + logger.WithError(err).Warn("Failed to copy") removeDir(tmpDir, logger) return nil, "", &util.JSONResponse{ Code: 400, @@ -179,7 +179,10 @@ func writeFileWithLimitAndHash(r io.Reader, cfg *config.MediaAPI, logger *log.En writer.Flush() if bytesWritten != int64(contentLength) { - logger.Warnf("Bytes uploaded (%v) != claimed Content-Length (%v)", bytesWritten, contentLength) + logger.WithFields(log.Fields{ + "bytesWritten": bytesWritten, + "contentLength": contentLength, + }).Warn("Fewer 
bytes written than expected") } return hasher.Sum(nil), tmpDir, nil @@ -193,7 +196,7 @@ func writeFileWithLimitAndHash(r io.Reader, cfg *config.MediaAPI, logger *log.En func storeFileAndMetadata(tmpDir types.Path, absBasePath types.Path, mediaMetadata *types.MediaMetadata, db *storage.Database, logger *log.Entry) *util.JSONResponse { finalPath, err := getPathFromMediaMetadata(mediaMetadata, absBasePath) if err != nil { - logger.Warnf("Failed to get file path from metadata: %q\n", err) + logger.WithError(err).Warn("Failed to get file path from metadata") removeDir(tmpDir, logger) return &util.JSONResponse{ Code: 400, @@ -206,7 +209,7 @@ func storeFileAndMetadata(tmpDir types.Path, absBasePath types.Path, mediaMetada types.Path(finalPath), ) if err != nil { - logger.Warnf("Failed to move file to final destination: %q\n", err) + logger.WithError(err).WithField("dst", finalPath).Warn("Failed to move file to final destination") removeDir(tmpDir, logger) return &util.JSONResponse{ Code: 400, @@ -216,7 +219,7 @@ func storeFileAndMetadata(tmpDir types.Path, absBasePath types.Path, mediaMetada err = db.StoreMediaMetadata(mediaMetadata) if err != nil { - logger.Warnf("Failed to store metadata: %q\n", err) + logger.WithError(err).Warn("Failed to store metadata") removeDir(types.Path(path.Dir(finalPath)), logger) return &util.JSONResponse{ Code: 400, @@ -279,7 +282,7 @@ func Upload(req *http.Request, cfg *config.MediaAPI, db *storage.Database) util. }, } } else if err != sql.ErrNoRows { - logger.Warnf("Failed to query database for %v: %q", r.MediaMetadata.MediaID, err) + logger.WithError(err).WithField("MediaID", r.MediaMetadata.MediaID).Warn("Failed to query database") } // TODO: generate thumbnails From 86cb8e32f7396ee97ea17e02cdd2241d3b5b5730 Mon Sep 17 00:00:00 2001 From: Robert Swain Date: Fri, 19 May 2017 12:26:27 +0200 Subject: [PATCH 079/108] mediaapi/writers/upload: Clarify order of moving file and storing metadata --- src/github.com/matrix-org/dendrite/mediaapi/writers/upload.go | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/github.com/matrix-org/dendrite/mediaapi/writers/upload.go b/src/github.com/matrix-org/dendrite/mediaapi/writers/upload.go index 38e7c8a61..917060368 100644 --- a/src/github.com/matrix-org/dendrite/mediaapi/writers/upload.go +++ b/src/github.com/matrix-org/dendrite/mediaapi/writers/upload.go @@ -190,7 +190,9 @@ func writeFileWithLimitAndHash(r io.Reader, cfg *config.MediaAPI, logger *log.En // storeFileAndMetadata first moves a temporary file named content from tmpDir to its // final path (see getPathFromMediaMetadata for details.) Once the file is moved, the -// metadata about the file is written into the media repository database. +// metadata about the file is written into the media repository database. This order +// of operations is important as it avoids metadata entering the database before the file +// is ready and if we fail to move the file, it never gets added to the database. // In case of any error, appropriate files and directories are cleaned up a // util.JSONResponse error is returned. 
func storeFileAndMetadata(tmpDir types.Path, absBasePath types.Path, mediaMetadata *types.MediaMetadata, db *storage.Database, logger *log.Entry) *util.JSONResponse { From 318531d01120e0d8d34afde02328e34bd6e6d1c9 Mon Sep 17 00:00:00 2001 From: Robert Swain Date: Fri, 19 May 2017 12:27:55 +0200 Subject: [PATCH 080/108] mediaapi/writers/upload: Make assign in-line in if --- src/github.com/matrix-org/dendrite/mediaapi/writers/upload.go | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/github.com/matrix-org/dendrite/mediaapi/writers/upload.go b/src/github.com/matrix-org/dendrite/mediaapi/writers/upload.go index 917060368..f5c8322fd 100644 --- a/src/github.com/matrix-org/dendrite/mediaapi/writers/upload.go +++ b/src/github.com/matrix-org/dendrite/mediaapi/writers/upload.go @@ -219,8 +219,7 @@ func storeFileAndMetadata(tmpDir types.Path, absBasePath types.Path, mediaMetada } } - err = db.StoreMediaMetadata(mediaMetadata) - if err != nil { + if err = db.StoreMediaMetadata(mediaMetadata); err != nil { logger.WithError(err).Warn("Failed to store metadata") removeDir(types.Path(path.Dir(finalPath)), logger) return &util.JSONResponse{ From 9af66a196315ecf7cb963aae32bbda4471891a5d Mon Sep 17 00:00:00 2001 From: Robert Swain Date: Mon, 22 May 2017 10:13:37 +0200 Subject: [PATCH 081/108] mediaapi/writers: Reuse removeDir throughout the package --- .../dendrite/mediaapi/writers/download.go | 20 ++++--------------- .../dendrite/mediaapi/writers/fileutils.go | 7 +++++++ .../dendrite/mediaapi/writers/upload.go | 7 ------- 3 files changed, 11 insertions(+), 23 deletions(-) diff --git a/src/github.com/matrix-org/dendrite/mediaapi/writers/download.go b/src/github.com/matrix-org/dendrite/mediaapi/writers/download.go index 54bb3d5b1..30d262a46 100644 --- a/src/github.com/matrix-org/dendrite/mediaapi/writers/download.go +++ b/src/github.com/matrix-org/dendrite/mediaapi/writers/download.go @@ -396,10 +396,7 @@ func (r *downloadRequest) commitFileAndMetadata(tmpDir types.Path, absBasePath t finalPath, err := getPathFromMediaMetadata(r.MediaMetadata, absBasePath) if err != nil { r.Logger.WithError(err).Warn("Failed to get file path from metadata") - tmpDirErr := os.RemoveAll(string(tmpDir)) - if tmpDirErr != nil { - r.Logger.WithError(tmpDirErr).WithField("dir", tmpDir).Warn("Failed to remove tmpDir") - } + removeDir(tmpDir, r.Logger) return updateActiveRemoteRequests } @@ -409,10 +406,7 @@ func (r *downloadRequest) commitFileAndMetadata(tmpDir types.Path, absBasePath t ) if err != nil { r.Logger.WithError(err).WithField("dst", finalPath).Warn("Failed to move file to final destination") - tmpDirErr := os.RemoveAll(string(tmpDir)) - if tmpDirErr != nil { - r.Logger.WithError(tmpDirErr).WithField("dir", tmpDir).Warn("Failed to remove tmpDir") - } + removeDir(tmpDir, r.Logger) return updateActiveRemoteRequests } @@ -428,10 +422,7 @@ func (r *downloadRequest) commitFileAndMetadata(tmpDir types.Path, absBasePath t err = db.StoreMediaMetadata(r.MediaMetadata) if err != nil { finalDir := path.Dir(finalPath) - finalDirErr := os.RemoveAll(finalDir) - if finalDirErr != nil { - r.Logger.WithError(finalDirErr).WithField("dir", finalDir).Warn("Failed to remove finalDir") - } + removeDir(types.Path(finalDir), r.Logger) completeRemoteRequest(activeRemoteRequests, mxcURL) return updateActiveRemoteRequests } @@ -526,10 +517,7 @@ func (r *downloadRequest) respondFromRemoteFile(w http.ResponseWriter, absBasePa logFields["MaxFileSizeBytes"] = maxFileSizeBytes } 
r.Logger.WithError(fetchError).WithFields(logFields).Warn("Error while fetching file") - tmpDirErr := os.RemoveAll(string(tmpDir)) - if tmpDirErr != nil { - r.Logger.WithError(tmpDirErr).WithField("dir", tmpDir).Warn("Failed to remove tmpDir") - } + removeDir(tmpDir, r.Logger) // Note: if we have responded with any data in the body at all then we have already sent 200 OK and we can only abort at this point if bytesResponded < 1 { r.jsonErrorResponse(w, util.JSONResponse{ diff --git a/src/github.com/matrix-org/dendrite/mediaapi/writers/fileutils.go b/src/github.com/matrix-org/dendrite/mediaapi/writers/fileutils.go index 2b6bf16be..d158a1029 100644 --- a/src/github.com/matrix-org/dendrite/mediaapi/writers/fileutils.go +++ b/src/github.com/matrix-org/dendrite/mediaapi/writers/fileutils.go @@ -29,6 +29,13 @@ import ( "github.com/matrix-org/util" ) +func removeDir(dir types.Path, logger *log.Entry) { + dirErr := os.RemoveAll(string(dir)) + if dirErr != nil { + logger.WithError(dirErr).WithField("dir", dir).Warn("Failed to remove directory") + } +} + // createTempDir creates a tmp/ directory within baseDirectory and returns its path func createTempDir(baseDirectory types.Path) (types.Path, error) { baseTmpDir := path.Join(string(baseDirectory), "tmp") diff --git a/src/github.com/matrix-org/dendrite/mediaapi/writers/upload.go b/src/github.com/matrix-org/dendrite/mediaapi/writers/upload.go index f5c8322fd..132af87fd 100644 --- a/src/github.com/matrix-org/dendrite/mediaapi/writers/upload.go +++ b/src/github.com/matrix-org/dendrite/mediaapi/writers/upload.go @@ -94,13 +94,6 @@ type uploadResponse struct { ContentURI string `json:"content_uri"` } -func removeDir(dir types.Path, logger *log.Entry) { - dirErr := os.RemoveAll(string(dir)) - if dirErr != nil { - logger.WithError(dirErr).WithField("dir", dir).Warn("Failed to remove directory") - } -} - // parseAndValidateRequest parses the incoming upload request to validate and extract // all the metadata about the media being uploaded. Returns either an uploadRequest or // an error formatted as a util.JSONResponse From 370cb74d2d330241186a8075c03dfea213373e56 Mon Sep 17 00:00:00 2001 From: Robert Swain Date: Mon, 22 May 2017 10:19:52 +0200 Subject: [PATCH 082/108] mediaapi/writers: Reuse same writer code for upload and download This now calculates a hash for downloads from remote servers as well as uploads to this server. 
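Below is a minimal, self-contained sketch of the shared copy-and-hash idea described above. It is not the code in the diff that follows (which also proxies the stream to an http.ResponseWriter and tracks bytes proxied separately from bytes written to disk); the helper name, the one-extra-byte limit check and the temporary-directory handling here are illustrative assumptions only.

package main

import (
	"bytes"
	"crypto/sha256"
	"encoding/base64"
	"errors"
	"fmt"
	"io"
	"io/ioutil"
	"os"
	"path/filepath"
)

var errFileIsTooLarge = errors.New("file is too large")

// hashAndWriteWithLimit copies at most maxBytes from r into a file named
// "content" inside dir while hashing the same bytes, and returns the base64
// URL-encoded SHA-256 sum plus the number of bytes written.
func hashAndWriteWithLimit(r io.Reader, dir string, maxBytes int64) (string, int64, error) {
	f, err := os.Create(filepath.Join(dir, "content"))
	if err != nil {
		return "", 0, err
	}
	defer f.Close()

	hasher := sha256.New()
	// Every byte written to disk is also fed to the hasher.
	dst := io.MultiWriter(f, hasher)

	// Read one byte beyond the limit so exceeding it is detected rather than
	// silently truncated.
	n, err := io.Copy(dst, io.LimitReader(r, maxBytes+1))
	if err != nil {
		return "", n, err
	}
	if n > maxBytes {
		return "", n, errFileIsTooLarge
	}
	return base64.URLEncoding.EncodeToString(hasher.Sum(nil)), n, nil
}

func main() {
	dir, err := ioutil.TempDir("", "mediaapi-sketch")
	if err != nil {
		panic(err)
	}
	defer os.RemoveAll(dir)

	hash, n, err := hashAndWriteWithLimit(bytes.NewBufferString("hello world"), dir, 1024)
	fmt.Println(hash, n, err)
}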
--- .../dendrite/mediaapi/writers/download.go | 75 ++-------- .../dendrite/mediaapi/writers/fileutils.go | 129 ++++++++++++++++-- .../dendrite/mediaapi/writers/upload.go | 73 +++------- 3 files changed, 144 insertions(+), 133 deletions(-) diff --git a/src/github.com/matrix-org/dendrite/mediaapi/writers/download.go b/src/github.com/matrix-org/dendrite/mediaapi/writers/download.go index 30d262a46..58c9b3adc 100644 --- a/src/github.com/matrix-org/dendrite/mediaapi/writers/download.go +++ b/src/github.com/matrix-org/dendrite/mediaapi/writers/download.go @@ -78,11 +78,6 @@ func (r *downloadRequest) jsonErrorResponse(w http.ResponseWriter, res util.JSON w.Write(resBytes) } -var errFileIsTooLarge = fmt.Errorf("file is too large") -var errRead = fmt.Errorf("failed to read response from remote server") -var errResponse = fmt.Errorf("failed to write file data to response body") -var errWrite = fmt.Errorf("failed to write file to disk") - var nTries = 5 // Download implements /download @@ -300,55 +295,6 @@ func (r *downloadRequest) createRemoteRequest() (*http.Response, *util.JSONRespo return resp, nil } -// copyToActiveAndPassive works like io.Copy except it copies from the reader to both of the writers -// If there is an error with the reader or the active writer, that is considered an error -// If there is an error with the passive writer, that is non-critical and copying continues -// maxFileSizeBytes limits the amount of data written to the passive writer -func copyToActiveAndPassive(r io.Reader, wActive io.Writer, wPassive io.Writer, maxFileSizeBytes types.ContentLength, mediaMetadata *types.MediaMetadata) (int64, int64, error) { - var bytesResponded, bytesWritten int64 = 0, 0 - var copyError error - // Note: the buffer size is the same as is used in io.Copy() - buffer := make([]byte, 32*1024) - for { - // read from remote request's response body - bytesRead, readErr := r.Read(buffer) - if bytesRead > 0 { - // write to client request's response body - bytesTemp, respErr := wActive.Write(buffer[:bytesRead]) - if bytesTemp != bytesRead || (respErr != nil && respErr != io.EOF) { - copyError = errResponse - break - } - bytesResponded += int64(bytesTemp) - if copyError == nil { - // Note: if we get here then copyError != errFileIsTooLarge && copyError != errWrite - // as if copyError == errResponse || copyError == errWrite then we would have broken - // out of the loop and there are no other cases - // if larger than maxFileSizeBytes then stop writing to disk and discard cached file - if bytesWritten+int64(len(buffer)) > int64(maxFileSizeBytes) { - copyError = errFileIsTooLarge - } else { - // write to disk - bytesTemp, writeErr := wPassive.Write(buffer[:bytesRead]) - if writeErr != nil && writeErr != io.EOF { - copyError = errWrite - } else { - bytesWritten += int64(bytesTemp) - } - } - } - } - if readErr != nil { - if readErr != io.EOF { - copyError = errRead - } - break - } - } - - return bytesResponded, bytesWritten, copyError -} - func (r *downloadRequest) closeConnection(w http.ResponseWriter) { r.Logger.WithFields(log.Fields{ "Origin": r.MediaMetadata.Origin, @@ -489,14 +435,6 @@ func (r *downloadRequest) respondFromRemoteFile(w http.ResponseWriter, absBasePa " object-src 'self';" w.Header().Set("Content-Security-Policy", contentSecurityPolicy) - // create the temporary file writer - tmpFileWriter, tmpFile, tmpDir, errorResponse := createTempFileWriter(absBasePath, r.Logger) - if errorResponse != nil { - r.jsonErrorResponse(w, *errorResponse) - return - } - defer tmpFile.Close() - // read the 
remote request's response body // simultaneously write it to the incoming request's response body and the temporary file r.Logger.WithFields(log.Fields{ @@ -504,19 +442,22 @@ func (r *downloadRequest) respondFromRemoteFile(w http.ResponseWriter, absBasePa "Origin": r.MediaMetadata.Origin, }).Infof("Proxying and caching remote file") + // The file data is hashed but is NOT used as the MediaID, unlike in Upload. The hash is useful as a + // method of deduplicating files to save storage, as well as a way to conduct + // integrity checks on the file data in the repository. // bytesResponded is the total number of bytes written to the response to the client request // bytesWritten is the total number of bytes written to disk - bytesResponded, bytesWritten, fetchError := copyToActiveAndPassive(resp.Body, w, tmpFileWriter, maxFileSizeBytes, r.MediaMetadata) - tmpFileWriter.Flush() - if fetchError != nil { + hash, bytesResponded, bytesWritten, tmpDir, copyError := readAndHashAndWriteWithLimit(resp.Body, maxFileSizeBytes, absBasePath, w) + + if copyError != nil { logFields := log.Fields{ "MediaID": r.MediaMetadata.MediaID, "Origin": r.MediaMetadata.Origin, } - if fetchError == errFileIsTooLarge { + if copyError == errFileIsTooLarge { logFields["MaxFileSizeBytes"] = maxFileSizeBytes } - r.Logger.WithError(fetchError).WithFields(logFields).Warn("Error while fetching file") + r.Logger.WithError(copyError).WithFields(logFields).Warn("Error while transferring file") removeDir(tmpDir, r.Logger) // Note: if we have responded with any data in the body at all then we have already sent 200 OK and we can only abort at this point if bytesResponded < 1 { diff --git a/src/github.com/matrix-org/dendrite/mediaapi/writers/fileutils.go b/src/github.com/matrix-org/dendrite/mediaapi/writers/fileutils.go index d158a1029..a11176e97 100644 --- a/src/github.com/matrix-org/dendrite/mediaapi/writers/fileutils.go +++ b/src/github.com/matrix-org/dendrite/mediaapi/writers/fileutils.go @@ -16,17 +16,20 @@ package writers import ( "bufio" + "crypto/sha256" + "encoding/base64" "fmt" + "hash" + "io" "io/ioutil" + "net/http" "os" "path" "path/filepath" "strings" log "github.com/Sirupsen/logrus" - "github.com/matrix-org/dendrite/clientapi/jsonerror" "github.com/matrix-org/dendrite/mediaapi/types" - "github.com/matrix-org/util" ) func removeDir(dir types.Path, logger *log.Entry) { @@ -61,26 +64,126 @@ func createFileWriter(directory types.Path, filename types.Filename) (*bufio.Wri return bufio.NewWriter(file), file, nil } -func createTempFileWriter(absBasePath types.Path, logger *log.Entry) (*bufio.Writer, *os.File, types.Path, *util.JSONResponse) { +func createTempFileWriter(absBasePath types.Path) (*bufio.Writer, *os.File, types.Path, error) { tmpDir, err := createTempDir(absBasePath) if err != nil { - logger.WithError(err).WithField("dir", tmpDir).Warn("Failed to create temp dir") - return nil, nil, "", &util.JSONResponse{ - Code: 400, - JSON: jsonerror.Unknown(fmt.Sprintf("Failed to upload")), - } + return nil, nil, "", fmt.Errorf("Failed to create temp dir: %q", err) } writer, tmpFile, err := createFileWriter(tmpDir, "content") if err != nil { - logger.WithError(err).Warn("Failed to create file writer") - return nil, nil, "", &util.JSONResponse{ - Code: 400, - JSON: jsonerror.Unknown(fmt.Sprintf("Failed to upload")), - } + return nil, nil, "", fmt.Errorf("Failed to create file writer: %q", err) } return writer, tmpFile, tmpDir, nil } +var errFileIsTooLarge = fmt.Errorf("file is too large") +var errRead = fmt.Errorf("failed to 
read response from remote server") +var errResponse = fmt.Errorf("failed to write file data to response body") +var errHash = fmt.Errorf("failed to hash file data") +var errWrite = fmt.Errorf("failed to write file to disk") + +// writeToResponse takes bytesToWrite bytes from buffer and writes them to respWriter +// Returns bytes written and an error. In case of error, or if there is no respWriter, +// the number of bytes written will be 0. +func writeToResponse(respWriter http.ResponseWriter, buffer []byte, bytesToWrite int) (int64, error) { + if respWriter != nil { + bytesWritten, respErr := respWriter.Write(buffer[:bytesToWrite]) + if bytesWritten != bytesToWrite || (respErr != nil && respErr != io.EOF) { + return 0, errResponse + } + return int64(bytesWritten), nil + } + return 0, nil +} + +// writeToDiskAndHasher takes bytesToWrite bytes from buffer and writes them to tmpFileWriter and hasher. +// Returns bytes written and an error. In case of error, including if writing would exceed maxFileSizeBytes, +// the number of bytes written will be 0. +func writeToDiskAndHasher(tmpFileWriter *bufio.Writer, hasher hash.Hash, bytesWritten int64, maxFileSizeBytes types.ContentLength, buffer []byte, bytesToWrite int) (int64, error) { + // if larger than maxFileSizeBytes then stop writing to disk and discard cached file + if bytesWritten+int64(bytesToWrite) > int64(maxFileSizeBytes) { + return 0, errFileIsTooLarge + } + // write to hasher and to disk + bytesTemp, writeErr := tmpFileWriter.Write(buffer[:bytesToWrite]) + bytesHashed, hashErr := hasher.Write(buffer[:bytesToWrite]) + if writeErr != nil && writeErr != io.EOF || bytesTemp != bytesToWrite || bytesTemp != bytesHashed { + return 0, errWrite + } else if hashErr != nil && hashErr != io.EOF { + return 0, errHash + } + return int64(bytesTemp), nil +} + +// readAndHashAndWriteWithLimit works like io.Copy except it copies from the reqReader to the +// optionally-supplied respWriter and a temporary file named 'content' using a bufio.Writer. +// The data written to disk is hashed using the SHA-256 algorithm. +// If there is an error with the reqReader or the respWriter, that is considered an error. +// If there is an error with the hasher or tmpFileWriter, that is non-critical and copying +// to the respWriter continues. +// maxFileSizeBytes limits the amount of data written to disk and the hasher. +// If a respWriter is provided, all the data will be proxied from the reqReader to +// the respWriter, regardless of errors or limits on writing to disk. +// Returns all of the hash sum, bytes written to disk, and temporary directory path, or an error. +func readAndHashAndWriteWithLimit(reqReader io.Reader, maxFileSizeBytes types.ContentLength, absBasePath types.Path, respWriter http.ResponseWriter) (types.Base64Hash, types.ContentLength, types.ContentLength, types.Path, error) { + // create the temporary file writer + tmpFileWriter, tmpFile, tmpDir, err := createTempFileWriter(absBasePath) + if err != nil { + return "", -1, -1, "", err + } + defer tmpFile.Close() + + // The file data is hashed and the hash is returned. The hash is useful as a + // method of deduplicating files to save storage, as well as a way to conduct + // integrity checks on the file data in the repository. The hash gets used as + // the MediaID. 
+ hasher := sha256.New() + + // bytesResponded is the total number of bytes written to the response to the client request + // bytesWritten is the total number of bytes written to disk + var bytesResponded, bytesWritten int64 = 0, 0 + var bytesTemp int64 + var copyError error + // Note: the buffer size is the same as is used in io.Copy() + buffer := make([]byte, 32*1024) + for { + // read from remote request's response body + bytesRead, readErr := reqReader.Read(buffer) + if bytesRead > 0 { + // Note: This code allows proxying files larger than maxFileSizeBytes! + // write to client request's response body + bytesTemp, copyError = writeToResponse(respWriter, buffer, bytesRead) + bytesResponded += bytesTemp + if copyError == nil { + // Note: if we get here then copyError != errFileIsTooLarge && copyError != errWrite + // as if copyError == errResponse || copyError == errWrite then we would have broken + // out of the loop and there are no other cases + bytesTemp, copyError = writeToDiskAndHasher(tmpFileWriter, hasher, bytesWritten, maxFileSizeBytes, buffer, (bytesRead)) + bytesWritten += bytesTemp + // If we do not have a respWriter then we are only writing to the hasher and tmpFileWriter. In that case, if we get an error, we need to break. + if respWriter == nil && copyError != nil { + break + } + } + } + if readErr != nil { + if readErr != io.EOF { + copyError = errRead + } + break + } + } + + if copyError != nil { + return "", -1, -1, "", copyError + } + + tmpFileWriter.Flush() + + hash := hasher.Sum(nil) + return types.Base64Hash(base64.URLEncoding.EncodeToString(hash[:])), types.ContentLength(bytesResponded), types.ContentLength(bytesWritten), tmpDir, nil +} + // getPathFromMediaMetadata validates and constructs the on-disk path to the media // based on its origin and mediaID // If a mediaID is too short, which could happen for other homeserver implementations, diff --git a/src/github.com/matrix-org/dendrite/mediaapi/writers/upload.go b/src/github.com/matrix-org/dendrite/mediaapi/writers/upload.go index 132af87fd..dc886353f 100644 --- a/src/github.com/matrix-org/dendrite/mediaapi/writers/upload.go +++ b/src/github.com/matrix-org/dendrite/mediaapi/writers/upload.go @@ -15,14 +15,10 @@ package writers import ( - "crypto/sha256" "database/sql" - "encoding/base64" "fmt" - "io" "net/http" "net/url" - "os" "path" "strings" @@ -136,51 +132,6 @@ func parseAndValidateRequest(req *http.Request, cfg *config.MediaAPI) (*uploadRe return r, nil } -// writeFileWithLimitAndHash reads data from an io.Reader and writes it to a temporary -// file named 'content' in the returned temporary directory. It only reads up to a limit of -// cfg.MaxFileSizeBytes from the io.Reader. The data written is hashed and the hashsum is -// returned. If any errors occur, a util.JSONResponse error is returned. -func writeFileWithLimitAndHash(r io.Reader, cfg *config.MediaAPI, logger *log.Entry, contentLength types.ContentLength) ([]byte, types.Path, *util.JSONResponse) { - writer, file, tmpDir, errorResponse := createTempFileWriter(cfg.AbsBasePath, logger) - if errorResponse != nil { - return nil, "", errorResponse - } - defer file.Close() - - // The limited reader restricts how many bytes are read from the body to the specified maximum bytes - // Note: the golang HTTP server closes the request body - limitedBody := io.LimitReader(r, int64(cfg.MaxFileSizeBytes)) - // The file data is hashed and the hash is returned. 
The hash is useful as a - // method of deduplicating files to save storage, as well as a way to conduct - // integrity checks on the file data in the repository. The hash gets used as - // the MediaID. - hasher := sha256.New() - // A TeeReader is used to allow us to read from the limitedBody and simultaneously - // write to the hasher here and to the http.ResponseWriter via the io.Copy call below. - reader := io.TeeReader(limitedBody, hasher) - - bytesWritten, err := io.Copy(writer, reader) - if err != nil { - logger.WithError(err).Warn("Failed to copy") - removeDir(tmpDir, logger) - return nil, "", &util.JSONResponse{ - Code: 400, - JSON: jsonerror.Unknown(fmt.Sprintf("Failed to upload")), - } - } - - writer.Flush() - - if bytesWritten != int64(contentLength) { - logger.WithFields(log.Fields{ - "bytesWritten": bytesWritten, - "contentLength": contentLength, - }).Warn("Fewer bytes written than expected") - } - - return hasher.Sum(nil), tmpDir, nil -} - // storeFileAndMetadata first moves a temporary file named content from tmpDir to its // final path (see getPathFromMediaMetadata for details.) Once the file is moved, the // metadata about the file is written into the media repository database. This order @@ -249,11 +200,27 @@ func Upload(req *http.Request, cfg *config.MediaAPI, db *storage.Database) util. // The file data is hashed and the hash is used as the MediaID. The hash is useful as a // method of deduplicating files to save storage, as well as a way to conduct // integrity checks on the file data in the repository. - hash, tmpDir, resErr := writeFileWithLimitAndHash(req.Body, cfg, logger, r.MediaMetadata.ContentLength) - if resErr != nil { - return *resErr + // bytesWritten is the total number of bytes written to disk + hash, _, bytesWritten, tmpDir, copyError := readAndHashAndWriteWithLimit(req.Body, cfg.MaxFileSizeBytes, cfg.AbsBasePath, nil) + + if copyError != nil { + logFields := log.Fields{ + "Origin": r.MediaMetadata.Origin, + "MediaID": r.MediaMetadata.MediaID, + } + if copyError == errFileIsTooLarge { + logFields["MaxFileSizeBytes"] = cfg.MaxFileSizeBytes + } + logger.WithError(copyError).WithFields(logFields).Warn("Error while transferring file") + removeDir(tmpDir, logger) + return util.JSONResponse{ + Code: 400, + JSON: jsonerror.Unknown(fmt.Sprintf("Failed to upload")), + } } - r.MediaMetadata.MediaID = types.MediaID(base64.URLEncoding.EncodeToString(hash[:])) + + r.MediaMetadata.ContentLength = bytesWritten + r.MediaMetadata.MediaID = types.MediaID(hash) logger.WithFields(log.Fields{ "MediaID": r.MediaMetadata.MediaID, From 5536fec902948e5c184f02497d87115894e10c6a Mon Sep 17 00:00:00 2001 From: Robert Swain Date: Mon, 22 May 2017 10:24:03 +0200 Subject: [PATCH 083/108] mediaapi/writers: Add base64hash to media_repository table A SHA-256 hash sum in golang base64 URLEncoding format (contains only URL-safe characters) is now calculated and stored for every file transferred to this server. Uploads to the server use this hash as the MediaID. Downloads from remote servers retain their MediaID from the remote server, but can use the hash for local deduplication and integrity checking purposes. 
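As a quick illustration of the encoding choice (the input bytes here are invented, not taken from the repository): Go's base64.URLEncoding swaps '+' and '/' for '-' and '_', so the resulting digest avoids the characters that would otherwise clash with URL and filesystem path syntax.

package main

import (
	"crypto/sha256"
	"encoding/base64"
	"fmt"
)

func main() {
	sum := sha256.Sum256([]byte("example file contents"))

	std := base64.StdEncoding.EncodeToString(sum[:]) // may contain '+' and '/'
	url := base64.URLEncoding.EncodeToString(sum[:]) // uses '-' and '_' instead

	fmt.Println("std:", std)
	fmt.Println("url:", url)
	fmt.Println("len:", len(url)) // 44 characters, including one '=' of padding
}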
--- .../mediaapi/storage/media_repository_table.go | 10 +++++++--- .../matrix-org/dendrite/mediaapi/types/types.go | 4 ++++ .../matrix-org/dendrite/mediaapi/writers/download.go | 2 ++ .../matrix-org/dendrite/mediaapi/writers/upload.go | 2 ++ 4 files changed, 15 insertions(+), 3 deletions(-) diff --git a/src/github.com/matrix-org/dendrite/mediaapi/storage/media_repository_table.go b/src/github.com/matrix-org/dendrite/mediaapi/storage/media_repository_table.go index 31846cf48..c19a9d9b3 100644 --- a/src/github.com/matrix-org/dendrite/mediaapi/storage/media_repository_table.go +++ b/src/github.com/matrix-org/dendrite/mediaapi/storage/media_repository_table.go @@ -42,6 +42,8 @@ CREATE TABLE IF NOT EXISTS media_repository ( creation_ts BIGINT NOT NULL, -- The file name with which the media was uploaded. upload_name TEXT NOT NULL, + -- A golang base64 URLEncoding string representation of a SHA-256 hash sum of the file data. + base64hash TEXT NOT NULL, -- The user who uploaded the file. Should be a Matrix user ID. user_id TEXT NOT NULL ); @@ -49,12 +51,12 @@ CREATE UNIQUE INDEX IF NOT EXISTS media_repository_index ON media_repository (me ` const insertMediaSQL = ` -INSERT INTO media_repository (media_id, media_origin, content_type, content_disposition, content_length, creation_ts, upload_name, user_id) - VALUES ($1, $2, $3, $4, $5, $6, $7, $8) +INSERT INTO media_repository (media_id, media_origin, content_type, content_disposition, content_length, creation_ts, upload_name, base64hash, user_id) + VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9) ` const selectMediaSQL = ` -SELECT content_type, content_disposition, content_length, creation_ts, upload_name, user_id FROM media_repository WHERE media_id = $1 AND media_origin = $2 +SELECT content_type, content_disposition, content_length, creation_ts, upload_name, base64hash, user_id FROM media_repository WHERE media_id = $1 AND media_origin = $2 ` type mediaStatements struct { @@ -84,6 +86,7 @@ func (s *mediaStatements) insertMedia(mediaMetadata *types.MediaMetadata) error mediaMetadata.ContentLength, mediaMetadata.CreationTimestamp, mediaMetadata.UploadName, + mediaMetadata.Base64Hash, mediaMetadata.UserID, ) return err @@ -102,6 +105,7 @@ func (s *mediaStatements) selectMedia(mediaID types.MediaID, mediaOrigin gomatri &mediaMetadata.ContentLength, &mediaMetadata.CreationTimestamp, &mediaMetadata.UploadName, + &mediaMetadata.Base64Hash, &mediaMetadata.UserID, ) return &mediaMetadata, err diff --git a/src/github.com/matrix-org/dendrite/mediaapi/types/types.go b/src/github.com/matrix-org/dendrite/mediaapi/types/types.go index 0da5b1017..bef86adcc 100644 --- a/src/github.com/matrix-org/dendrite/mediaapi/types/types.go +++ b/src/github.com/matrix-org/dendrite/mediaapi/types/types.go @@ -32,6 +32,9 @@ type ContentType string // Filename is a string representing the name of a file type Filename string +// Base64Hash is a base64 URLEncoding string representation of a SHA-256 hash sum +type Base64Hash string + // Path is an absolute or relative UNIX filesystem path type Path string @@ -56,6 +59,7 @@ type MediaMetadata struct { ContentLength ContentLength CreationTimestamp UnixMs UploadName Filename + Base64Hash Base64Hash UserID MatrixUserID } diff --git a/src/github.com/matrix-org/dendrite/mediaapi/writers/download.go b/src/github.com/matrix-org/dendrite/mediaapi/writers/download.go index 58c9b3adc..4dec2452b 100644 --- a/src/github.com/matrix-org/dendrite/mediaapi/writers/download.go +++ b/src/github.com/matrix-org/dendrite/mediaapi/writers/download.go @@ -332,6 
+332,7 @@ func (r *downloadRequest) commitFileAndMetadata(tmpDir types.Path, absBasePath t r.Logger.WithFields(log.Fields{ "MediaID": r.MediaMetadata.MediaID, "Origin": r.MediaMetadata.Origin, + "Base64Hash": r.MediaMetadata.Base64Hash, "UploadName": r.MediaMetadata.UploadName, "Content-Length": r.MediaMetadata.ContentLength, "Content-Type": r.MediaMetadata.ContentType, @@ -483,6 +484,7 @@ func (r *downloadRequest) respondFromRemoteFile(w http.ResponseWriter, absBasePa // request's response. bytesWritten is therefore used as it is what would be sent to clients when reading from the local // file. r.MediaMetadata.ContentLength = types.ContentLength(bytesWritten) + r.MediaMetadata.Base64Hash = hash r.MediaMetadata.UserID = types.MatrixUserID("@:" + string(r.MediaMetadata.Origin)) updateActiveRemoteRequests = r.commitFileAndMetadata(tmpDir, absBasePath, activeRemoteRequests, db, mxcURL) diff --git a/src/github.com/matrix-org/dendrite/mediaapi/writers/upload.go b/src/github.com/matrix-org/dendrite/mediaapi/writers/upload.go index dc886353f..92a80f2e2 100644 --- a/src/github.com/matrix-org/dendrite/mediaapi/writers/upload.go +++ b/src/github.com/matrix-org/dendrite/mediaapi/writers/upload.go @@ -220,11 +220,13 @@ func Upload(req *http.Request, cfg *config.MediaAPI, db *storage.Database) util. } r.MediaMetadata.ContentLength = bytesWritten + r.MediaMetadata.Base64Hash = hash r.MediaMetadata.MediaID = types.MediaID(hash) logger.WithFields(log.Fields{ "MediaID": r.MediaMetadata.MediaID, "Origin": r.MediaMetadata.Origin, + "Base64Hash": r.MediaMetadata.Base64Hash, "UploadName": r.MediaMetadata.UploadName, "Content-Length": r.MediaMetadata.ContentLength, "Content-Type": r.MediaMetadata.ContentType, From 5f604cc41f97351e6635767a153d7279cbe6dd86 Mon Sep 17 00:00:00 2001 From: Robert Swain Date: Mon, 22 May 2017 10:26:30 +0200 Subject: [PATCH 084/108] mediaapi/writers/upload: Infof -> Info as no formatting in string --- src/github.com/matrix-org/dendrite/mediaapi/writers/download.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/github.com/matrix-org/dendrite/mediaapi/writers/download.go b/src/github.com/matrix-org/dendrite/mediaapi/writers/download.go index 4dec2452b..13e24f2ff 100644 --- a/src/github.com/matrix-org/dendrite/mediaapi/writers/download.go +++ b/src/github.com/matrix-org/dendrite/mediaapi/writers/download.go @@ -441,7 +441,7 @@ func (r *downloadRequest) respondFromRemoteFile(w http.ResponseWriter, absBasePa r.Logger.WithFields(log.Fields{ "MediaID": r.MediaMetadata.MediaID, "Origin": r.MediaMetadata.Origin, - }).Infof("Proxying and caching remote file") + }).Info("Proxying and caching remote file") // The file data is hashed but is NOT used as the MediaID, unlike in Upload. The hash is useful as a // method of deduplicating files to save storage, as well as a way to conduct From 3cea54db0bcf568b131432a45b9ff51fe3f97689 Mon Sep 17 00:00:00 2001 From: Robert Swain Date: Mon, 22 May 2017 10:27:02 +0200 Subject: [PATCH 085/108] mediaapi/writers/download: Simplify user error message They already known the origin and media ID so it is redundant. 
--- src/github.com/matrix-org/dendrite/mediaapi/writers/download.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/github.com/matrix-org/dendrite/mediaapi/writers/download.go b/src/github.com/matrix-org/dendrite/mediaapi/writers/download.go index 13e24f2ff..4af7b47b8 100644 --- a/src/github.com/matrix-org/dendrite/mediaapi/writers/download.go +++ b/src/github.com/matrix-org/dendrite/mediaapi/writers/download.go @@ -464,7 +464,7 @@ func (r *downloadRequest) respondFromRemoteFile(w http.ResponseWriter, absBasePa if bytesResponded < 1 { r.jsonErrorResponse(w, util.JSONResponse{ Code: 502, - JSON: jsonerror.Unknown(fmt.Sprintf("File with media ID %q could not be downloaded from %q", r.MediaMetadata.MediaID, r.MediaMetadata.Origin)), + JSON: jsonerror.Unknown(fmt.Sprintf("File could not be downloaded from remote server")), }) } else { // We attempt to bluntly close the connection because that is the From 8f7ce9adc044747c3a94d4e6fdb4319d1d6c8a9c Mon Sep 17 00:00:00 2001 From: Robert Swain Date: Mon, 22 May 2017 10:27:48 +0200 Subject: [PATCH 086/108] mediaapi/writers/upload: Add note about Content-Disposition override --- src/github.com/matrix-org/dendrite/mediaapi/writers/upload.go | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/github.com/matrix-org/dendrite/mediaapi/writers/upload.go b/src/github.com/matrix-org/dendrite/mediaapi/writers/upload.go index 92a80f2e2..debed48d3 100644 --- a/src/github.com/matrix-org/dendrite/mediaapi/writers/upload.go +++ b/src/github.com/matrix-org/dendrite/mediaapi/writers/upload.go @@ -123,6 +123,8 @@ func parseAndValidateRequest(req *http.Request, cfg *config.MediaAPI) (*uploadRe return nil, resErr } + // FIXME: do we want to always override ContentDisposition here or only if + // there is no Content-Disposition header set? if len(r.MediaMetadata.UploadName) > 0 { r.MediaMetadata.ContentDisposition = types.ContentDisposition( "inline; filename*=utf-8''" + url.PathEscape(string(r.MediaMetadata.UploadName)), From 01c565ddfbcae17a4229903c2a6a9983a2909ff0 Mon Sep 17 00:00:00 2001 From: Robert Swain Date: Mon, 22 May 2017 10:34:56 +0200 Subject: [PATCH 087/108] mediaapi/writers/fileutils: Store files based on hash, not media ID This avoids having to sanitize the origin and media ID for files from remote servers. It also allows us to deduplicate files across all files uploaded to this homeserver or downloaded from remote homeservers. 
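A simplified sketch of the hash-based path construction, not the getPathFromMediaMetadata in the diff that follows: the short-hash and maximum-length cases handled there are omitted, and the helper name and example base path are invented.

package main

import (
	"fmt"
	"path/filepath"
	"strings"
)

// pathFromHash nests the first two characters of the base64 hash as two
// single-character directories and uses the remainder as the file name,
// fanning files out into up to 64*64 buckets under absBasePath.
func pathFromHash(absBasePath string, base64Hash string) (string, error) {
	if len(base64Hash) < 3 {
		return "", fmt.Errorf("hash too short: %q", base64Hash)
	}
	filePath, err := filepath.Abs(filepath.Join(
		absBasePath,
		base64Hash[0:1],
		base64Hash[1:2],
		base64Hash[2:],
	))
	if err != nil {
		return "", err
	}
	// Guard against directory escape: the result must stay under absBasePath,
	// which is assumed to already be absolute.
	if !strings.HasPrefix(filePath, absBasePath) {
		return "", fmt.Errorf("path %q escapes base path %q", filePath, absBasePath)
	}
	return filePath, nil
}

func main() {
	p, err := pathFromHash("/var/dendrite/media", "qwertyuiop")
	fmt.Println(p, err) // /var/dendrite/media/q/w/ertyuiop <nil>
}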
--- .../dendrite/mediaapi/writers/fileutils.go | 30 ++++++++----------- 1 file changed, 13 insertions(+), 17 deletions(-) diff --git a/src/github.com/matrix-org/dendrite/mediaapi/writers/fileutils.go b/src/github.com/matrix-org/dendrite/mediaapi/writers/fileutils.go index a11176e97..0517bf7ca 100644 --- a/src/github.com/matrix-org/dendrite/mediaapi/writers/fileutils.go +++ b/src/github.com/matrix-org/dendrite/mediaapi/writers/fileutils.go @@ -196,28 +196,29 @@ func readAndHashAndWriteWithLimit(reqReader io.Reader, maxFileSizeBytes types.Co func getPathFromMediaMetadata(m *types.MediaMetadata, absBasePath types.Path) (string, error) { var subPath, fileName string - mediaIDLen := len(m.MediaID) + hashLen := len(m.Base64Hash) switch { - case mediaIDLen < 1: - return "", fmt.Errorf("Invalid filePath (MediaID too short): %q", m.MediaID) - case mediaIDLen < 2: + case hashLen < 1: + return "", fmt.Errorf("Invalid filePath (Base64Hash too short): %q", m.Base64Hash) + case hashLen > 255: + return "", fmt.Errorf("Invalid filePath (Base64Hash too long - max 255 characters): %q", m.Base64Hash) + case hashLen < 2: subPath = "" - fileName = string(m.MediaID) - case mediaIDLen < 3: - subPath = string(m.MediaID[0:1]) - fileName = string(m.MediaID[1:]) + fileName = string(m.Base64Hash) + case hashLen < 3: + subPath = string(m.Base64Hash[0:1]) + fileName = string(m.Base64Hash[1:]) default: subPath = path.Join( - string(m.MediaID[0:1]), - string(m.MediaID[1:2]), + string(m.Base64Hash[0:1]), + string(m.Base64Hash[1:2]), ) - fileName = string(m.MediaID[2:]) + fileName = string(m.Base64Hash[2:]) } filePath, err := filepath.Abs(path.Join( string(absBasePath), - string(m.Origin), subPath, fileName, )) @@ -225,11 +226,6 @@ func getPathFromMediaMetadata(m *types.MediaMetadata, absBasePath types.Path) (s return "", fmt.Errorf("Unable to construct filePath: %q", err) } - // FIXME: - // - validate origin - // - sanitize mediaID (e.g. 
'/' characters and such) - // - validate length of origin and mediaID according to common filesystem limitations - // check if the absolute absBasePath is a prefix of the absolute filePath // if so, no directory escape has occurred and the filePath is valid // Note: absBasePath is already absolute From 5d4b5ef1bc216460968543f494f5f38a856d7dc2 Mon Sep 17 00:00:00 2001 From: Robert Swain Date: Mon, 22 May 2017 12:19:06 +0200 Subject: [PATCH 088/108] mediaapi/writers/download: Add Base64Hash to useful log messages --- src/github.com/matrix-org/dendrite/mediaapi/writers/download.go | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/github.com/matrix-org/dendrite/mediaapi/writers/download.go b/src/github.com/matrix-org/dendrite/mediaapi/writers/download.go index 4af7b47b8..259e6ad89 100644 --- a/src/github.com/matrix-org/dendrite/mediaapi/writers/download.go +++ b/src/github.com/matrix-org/dendrite/mediaapi/writers/download.go @@ -177,6 +177,7 @@ func (r *downloadRequest) respondFromLocalFile(w http.ResponseWriter, absBasePat "MediaID": r.MediaMetadata.MediaID, "Origin": r.MediaMetadata.Origin, "UploadName": r.MediaMetadata.UploadName, + "Base64Hash": r.MediaMetadata.Base64Hash, "Content-Length": r.MediaMetadata.ContentLength, "Content-Type": r.MediaMetadata.ContentType, "Content-Disposition": r.MediaMetadata.ContentDisposition, @@ -495,6 +496,7 @@ func (r *downloadRequest) respondFromRemoteFile(w http.ResponseWriter, absBasePa "MediaID": r.MediaMetadata.MediaID, "Origin": r.MediaMetadata.Origin, "UploadName": r.MediaMetadata.UploadName, + "Base64Hash": r.MediaMetadata.Base64Hash, "Content-Length": r.MediaMetadata.ContentLength, "Content-Type": r.MediaMetadata.ContentType, "Content-Disposition": r.MediaMetadata.ContentDisposition, From 73b419a12688d83c2d16ec2d69f5ea6e5cc797f8 Mon Sep 17 00:00:00 2001 From: Robert Swain Date: Mon, 22 May 2017 12:20:07 +0200 Subject: [PATCH 089/108] mediaapi/writers: Factor out common moveFileWithHashCheck functionality --- .../dendrite/mediaapi/writers/download.go | 26 +++++++------- .../dendrite/mediaapi/writers/fileutils.go | 35 +++++++++++++++++++ .../dendrite/mediaapi/writers/upload.go | 26 ++++++-------- 3 files changed, 57 insertions(+), 30 deletions(-) diff --git a/src/github.com/matrix-org/dendrite/mediaapi/writers/download.go b/src/github.com/matrix-org/dendrite/mediaapi/writers/download.go index 259e6ad89..ab713fa5f 100644 --- a/src/github.com/matrix-org/dendrite/mediaapi/writers/download.go +++ b/src/github.com/matrix-org/dendrite/mediaapi/writers/download.go @@ -341,21 +341,14 @@ func (r *downloadRequest) commitFileAndMetadata(tmpDir types.Path, absBasePath t }).Info("Storing file metadata to media repository database") // The database is the source of truth so we need to have moved the file first - finalPath, err := getPathFromMediaMetadata(r.MediaMetadata, absBasePath) + finalPath, duplicate, err := moveFileWithHashCheck(tmpDir, r.MediaMetadata, absBasePath, r.Logger) if err != nil { - r.Logger.WithError(err).Warn("Failed to get file path from metadata") - removeDir(tmpDir, r.Logger) + r.Logger.WithError(err).Error("Failed to move file.") return updateActiveRemoteRequests } - - err = moveFile( - types.Path(path.Join(string(tmpDir), "content")), - types.Path(finalPath), - ) - if err != nil { - r.Logger.WithError(err).WithField("dst", finalPath).Warn("Failed to move file to final destination") - removeDir(tmpDir, r.Logger) - return updateActiveRemoteRequests + if duplicate == true { + r.Logger.WithField("dst", finalPath).Info("File was stored 
previously - discarding duplicate") + // Continue on to store the metadata in the database } // Writing the metadata to the media repository database and removing the mxcURL from activeRemoteRequests needs to be atomic. @@ -369,8 +362,13 @@ func (r *downloadRequest) commitFileAndMetadata(tmpDir types.Path, absBasePath t // if written to disk, add to db err = db.StoreMediaMetadata(r.MediaMetadata) if err != nil { - finalDir := path.Dir(finalPath) - removeDir(types.Path(finalDir), r.Logger) + // If the file is a duplicate (has the same hash as an existing file) then + // there is valid metadata in the database for that file. As such we only + // remove the file if it is not a duplicate. + if duplicate == false { + finalDir := path.Dir(finalPath) + removeDir(types.Path(finalDir), r.Logger) + } completeRemoteRequest(activeRemoteRequests, mxcURL) return updateActiveRemoteRequests } diff --git a/src/github.com/matrix-org/dendrite/mediaapi/writers/fileutils.go b/src/github.com/matrix-org/dendrite/mediaapi/writers/fileutils.go index 0517bf7ca..d99167545 100644 --- a/src/github.com/matrix-org/dendrite/mediaapi/writers/fileutils.go +++ b/src/github.com/matrix-org/dendrite/mediaapi/writers/fileutils.go @@ -250,3 +250,38 @@ func moveFile(src types.Path, dst types.Path) error { } return nil } + +// moveFileWithHashCheck attempts to move the file src to dst and checks for hash collisions based on metadata +// Check if destination file exists. As the destination is based on a hash of the file data, +// if it exists and the content length does not match then there is a hash collision for two different files. If +// it exists and the content length matches, it is believable that it is the same file and we can just +// discard the temporary file. +func moveFileWithHashCheck(tmpDir types.Path, mediaMetadata *types.MediaMetadata, absBasePath types.Path, logger *log.Entry) (string, bool, error) { + duplicate := false + finalPath, err := getPathFromMediaMetadata(mediaMetadata, absBasePath) + if err != nil { + removeDir(tmpDir, logger) + return "", duplicate, fmt.Errorf("failed to get file path from metadata: %q", err) + } + + var stat os.FileInfo + if stat, err = os.Stat(finalPath); os.IsExist(err) { + duplicate = true + if stat.Size() == int64(mediaMetadata.ContentLength) { + removeDir(tmpDir, logger) + return finalPath, duplicate, nil + } + // Remove the tmpDir as we anyway cannot cache the file on disk due to the hash collision + removeDir(tmpDir, logger) + return "", duplicate, fmt.Errorf("downloaded file with hash collision but different file size (%v)", finalPath) + } + err = moveFile( + types.Path(path.Join(string(tmpDir), "content")), + types.Path(finalPath), + ) + if err != nil { + removeDir(tmpDir, logger) + return "", duplicate, fmt.Errorf("failed to move file to final destination (%v): %q", finalPath, err) + } + return finalPath, duplicate, nil +} diff --git a/src/github.com/matrix-org/dendrite/mediaapi/writers/upload.go b/src/github.com/matrix-org/dendrite/mediaapi/writers/upload.go index debed48d3..947d7d94b 100644 --- a/src/github.com/matrix-org/dendrite/mediaapi/writers/upload.go +++ b/src/github.com/matrix-org/dendrite/mediaapi/writers/upload.go @@ -142,32 +142,26 @@ func parseAndValidateRequest(req *http.Request, cfg *config.MediaAPI) (*uploadRe // In case of any error, appropriate files and directories are cleaned up a // util.JSONResponse error is returned. 
func storeFileAndMetadata(tmpDir types.Path, absBasePath types.Path, mediaMetadata *types.MediaMetadata, db *storage.Database, logger *log.Entry) *util.JSONResponse { - finalPath, err := getPathFromMediaMetadata(mediaMetadata, absBasePath) + finalPath, duplicate, err := moveFileWithHashCheck(tmpDir, mediaMetadata, absBasePath, logger) if err != nil { - logger.WithError(err).Warn("Failed to get file path from metadata") - removeDir(tmpDir, logger) + logger.WithError(err).Error("Failed to move file.") return &util.JSONResponse{ Code: 400, JSON: jsonerror.Unknown(fmt.Sprintf("Failed to upload")), } } - - err = moveFile( - types.Path(path.Join(string(tmpDir), "content")), - types.Path(finalPath), - ) - if err != nil { - logger.WithError(err).WithField("dst", finalPath).Warn("Failed to move file to final destination") - removeDir(tmpDir, logger) - return &util.JSONResponse{ - Code: 400, - JSON: jsonerror.Unknown(fmt.Sprintf("Failed to upload")), - } + if duplicate == true { + logger.WithField("dst", finalPath).Info("File was stored previously - discarding duplicate") } if err = db.StoreMediaMetadata(mediaMetadata); err != nil { logger.WithError(err).Warn("Failed to store metadata") - removeDir(types.Path(path.Dir(finalPath)), logger) + // If the file is a duplicate (has the same hash as an existing file) then + // there is valid metadata in the database for that file. As such we only + // remove the file if it is not a duplicate. + if duplicate == false { + removeDir(types.Path(path.Dir(finalPath)), logger) + } return &util.JSONResponse{ Code: 400, JSON: jsonerror.Unknown(fmt.Sprintf("Failed to upload")), From 7bdcece102639fac495ed567b14a409c4a7e72c8 Mon Sep 17 00:00:00 2001 From: Robert Swain Date: Mon, 22 May 2017 14:02:08 +0200 Subject: [PATCH 090/108] mediaapi/writers/fileutils: Group error variables --- .../dendrite/mediaapi/writers/fileutils.go | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/src/github.com/matrix-org/dendrite/mediaapi/writers/fileutils.go b/src/github.com/matrix-org/dendrite/mediaapi/writers/fileutils.go index d99167545..b1123bc4a 100644 --- a/src/github.com/matrix-org/dendrite/mediaapi/writers/fileutils.go +++ b/src/github.com/matrix-org/dendrite/mediaapi/writers/fileutils.go @@ -76,11 +76,13 @@ func createTempFileWriter(absBasePath types.Path) (*bufio.Writer, *os.File, type return writer, tmpFile, tmpDir, nil } -var errFileIsTooLarge = fmt.Errorf("file is too large") -var errRead = fmt.Errorf("failed to read response from remote server") -var errResponse = fmt.Errorf("failed to write file data to response body") -var errHash = fmt.Errorf("failed to hash file data") -var errWrite = fmt.Errorf("failed to write file to disk") +var ( + errFileIsTooLarge = fmt.Errorf("file is too large") + errRead = fmt.Errorf("failed to read response from remote server") + errResponse = fmt.Errorf("failed to write file data to response body") + errHash = fmt.Errorf("failed to hash file data") + errWrite = fmt.Errorf("failed to write file to disk") +) // writeToResponse takes bytesToWrite bytes from buffer and writes them to respWriter // Returns bytes written and an error. 
In case of error, or if there is no respWriter, From 8684f80ebd00ed31e14b7bd57db27f46c02f9b97 Mon Sep 17 00:00:00 2001 From: Robert Swain Date: Mon, 22 May 2017 14:12:11 +0200 Subject: [PATCH 091/108] mediaapi/fileutils: Move fileutils to its own package --- .../{writers => fileutils}/fileutils.go | 34 ++++++++++--------- .../dendrite/mediaapi/writers/download.go | 13 +++---- .../dendrite/mediaapi/writers/upload.go | 13 +++---- 3 files changed, 32 insertions(+), 28 deletions(-) rename src/github.com/matrix-org/dendrite/mediaapi/{writers => fileutils}/fileutils.go (91%) diff --git a/src/github.com/matrix-org/dendrite/mediaapi/writers/fileutils.go b/src/github.com/matrix-org/dendrite/mediaapi/fileutils/fileutils.go similarity index 91% rename from src/github.com/matrix-org/dendrite/mediaapi/writers/fileutils.go rename to src/github.com/matrix-org/dendrite/mediaapi/fileutils/fileutils.go index b1123bc4a..99d6dbd2c 100644 --- a/src/github.com/matrix-org/dendrite/mediaapi/writers/fileutils.go +++ b/src/github.com/matrix-org/dendrite/mediaapi/fileutils/fileutils.go @@ -12,7 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. -package writers +package fileutils import ( "bufio" @@ -32,7 +32,8 @@ import ( "github.com/matrix-org/dendrite/mediaapi/types" ) -func removeDir(dir types.Path, logger *log.Entry) { +// RemoveDir removes a directory and logs a warning in case of errors +func RemoveDir(dir types.Path, logger *log.Entry) { dirErr := os.RemoveAll(string(dir)) if dirErr != nil { logger.WithError(dirErr).WithField("dir", dir).Warn("Failed to remove directory") @@ -77,7 +78,8 @@ func createTempFileWriter(absBasePath types.Path) (*bufio.Writer, *os.File, type } var ( - errFileIsTooLarge = fmt.Errorf("file is too large") + // ErrFileIsTooLarge indicates that the uploaded file is larger than the configured maximum file size + ErrFileIsTooLarge = fmt.Errorf("file is too large") errRead = fmt.Errorf("failed to read response from remote server") errResponse = fmt.Errorf("failed to write file data to response body") errHash = fmt.Errorf("failed to hash file data") @@ -104,7 +106,7 @@ func writeToResponse(respWriter http.ResponseWriter, buffer []byte, bytesToWrite func writeToDiskAndHasher(tmpFileWriter *bufio.Writer, hasher hash.Hash, bytesWritten int64, maxFileSizeBytes types.ContentLength, buffer []byte, bytesToWrite int) (int64, error) { // if larger than maxFileSizeBytes then stop writing to disk and discard cached file if bytesWritten+int64(bytesToWrite) > int64(maxFileSizeBytes) { - return 0, errFileIsTooLarge + return 0, ErrFileIsTooLarge } // write to hasher and to disk bytesTemp, writeErr := tmpFileWriter.Write(buffer[:bytesToWrite]) @@ -117,7 +119,7 @@ func writeToDiskAndHasher(tmpFileWriter *bufio.Writer, hasher hash.Hash, bytesWr return int64(bytesTemp), nil } -// readAndHashAndWriteWithLimit works like io.Copy except it copies from the reqReader to the +// ReadAndHashAndWriteWithLimit works like io.Copy except it copies from the reqReader to the // optionally-supplied respWriter and a temporary file named 'content' using a bufio.Writer. // The data written to disk is hashed using the SHA-256 algorithm. // If there is an error with the reqReader or the respWriter, that is considered an error. 
@@ -127,7 +129,7 @@ func writeToDiskAndHasher(tmpFileWriter *bufio.Writer, hasher hash.Hash, bytesWr // If a respWriter is provided, all the data will be proxied from the reqReader to // the respWriter, regardless of errors or limits on writing to disk. // Returns all of the hash sum, bytes written to disk, and temporary directory path, or an error. -func readAndHashAndWriteWithLimit(reqReader io.Reader, maxFileSizeBytes types.ContentLength, absBasePath types.Path, respWriter http.ResponseWriter) (types.Base64Hash, types.ContentLength, types.ContentLength, types.Path, error) { +func ReadAndHashAndWriteWithLimit(reqReader io.Reader, maxFileSizeBytes types.ContentLength, absBasePath types.Path, respWriter http.ResponseWriter) (types.Base64Hash, types.ContentLength, types.ContentLength, types.Path, error) { // create the temporary file writer tmpFileWriter, tmpFile, tmpDir, err := createTempFileWriter(absBasePath) if err != nil { @@ -157,7 +159,7 @@ func readAndHashAndWriteWithLimit(reqReader io.Reader, maxFileSizeBytes types.Co bytesTemp, copyError = writeToResponse(respWriter, buffer, bytesRead) bytesResponded += bytesTemp if copyError == nil { - // Note: if we get here then copyError != errFileIsTooLarge && copyError != errWrite + // Note: if we get here then copyError != ErrFileIsTooLarge && copyError != errWrite // as if copyError == errResponse || copyError == errWrite then we would have broken // out of the loop and there are no other cases bytesTemp, copyError = writeToDiskAndHasher(tmpFileWriter, hasher, bytesWritten, maxFileSizeBytes, buffer, (bytesRead)) @@ -186,7 +188,7 @@ func readAndHashAndWriteWithLimit(reqReader io.Reader, maxFileSizeBytes types.Co return types.Base64Hash(base64.URLEncoding.EncodeToString(hash[:])), types.ContentLength(bytesResponded), types.ContentLength(bytesWritten), tmpDir, nil } -// getPathFromMediaMetadata validates and constructs the on-disk path to the media +// GetPathFromMediaMetadata validates and constructs the on-disk path to the media // based on its origin and mediaID // If a mediaID is too short, which could happen for other homeserver implementations, // place it into a short-id subdirectory of the origin directory @@ -195,7 +197,7 @@ func readAndHashAndWriteWithLimit(reqReader io.Reader, maxFileSizeBytes types.Co // mediaID is 'qwerty', we create subdirectories 'q', 'w' within 'q' and place the file // in 'q/w' calling it 'erty'. If the mediaID is shorter than 3 characters, the last // character is the file name and the preceding character, if any, is a subdirectory name. -func getPathFromMediaMetadata(m *types.MediaMetadata, absBasePath types.Path) (string, error) { +func GetPathFromMediaMetadata(m *types.MediaMetadata, absBasePath types.Path) (string, error) { var subPath, fileName string hashLen := len(m.Base64Hash) @@ -253,16 +255,16 @@ func moveFile(src types.Path, dst types.Path) error { return nil } -// moveFileWithHashCheck attempts to move the file src to dst and checks for hash collisions based on metadata +// MoveFileWithHashCheck attempts to move the file src to dst and checks for hash collisions based on metadata // Check if destination file exists. As the destination is based on a hash of the file data, // if it exists and the content length does not match then there is a hash collision for two different files. If // it exists and the content length matches, it is believable that it is the same file and we can just // discard the temporary file. 
-func moveFileWithHashCheck(tmpDir types.Path, mediaMetadata *types.MediaMetadata, absBasePath types.Path, logger *log.Entry) (string, bool, error) { +func MoveFileWithHashCheck(tmpDir types.Path, mediaMetadata *types.MediaMetadata, absBasePath types.Path, logger *log.Entry) (string, bool, error) { duplicate := false - finalPath, err := getPathFromMediaMetadata(mediaMetadata, absBasePath) + finalPath, err := GetPathFromMediaMetadata(mediaMetadata, absBasePath) if err != nil { - removeDir(tmpDir, logger) + RemoveDir(tmpDir, logger) return "", duplicate, fmt.Errorf("failed to get file path from metadata: %q", err) } @@ -270,11 +272,11 @@ func moveFileWithHashCheck(tmpDir types.Path, mediaMetadata *types.MediaMetadata if stat, err = os.Stat(finalPath); os.IsExist(err) { duplicate = true if stat.Size() == int64(mediaMetadata.ContentLength) { - removeDir(tmpDir, logger) + RemoveDir(tmpDir, logger) return finalPath, duplicate, nil } // Remove the tmpDir as we anyway cannot cache the file on disk due to the hash collision - removeDir(tmpDir, logger) + RemoveDir(tmpDir, logger) return "", duplicate, fmt.Errorf("downloaded file with hash collision but different file size (%v)", finalPath) } err = moveFile( @@ -282,7 +284,7 @@ func moveFileWithHashCheck(tmpDir types.Path, mediaMetadata *types.MediaMetadata types.Path(finalPath), ) if err != nil { - removeDir(tmpDir, logger) + RemoveDir(tmpDir, logger) return "", duplicate, fmt.Errorf("failed to move file to final destination (%v): %q", finalPath, err) } return finalPath, duplicate, nil diff --git a/src/github.com/matrix-org/dendrite/mediaapi/writers/download.go b/src/github.com/matrix-org/dendrite/mediaapi/writers/download.go index ab713fa5f..eed3bb8a7 100644 --- a/src/github.com/matrix-org/dendrite/mediaapi/writers/download.go +++ b/src/github.com/matrix-org/dendrite/mediaapi/writers/download.go @@ -30,6 +30,7 @@ import ( log "github.com/Sirupsen/logrus" "github.com/matrix-org/dendrite/clientapi/jsonerror" "github.com/matrix-org/dendrite/mediaapi/config" + "github.com/matrix-org/dendrite/mediaapi/fileutils" "github.com/matrix-org/dendrite/mediaapi/storage" "github.com/matrix-org/dendrite/mediaapi/types" "github.com/matrix-org/gomatrixserverlib" @@ -183,7 +184,7 @@ func (r *downloadRequest) respondFromLocalFile(w http.ResponseWriter, absBasePat "Content-Disposition": r.MediaMetadata.ContentDisposition, }).Infof("Downloading file") - filePath, err := getPathFromMediaMetadata(r.MediaMetadata, absBasePath) + filePath, err := fileutils.GetPathFromMediaMetadata(r.MediaMetadata, absBasePath) if err != nil { // FIXME: Remove erroneous file from database? r.Logger.WithError(err).Warn("Failed to get file path from metadata") @@ -341,7 +342,7 @@ func (r *downloadRequest) commitFileAndMetadata(tmpDir types.Path, absBasePath t }).Info("Storing file metadata to media repository database") // The database is the source of truth so we need to have moved the file first - finalPath, duplicate, err := moveFileWithHashCheck(tmpDir, r.MediaMetadata, absBasePath, r.Logger) + finalPath, duplicate, err := fileutils.MoveFileWithHashCheck(tmpDir, r.MediaMetadata, absBasePath, r.Logger) if err != nil { r.Logger.WithError(err).Error("Failed to move file.") return updateActiveRemoteRequests @@ -367,7 +368,7 @@ func (r *downloadRequest) commitFileAndMetadata(tmpDir types.Path, absBasePath t // remove the file if it is not a duplicate. 
if duplicate == false { finalDir := path.Dir(finalPath) - removeDir(types.Path(finalDir), r.Logger) + fileutils.RemoveDir(types.Path(finalDir), r.Logger) } completeRemoteRequest(activeRemoteRequests, mxcURL) return updateActiveRemoteRequests @@ -447,18 +448,18 @@ func (r *downloadRequest) respondFromRemoteFile(w http.ResponseWriter, absBasePa // integrity checks on the file data in the repository. // bytesResponded is the total number of bytes written to the response to the client request // bytesWritten is the total number of bytes written to disk - hash, bytesResponded, bytesWritten, tmpDir, copyError := readAndHashAndWriteWithLimit(resp.Body, maxFileSizeBytes, absBasePath, w) + hash, bytesResponded, bytesWritten, tmpDir, copyError := fileutils.ReadAndHashAndWriteWithLimit(resp.Body, maxFileSizeBytes, absBasePath, w) if copyError != nil { logFields := log.Fields{ "MediaID": r.MediaMetadata.MediaID, "Origin": r.MediaMetadata.Origin, } - if copyError == errFileIsTooLarge { + if copyError == fileutils.ErrFileIsTooLarge { logFields["MaxFileSizeBytes"] = maxFileSizeBytes } r.Logger.WithError(copyError).WithFields(logFields).Warn("Error while transferring file") - removeDir(tmpDir, r.Logger) + fileutils.RemoveDir(tmpDir, r.Logger) // Note: if we have responded with any data in the body at all then we have already sent 200 OK and we can only abort at this point if bytesResponded < 1 { r.jsonErrorResponse(w, util.JSONResponse{ diff --git a/src/github.com/matrix-org/dendrite/mediaapi/writers/upload.go b/src/github.com/matrix-org/dendrite/mediaapi/writers/upload.go index 947d7d94b..497cae79e 100644 --- a/src/github.com/matrix-org/dendrite/mediaapi/writers/upload.go +++ b/src/github.com/matrix-org/dendrite/mediaapi/writers/upload.go @@ -26,6 +26,7 @@ import ( "github.com/matrix-org/dendrite/clientapi/auth" "github.com/matrix-org/dendrite/clientapi/jsonerror" "github.com/matrix-org/dendrite/mediaapi/config" + "github.com/matrix-org/dendrite/mediaapi/fileutils" "github.com/matrix-org/dendrite/mediaapi/storage" "github.com/matrix-org/dendrite/mediaapi/types" "github.com/matrix-org/util" @@ -142,7 +143,7 @@ func parseAndValidateRequest(req *http.Request, cfg *config.MediaAPI) (*uploadRe // In case of any error, appropriate files and directories are cleaned up a // util.JSONResponse error is returned. func storeFileAndMetadata(tmpDir types.Path, absBasePath types.Path, mediaMetadata *types.MediaMetadata, db *storage.Database, logger *log.Entry) *util.JSONResponse { - finalPath, duplicate, err := moveFileWithHashCheck(tmpDir, mediaMetadata, absBasePath, logger) + finalPath, duplicate, err := fileutils.MoveFileWithHashCheck(tmpDir, mediaMetadata, absBasePath, logger) if err != nil { logger.WithError(err).Error("Failed to move file.") return &util.JSONResponse{ @@ -160,7 +161,7 @@ func storeFileAndMetadata(tmpDir types.Path, absBasePath types.Path, mediaMetada // there is valid metadata in the database for that file. As such we only // remove the file if it is not a duplicate. if duplicate == false { - removeDir(types.Path(path.Dir(finalPath)), logger) + fileutils.RemoveDir(types.Path(path.Dir(finalPath)), logger) } return &util.JSONResponse{ Code: 400, @@ -197,18 +198,18 @@ func Upload(req *http.Request, cfg *config.MediaAPI, db *storage.Database) util. // method of deduplicating files to save storage, as well as a way to conduct // integrity checks on the file data in the repository. 
// bytesWritten is the total number of bytes written to disk - hash, _, bytesWritten, tmpDir, copyError := readAndHashAndWriteWithLimit(req.Body, cfg.MaxFileSizeBytes, cfg.AbsBasePath, nil) + hash, _, bytesWritten, tmpDir, copyError := fileutils.ReadAndHashAndWriteWithLimit(req.Body, cfg.MaxFileSizeBytes, cfg.AbsBasePath, nil) if copyError != nil { logFields := log.Fields{ "Origin": r.MediaMetadata.Origin, "MediaID": r.MediaMetadata.MediaID, } - if copyError == errFileIsTooLarge { + if copyError == fileutils.ErrFileIsTooLarge { logFields["MaxFileSizeBytes"] = cfg.MaxFileSizeBytes } logger.WithError(copyError).WithFields(logFields).Warn("Error while transferring file") - removeDir(tmpDir, logger) + fileutils.RemoveDir(tmpDir, logger) return util.JSONResponse{ Code: 400, JSON: jsonerror.Unknown(fmt.Sprintf("Failed to upload")), @@ -233,7 +234,7 @@ func Upload(req *http.Request, cfg *config.MediaAPI, db *storage.Database) util. mediaMetadata, err := db.GetMediaMetadata(r.MediaMetadata.MediaID, r.MediaMetadata.Origin) if err == nil { r.MediaMetadata = mediaMetadata - removeDir(tmpDir, logger) + fileutils.RemoveDir(tmpDir, logger) return util.JSONResponse{ Code: 200, JSON: uploadResponse{ From 6fe6f45093e63e49819e41b4fb87d7137e7c83c8 Mon Sep 17 00:00:00 2001 From: Robert Swain Date: Mon, 22 May 2017 14:13:57 +0200 Subject: [PATCH 092/108] mediaapi/writers: Remove unnecessary comparison to true --- src/github.com/matrix-org/dendrite/mediaapi/writers/download.go | 2 +- src/github.com/matrix-org/dendrite/mediaapi/writers/upload.go | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/github.com/matrix-org/dendrite/mediaapi/writers/download.go b/src/github.com/matrix-org/dendrite/mediaapi/writers/download.go index eed3bb8a7..4c682ce99 100644 --- a/src/github.com/matrix-org/dendrite/mediaapi/writers/download.go +++ b/src/github.com/matrix-org/dendrite/mediaapi/writers/download.go @@ -347,7 +347,7 @@ func (r *downloadRequest) commitFileAndMetadata(tmpDir types.Path, absBasePath t r.Logger.WithError(err).Error("Failed to move file.") return updateActiveRemoteRequests } - if duplicate == true { + if duplicate { r.Logger.WithField("dst", finalPath).Info("File was stored previously - discarding duplicate") // Continue on to store the metadata in the database } diff --git a/src/github.com/matrix-org/dendrite/mediaapi/writers/upload.go b/src/github.com/matrix-org/dendrite/mediaapi/writers/upload.go index 497cae79e..f8e635ece 100644 --- a/src/github.com/matrix-org/dendrite/mediaapi/writers/upload.go +++ b/src/github.com/matrix-org/dendrite/mediaapi/writers/upload.go @@ -151,7 +151,7 @@ func storeFileAndMetadata(tmpDir types.Path, absBasePath types.Path, mediaMetada JSON: jsonerror.Unknown(fmt.Sprintf("Failed to upload")), } } - if duplicate == true { + if duplicate { logger.WithField("dst", finalPath).Info("File was stored previously - discarding duplicate") } From 1e639b08682a2de83055fc5ab51664faef183fb6 Mon Sep 17 00:00:00 2001 From: Robert Swain Date: Mon, 22 May 2017 14:23:46 +0200 Subject: [PATCH 093/108] mediaapi/config: Document that MaxFileSizeBytes 0 means unlimited --- .../dendrite/cmd/dendrite-media-api-server/main.go | 11 ++++++----- .../matrix-org/dendrite/mediaapi/config/config.go | 1 + 2 files changed, 7 insertions(+), 5 deletions(-) diff --git a/src/github.com/matrix-org/dendrite/cmd/dendrite-media-api-server/main.go b/src/github.com/matrix-org/dendrite/cmd/dendrite-media-api-server/main.go index d7191508a..b053076a2 100644 --- 
a/src/github.com/matrix-org/dendrite/cmd/dendrite-media-api-server/main.go +++ b/src/github.com/matrix-org/dendrite/cmd/dendrite-media-api-server/main.go @@ -31,11 +31,12 @@ import ( ) var ( - bindAddr = os.Getenv("BIND_ADDRESS") - dataSource = os.Getenv("DATABASE") - logDir = os.Getenv("LOG_DIR") - serverName = os.Getenv("SERVER_NAME") - basePath = os.Getenv("BASE_PATH") + bindAddr = os.Getenv("BIND_ADDRESS") + dataSource = os.Getenv("DATABASE") + logDir = os.Getenv("LOG_DIR") + serverName = os.Getenv("SERVER_NAME") + basePath = os.Getenv("BASE_PATH") + // Note: if the MAX_FILE_SIZE_BYTES is set to 0, it will be unlimited maxFileSizeBytesString = os.Getenv("MAX_FILE_SIZE_BYTES") ) diff --git a/src/github.com/matrix-org/dendrite/mediaapi/config/config.go b/src/github.com/matrix-org/dendrite/mediaapi/config/config.go index c7346d313..6163f2315 100644 --- a/src/github.com/matrix-org/dendrite/mediaapi/config/config.go +++ b/src/github.com/matrix-org/dendrite/mediaapi/config/config.go @@ -27,6 +27,7 @@ type MediaAPI struct { AbsBasePath types.Path `yaml:"abs_base_path"` // The maximum file size in bytes that is allowed to be stored on this server. // Note that remote files larger than this can still be proxied to a client, they will just not be cached. + // Note: if MaxFileSizeBytes is set to 0, the size is unlimited. MaxFileSizeBytes types.ContentLength `yaml:"max_file_size_bytes"` // The postgres connection config for connecting to the database e.g a postgres:// URI DataSource string `yaml:"database"` From 2004dfcfcf4389a9ad3d454c2d7f58d5be39af83 Mon Sep 17 00:00:00 2001 From: Robert Swain Date: Mon, 22 May 2017 15:07:45 +0200 Subject: [PATCH 094/108] mediaapi: ContentLength -> FileSizeBytes --- .../cmd/dendrite-media-api-server/main.go | 2 +- .../dendrite/mediaapi/config/config.go | 2 +- .../dendrite/mediaapi/fileutils/fileutils.go | 12 +++++----- .../storage/media_repository_table.go | 10 ++++----- .../dendrite/mediaapi/types/types.go | 6 ++--- .../dendrite/mediaapi/writers/download.go | 22 +++++++++---------- .../dendrite/mediaapi/writers/upload.go | 14 ++++++------ 7 files changed, 34 insertions(+), 34 deletions(-) diff --git a/src/github.com/matrix-org/dendrite/cmd/dendrite-media-api-server/main.go b/src/github.com/matrix-org/dendrite/cmd/dendrite-media-api-server/main.go index b053076a2..298762482 100644 --- a/src/github.com/matrix-org/dendrite/cmd/dendrite-media-api-server/main.go +++ b/src/github.com/matrix-org/dendrite/cmd/dendrite-media-api-server/main.go @@ -66,7 +66,7 @@ func main() { cfg := &config.MediaAPI{ ServerName: gomatrixserverlib.ServerName(serverName), AbsBasePath: types.Path(absBasePath), - MaxFileSizeBytes: types.ContentLength(maxFileSizeBytes), + MaxFileSizeBytes: types.FileSizeBytes(maxFileSizeBytes), DataSource: dataSource, } diff --git a/src/github.com/matrix-org/dendrite/mediaapi/config/config.go b/src/github.com/matrix-org/dendrite/mediaapi/config/config.go index 6163f2315..a2d8f43c6 100644 --- a/src/github.com/matrix-org/dendrite/mediaapi/config/config.go +++ b/src/github.com/matrix-org/dendrite/mediaapi/config/config.go @@ -28,7 +28,7 @@ type MediaAPI struct { // The maximum file size in bytes that is allowed to be stored on this server. // Note that remote files larger than this can still be proxied to a client, they will just not be cached. // Note: if MaxFileSizeBytes is set to 0, the size is unlimited. 
- MaxFileSizeBytes types.ContentLength `yaml:"max_file_size_bytes"` + MaxFileSizeBytes types.FileSizeBytes `yaml:"max_file_size_bytes"` // The postgres connection config for connecting to the database e.g a postgres:// URI DataSource string `yaml:"database"` } diff --git a/src/github.com/matrix-org/dendrite/mediaapi/fileutils/fileutils.go b/src/github.com/matrix-org/dendrite/mediaapi/fileutils/fileutils.go index 99d6dbd2c..5db185e8d 100644 --- a/src/github.com/matrix-org/dendrite/mediaapi/fileutils/fileutils.go +++ b/src/github.com/matrix-org/dendrite/mediaapi/fileutils/fileutils.go @@ -103,7 +103,7 @@ func writeToResponse(respWriter http.ResponseWriter, buffer []byte, bytesToWrite // writeToDiskAndHasher takes bytesToWrite bytes from buffer and writes them to tmpFileWriter and hasher. // Returns bytes written and an error. In case of error, including if writing would exceed maxFileSizeBytes, // the number of bytes written will be 0. -func writeToDiskAndHasher(tmpFileWriter *bufio.Writer, hasher hash.Hash, bytesWritten int64, maxFileSizeBytes types.ContentLength, buffer []byte, bytesToWrite int) (int64, error) { +func writeToDiskAndHasher(tmpFileWriter *bufio.Writer, hasher hash.Hash, bytesWritten int64, maxFileSizeBytes types.FileSizeBytes, buffer []byte, bytesToWrite int) (int64, error) { // if larger than maxFileSizeBytes then stop writing to disk and discard cached file if bytesWritten+int64(bytesToWrite) > int64(maxFileSizeBytes) { return 0, ErrFileIsTooLarge @@ -129,7 +129,7 @@ func writeToDiskAndHasher(tmpFileWriter *bufio.Writer, hasher hash.Hash, bytesWr // If a respWriter is provided, all the data will be proxied from the reqReader to // the respWriter, regardless of errors or limits on writing to disk. // Returns all of the hash sum, bytes written to disk, and temporary directory path, or an error. -func ReadAndHashAndWriteWithLimit(reqReader io.Reader, maxFileSizeBytes types.ContentLength, absBasePath types.Path, respWriter http.ResponseWriter) (types.Base64Hash, types.ContentLength, types.ContentLength, types.Path, error) { +func ReadAndHashAndWriteWithLimit(reqReader io.Reader, maxFileSizeBytes types.FileSizeBytes, absBasePath types.Path, respWriter http.ResponseWriter) (types.Base64Hash, types.FileSizeBytes, types.FileSizeBytes, types.Path, error) { // create the temporary file writer tmpFileWriter, tmpFile, tmpDir, err := createTempFileWriter(absBasePath) if err != nil { @@ -185,7 +185,7 @@ func ReadAndHashAndWriteWithLimit(reqReader io.Reader, maxFileSizeBytes types.Co tmpFileWriter.Flush() hash := hasher.Sum(nil) - return types.Base64Hash(base64.URLEncoding.EncodeToString(hash[:])), types.ContentLength(bytesResponded), types.ContentLength(bytesWritten), tmpDir, nil + return types.Base64Hash(base64.URLEncoding.EncodeToString(hash[:])), types.FileSizeBytes(bytesResponded), types.FileSizeBytes(bytesWritten), tmpDir, nil } // GetPathFromMediaMetadata validates and constructs the on-disk path to the media @@ -257,8 +257,8 @@ func moveFile(src types.Path, dst types.Path) error { // MoveFileWithHashCheck attempts to move the file src to dst and checks for hash collisions based on metadata // Check if destination file exists. As the destination is based on a hash of the file data, -// if it exists and the content length does not match then there is a hash collision for two different files. 
If -// it exists and the content length matches, it is believable that it is the same file and we can just +// if it exists and the file size does not match then there is a hash collision for two different files. If +// it exists and the file size matches, it is believable that it is the same file and we can just // discard the temporary file. func MoveFileWithHashCheck(tmpDir types.Path, mediaMetadata *types.MediaMetadata, absBasePath types.Path, logger *log.Entry) (string, bool, error) { duplicate := false @@ -271,7 +271,7 @@ func MoveFileWithHashCheck(tmpDir types.Path, mediaMetadata *types.MediaMetadata var stat os.FileInfo if stat, err = os.Stat(finalPath); os.IsExist(err) { duplicate = true - if stat.Size() == int64(mediaMetadata.ContentLength) { + if stat.Size() == int64(mediaMetadata.FileSizeBytes) { RemoveDir(tmpDir, logger) return finalPath, duplicate, nil } diff --git a/src/github.com/matrix-org/dendrite/mediaapi/storage/media_repository_table.go b/src/github.com/matrix-org/dendrite/mediaapi/storage/media_repository_table.go index c19a9d9b3..a3b1c7594 100644 --- a/src/github.com/matrix-org/dendrite/mediaapi/storage/media_repository_table.go +++ b/src/github.com/matrix-org/dendrite/mediaapi/storage/media_repository_table.go @@ -37,7 +37,7 @@ CREATE TABLE IF NOT EXISTS media_repository ( -- The HTTP Content-Disposition header for the media file as specified when uploading. content_disposition TEXT NOT NULL, -- Size of the media file in bytes. - content_length BIGINT NOT NULL, + file_size_bytes BIGINT NOT NULL, -- When the content was uploaded in UNIX epoch ms. creation_ts BIGINT NOT NULL, -- The file name with which the media was uploaded. @@ -51,12 +51,12 @@ CREATE UNIQUE INDEX IF NOT EXISTS media_repository_index ON media_repository (me ` const insertMediaSQL = ` -INSERT INTO media_repository (media_id, media_origin, content_type, content_disposition, content_length, creation_ts, upload_name, base64hash, user_id) +INSERT INTO media_repository (media_id, media_origin, content_type, content_disposition, file_size_bytes, creation_ts, upload_name, base64hash, user_id) VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9) ` const selectMediaSQL = ` -SELECT content_type, content_disposition, content_length, creation_ts, upload_name, base64hash, user_id FROM media_repository WHERE media_id = $1 AND media_origin = $2 +SELECT content_type, content_disposition, file_size_bytes, creation_ts, upload_name, base64hash, user_id FROM media_repository WHERE media_id = $1 AND media_origin = $2 ` type mediaStatements struct { @@ -83,7 +83,7 @@ func (s *mediaStatements) insertMedia(mediaMetadata *types.MediaMetadata) error mediaMetadata.Origin, mediaMetadata.ContentType, mediaMetadata.ContentDisposition, - mediaMetadata.ContentLength, + mediaMetadata.FileSizeBytes, mediaMetadata.CreationTimestamp, mediaMetadata.UploadName, mediaMetadata.Base64Hash, @@ -102,7 +102,7 @@ func (s *mediaStatements) selectMedia(mediaID types.MediaID, mediaOrigin gomatri ).Scan( &mediaMetadata.ContentType, &mediaMetadata.ContentDisposition, - &mediaMetadata.ContentLength, + &mediaMetadata.FileSizeBytes, &mediaMetadata.CreationTimestamp, &mediaMetadata.UploadName, &mediaMetadata.Base64Hash, diff --git a/src/github.com/matrix-org/dendrite/mediaapi/types/types.go b/src/github.com/matrix-org/dendrite/mediaapi/types/types.go index bef86adcc..cef390cf0 100644 --- a/src/github.com/matrix-org/dendrite/mediaapi/types/types.go +++ b/src/github.com/matrix-org/dendrite/mediaapi/types/types.go @@ -23,8 +23,8 @@ import ( // ContentDisposition is 
an HTTP Content-Disposition header string type ContentDisposition string -// ContentLength is an HTTP Content-Length header which is a number of bytes to be expected in a request body -type ContentLength int64 +// FileSizeBytes is a file size in bytes +type FileSizeBytes int64 // ContentType is an HTTP Content-Type header string representing the MIME type of a request body type ContentType string @@ -56,7 +56,7 @@ type MediaMetadata struct { Origin gomatrixserverlib.ServerName ContentType ContentType ContentDisposition ContentDisposition - ContentLength ContentLength + FileSizeBytes FileSizeBytes CreationTimestamp UnixMs UploadName Filename Base64Hash Base64Hash diff --git a/src/github.com/matrix-org/dendrite/mediaapi/writers/download.go b/src/github.com/matrix-org/dendrite/mediaapi/writers/download.go index 4c682ce99..98083c756 100644 --- a/src/github.com/matrix-org/dendrite/mediaapi/writers/download.go +++ b/src/github.com/matrix-org/dendrite/mediaapi/writers/download.go @@ -179,7 +179,7 @@ func (r *downloadRequest) respondFromLocalFile(w http.ResponseWriter, absBasePat "Origin": r.MediaMetadata.Origin, "UploadName": r.MediaMetadata.UploadName, "Base64Hash": r.MediaMetadata.Base64Hash, - "Content-Length": r.MediaMetadata.ContentLength, + "FileSizeBytes": r.MediaMetadata.FileSizeBytes, "Content-Type": r.MediaMetadata.ContentType, "Content-Disposition": r.MediaMetadata.ContentDisposition, }).Infof("Downloading file") @@ -216,11 +216,11 @@ func (r *downloadRequest) respondFromLocalFile(w http.ResponseWriter, absBasePat return } - if r.MediaMetadata.ContentLength > 0 && int64(r.MediaMetadata.ContentLength) != stat.Size() { + if r.MediaMetadata.FileSizeBytes > 0 && int64(r.MediaMetadata.FileSizeBytes) != stat.Size() { r.Logger.WithFields(log.Fields{ - "contentLength": r.MediaMetadata.ContentLength, - "fileSize": stat.Size(), - }).Warn("Content-Length in database and on-disk file size differ.") + "fileSizeDatabase": r.MediaMetadata.FileSizeBytes, + "fileSizeDisk": stat.Size(), + }).Warn("File size in database and on-disk differ.") // FIXME: Remove erroneous file from database? 
} @@ -336,7 +336,7 @@ func (r *downloadRequest) commitFileAndMetadata(tmpDir types.Path, absBasePath t "Origin": r.MediaMetadata.Origin, "Base64Hash": r.MediaMetadata.Base64Hash, "UploadName": r.MediaMetadata.UploadName, - "Content-Length": r.MediaMetadata.ContentLength, + "FileSizeBytes": r.MediaMetadata.FileSizeBytes, "Content-Type": r.MediaMetadata.ContentType, "Content-Disposition": r.MediaMetadata.ContentDisposition, }).Info("Storing file metadata to media repository database") @@ -381,7 +381,7 @@ func (r *downloadRequest) commitFileAndMetadata(tmpDir types.Path, absBasePath t return updateActiveRemoteRequests } -func (r *downloadRequest) respondFromRemoteFile(w http.ResponseWriter, absBasePath types.Path, maxFileSizeBytes types.ContentLength, db *storage.Database, activeRemoteRequests *types.ActiveRemoteRequests) { +func (r *downloadRequest) respondFromRemoteFile(w http.ResponseWriter, absBasePath types.Path, maxFileSizeBytes types.FileSizeBytes, db *storage.Database, activeRemoteRequests *types.ActiveRemoteRequests) { r.Logger.WithFields(log.Fields{ "Origin": r.MediaMetadata.Origin, "MediaID": r.MediaMetadata.MediaID, @@ -415,7 +415,7 @@ func (r *downloadRequest) respondFromRemoteFile(w http.ResponseWriter, absBasePa if err != nil { r.Logger.WithError(err).Warn("Failed to parse content length") } - r.MediaMetadata.ContentLength = types.ContentLength(contentLength) + r.MediaMetadata.FileSizeBytes = types.FileSizeBytes(contentLength) r.MediaMetadata.ContentType = types.ContentType(resp.Header.Get("Content-Type")) r.MediaMetadata.ContentDisposition = types.ContentDisposition(resp.Header.Get("Content-Disposition")) @@ -428,7 +428,7 @@ func (r *downloadRequest) respondFromRemoteFile(w http.ResponseWriter, absBasePa }).Infof("Connected to remote") w.Header().Set("Content-Type", string(r.MediaMetadata.ContentType)) - w.Header().Set("Content-Length", strconv.FormatInt(int64(r.MediaMetadata.ContentLength), 10)) + w.Header().Set("Content-Length", strconv.FormatInt(int64(r.MediaMetadata.FileSizeBytes), 10)) contentSecurityPolicy := "default-src 'none';" + " script-src 'none';" + " plugin-types application/pdf;" + @@ -483,7 +483,7 @@ func (r *downloadRequest) respondFromRemoteFile(w http.ResponseWriter, absBasePa // It's possible the bytesWritten to the temporary file is different to the reported Content-Length from the remote // request's response. bytesWritten is therefore used as it is what would be sent to clients when reading from the local // file. 
- r.MediaMetadata.ContentLength = types.ContentLength(bytesWritten) + r.MediaMetadata.FileSizeBytes = types.FileSizeBytes(bytesWritten) r.MediaMetadata.Base64Hash = hash r.MediaMetadata.UserID = types.MatrixUserID("@:" + string(r.MediaMetadata.Origin)) @@ -496,7 +496,7 @@ func (r *downloadRequest) respondFromRemoteFile(w http.ResponseWriter, absBasePa "Origin": r.MediaMetadata.Origin, "UploadName": r.MediaMetadata.UploadName, "Base64Hash": r.MediaMetadata.Base64Hash, - "Content-Length": r.MediaMetadata.ContentLength, + "FileSizeBytes": r.MediaMetadata.FileSizeBytes, "Content-Type": r.MediaMetadata.ContentType, "Content-Disposition": r.MediaMetadata.ContentDisposition, }).Infof("Remote file cached") diff --git a/src/github.com/matrix-org/dendrite/mediaapi/writers/upload.go b/src/github.com/matrix-org/dendrite/mediaapi/writers/upload.go index f8e635ece..a515e09bc 100644 --- a/src/github.com/matrix-org/dendrite/mediaapi/writers/upload.go +++ b/src/github.com/matrix-org/dendrite/mediaapi/writers/upload.go @@ -40,16 +40,16 @@ type uploadRequest struct { } // Validate validates the uploadRequest fields -func (r uploadRequest) Validate(maxFileSizeBytes types.ContentLength) *util.JSONResponse { +func (r uploadRequest) Validate(maxFileSizeBytes types.FileSizeBytes) *util.JSONResponse { // TODO: Any validation to be done on ContentDisposition? - if r.MediaMetadata.ContentLength < 1 { + if r.MediaMetadata.FileSizeBytes < 1 { return &util.JSONResponse{ Code: 400, JSON: jsonerror.Unknown("HTTP Content-Length request header must be greater than zero."), } } - if maxFileSizeBytes > 0 && r.MediaMetadata.ContentLength > maxFileSizeBytes { + if maxFileSizeBytes > 0 && r.MediaMetadata.FileSizeBytes > maxFileSizeBytes { return &util.JSONResponse{ Code: 400, JSON: jsonerror.Unknown(fmt.Sprintf("HTTP Content-Length is greater than the maximum allowed upload size (%v).", maxFileSizeBytes)), @@ -113,7 +113,7 @@ func parseAndValidateRequest(req *http.Request, cfg *config.MediaAPI) (*uploadRe MediaMetadata: &types.MediaMetadata{ Origin: cfg.ServerName, ContentDisposition: types.ContentDisposition(req.Header.Get("Content-Disposition")), - ContentLength: types.ContentLength(req.ContentLength), + FileSizeBytes: types.FileSizeBytes(req.ContentLength), ContentType: types.ContentType(req.Header.Get("Content-Type")), UploadName: types.Filename(req.FormValue("filename")), UserID: types.MatrixUserID(userID), @@ -189,7 +189,7 @@ func Upload(req *http.Request, cfg *config.MediaAPI, db *storage.Database) util. logger.WithFields(log.Fields{ "Origin": r.MediaMetadata.Origin, "UploadName": r.MediaMetadata.UploadName, - "Content-Length": r.MediaMetadata.ContentLength, + "FileSizeBytes": r.MediaMetadata.FileSizeBytes, "Content-Type": r.MediaMetadata.ContentType, "Content-Disposition": r.MediaMetadata.ContentDisposition, }).Info("Uploading file") @@ -216,7 +216,7 @@ func Upload(req *http.Request, cfg *config.MediaAPI, db *storage.Database) util. } } - r.MediaMetadata.ContentLength = bytesWritten + r.MediaMetadata.FileSizeBytes = bytesWritten r.MediaMetadata.Base64Hash = hash r.MediaMetadata.MediaID = types.MediaID(hash) @@ -225,7 +225,7 @@ func Upload(req *http.Request, cfg *config.MediaAPI, db *storage.Database) util. 
"Origin": r.MediaMetadata.Origin, "Base64Hash": r.MediaMetadata.Base64Hash, "UploadName": r.MediaMetadata.UploadName, - "Content-Length": r.MediaMetadata.ContentLength, + "FileSizeBytes": r.MediaMetadata.FileSizeBytes, "Content-Type": r.MediaMetadata.ContentType, "Content-Disposition": r.MediaMetadata.ContentDisposition, }).Info("File uploaded") From 2f6db5d317b34814b0ea3f009514707c996dcb8e Mon Sep 17 00:00:00 2001 From: Robert Swain Date: Mon, 22 May 2017 15:11:28 +0200 Subject: [PATCH 095/108] mediaapi/storage: StoreMediaMetadata error if origin and id not unique in db --- src/github.com/matrix-org/dendrite/mediaapi/storage/storage.go | 1 + 1 file changed, 1 insertion(+) diff --git a/src/github.com/matrix-org/dendrite/mediaapi/storage/storage.go b/src/github.com/matrix-org/dendrite/mediaapi/storage/storage.go index 072ec4f8f..429af6d14 100644 --- a/src/github.com/matrix-org/dendrite/mediaapi/storage/storage.go +++ b/src/github.com/matrix-org/dendrite/mediaapi/storage/storage.go @@ -43,6 +43,7 @@ func Open(dataSourceName string) (*Database, error) { } // StoreMediaMetadata inserts the metadata about the uploaded media into the database. +// Returns an error if the combination of MediaID and Origin are not unique in the table. func (d *Database) StoreMediaMetadata(mediaMetadata *types.MediaMetadata) error { return d.statements.insertMedia(mediaMetadata) } From 703685f0dda40aa9a8f2d5bae823306e68a6d4cf Mon Sep 17 00:00:00 2001 From: Robert Swain Date: Mon, 22 May 2017 15:13:35 +0200 Subject: [PATCH 096/108] mediaapi/storage: Correct comment about Database --- src/github.com/matrix-org/dendrite/mediaapi/storage/storage.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/github.com/matrix-org/dendrite/mediaapi/storage/storage.go b/src/github.com/matrix-org/dendrite/mediaapi/storage/storage.go index 429af6d14..630809cbe 100644 --- a/src/github.com/matrix-org/dendrite/mediaapi/storage/storage.go +++ b/src/github.com/matrix-org/dendrite/mediaapi/storage/storage.go @@ -23,7 +23,7 @@ import ( "github.com/matrix-org/gomatrixserverlib" ) -// A Database is used to store room events and stream offsets. +// A Database is used to store metadata about a repository of media files. type Database struct { statements statements db *sql.DB From 6d000794eca3feb428232fc0f03442be93925565 Mon Sep 17 00:00:00 2001 From: Robert Swain Date: Mon, 22 May 2017 15:16:53 +0200 Subject: [PATCH 097/108] mediaapi/fileutils: Fix and simplify API doc --- .../dendrite/mediaapi/fileutils/fileutils.go | 12 ++++-------- 1 file changed, 4 insertions(+), 8 deletions(-) diff --git a/src/github.com/matrix-org/dendrite/mediaapi/fileutils/fileutils.go b/src/github.com/matrix-org/dendrite/mediaapi/fileutils/fileutils.go index 5db185e8d..6a520ee05 100644 --- a/src/github.com/matrix-org/dendrite/mediaapi/fileutils/fileutils.go +++ b/src/github.com/matrix-org/dendrite/mediaapi/fileutils/fileutils.go @@ -189,14 +189,10 @@ func ReadAndHashAndWriteWithLimit(reqReader io.Reader, maxFileSizeBytes types.Fi } // GetPathFromMediaMetadata validates and constructs the on-disk path to the media -// based on its origin and mediaID -// If a mediaID is too short, which could happen for other homeserver implementations, -// place it into a short-id subdirectory of the origin directory -// If the mediaID is long enough, we split it into pieces, creating up to 2 subdirectories -// for more manageable browsing and use the remainder as the file name. 
For example, if -// mediaID is 'qwerty', we create subdirectories 'q', 'w' within 'q' and place the file -// in 'q/w' calling it 'erty'. If the mediaID is shorter than 3 characters, the last -// character is the file name and the preceding character, if any, is a subdirectory name. +// based on its Base64Hash +// If the Base64Hash is long enough, we split it into pieces, creating up to 2 subdirectories +// for more manageable browsing and use the remainder as the file name. +// For example, if Base64Hash is 'qwerty', the path will be 'q/w/erty'. func GetPathFromMediaMetadata(m *types.MediaMetadata, absBasePath types.Path) (string, error) { var subPath, fileName string From 0ecb645f27221e782002089cfba3f3d922b4a036 Mon Sep 17 00:00:00 2001 From: Robert Swain Date: Mon, 22 May 2017 15:21:56 +0200 Subject: [PATCH 098/108] mediaapi/fileutils: Move logic from function to call site Makes for better readability. The only reason it was inside the function was to avoid being too complex for gocyclo checks. --- .../dendrite/mediaapi/fileutils/fileutils.go | 20 +++++++++---------- 1 file changed, 9 insertions(+), 11 deletions(-) diff --git a/src/github.com/matrix-org/dendrite/mediaapi/fileutils/fileutils.go b/src/github.com/matrix-org/dendrite/mediaapi/fileutils/fileutils.go index 6a520ee05..3220949a3 100644 --- a/src/github.com/matrix-org/dendrite/mediaapi/fileutils/fileutils.go +++ b/src/github.com/matrix-org/dendrite/mediaapi/fileutils/fileutils.go @@ -87,17 +87,13 @@ var ( ) // writeToResponse takes bytesToWrite bytes from buffer and writes them to respWriter -// Returns bytes written and an error. In case of error, or if there is no respWriter, -// the number of bytes written will be 0. +// Returns bytes written and an error. In case of error, the number of bytes written will be 0. func writeToResponse(respWriter http.ResponseWriter, buffer []byte, bytesToWrite int) (int64, error) { - if respWriter != nil { - bytesWritten, respErr := respWriter.Write(buffer[:bytesToWrite]) - if bytesWritten != bytesToWrite || (respErr != nil && respErr != io.EOF) { - return 0, errResponse - } - return int64(bytesWritten), nil + bytesWritten, respErr := respWriter.Write(buffer[:bytesToWrite]) + if bytesWritten != bytesToWrite || (respErr != nil && respErr != io.EOF) { + return 0, errResponse } - return 0, nil + return int64(bytesWritten), nil } // writeToDiskAndHasher takes bytesToWrite bytes from buffer and writes them to tmpFileWriter and hasher. @@ -156,8 +152,10 @@ func ReadAndHashAndWriteWithLimit(reqReader io.Reader, maxFileSizeBytes types.Fi if bytesRead > 0 { // Note: This code allows proxying files larger than maxFileSizeBytes! 
// write to client request's response body - bytesTemp, copyError = writeToResponse(respWriter, buffer, bytesRead) - bytesResponded += bytesTemp + if respWriter != nil { + bytesTemp, copyError = writeToResponse(respWriter, buffer, bytesRead) + bytesResponded += bytesTemp + } if copyError == nil { // Note: if we get here then copyError != ErrFileIsTooLarge && copyError != errWrite // as if copyError == errResponse || copyError == errWrite then we would have broken From 227fe6425274efc5585d500816b03a1409422a76 Mon Sep 17 00:00:00 2001 From: Robert Swain Date: Mon, 22 May 2017 15:38:51 +0200 Subject: [PATCH 099/108] mediaapi/writers: Pass used member instead of entire struct --- .../dendrite/mediaapi/fileutils/fileutils.go | 22 +++++++++---------- .../dendrite/mediaapi/writers/download.go | 2 +- 2 files changed, 12 insertions(+), 12 deletions(-) diff --git a/src/github.com/matrix-org/dendrite/mediaapi/fileutils/fileutils.go b/src/github.com/matrix-org/dendrite/mediaapi/fileutils/fileutils.go index 3220949a3..e2bba003c 100644 --- a/src/github.com/matrix-org/dendrite/mediaapi/fileutils/fileutils.go +++ b/src/github.com/matrix-org/dendrite/mediaapi/fileutils/fileutils.go @@ -191,28 +191,28 @@ func ReadAndHashAndWriteWithLimit(reqReader io.Reader, maxFileSizeBytes types.Fi // If the Base64Hash is long enough, we split it into pieces, creating up to 2 subdirectories // for more manageable browsing and use the remainder as the file name. // For example, if Base64Hash is 'qwerty', the path will be 'q/w/erty'. -func GetPathFromMediaMetadata(m *types.MediaMetadata, absBasePath types.Path) (string, error) { +func GetPathFromMediaMetadata(base64Hash types.Base64Hash, absBasePath types.Path) (string, error) { var subPath, fileName string - hashLen := len(m.Base64Hash) + hashLen := len(base64Hash) switch { case hashLen < 1: - return "", fmt.Errorf("Invalid filePath (Base64Hash too short): %q", m.Base64Hash) + return "", fmt.Errorf("Invalid filePath (Base64Hash too short): %q", base64Hash) case hashLen > 255: - return "", fmt.Errorf("Invalid filePath (Base64Hash too long - max 255 characters): %q", m.Base64Hash) + return "", fmt.Errorf("Invalid filePath (Base64Hash too long - max 255 characters): %q", base64Hash) case hashLen < 2: subPath = "" - fileName = string(m.Base64Hash) + fileName = string(base64Hash) case hashLen < 3: - subPath = string(m.Base64Hash[0:1]) - fileName = string(m.Base64Hash[1:]) + subPath = string(base64Hash[0:1]) + fileName = string(base64Hash[1:]) default: subPath = path.Join( - string(m.Base64Hash[0:1]), - string(m.Base64Hash[1:2]), + string(base64Hash[0:1]), + string(base64Hash[1:2]), ) - fileName = string(m.Base64Hash[2:]) + fileName = string(base64Hash[2:]) } filePath, err := filepath.Abs(path.Join( @@ -256,7 +256,7 @@ func moveFile(src types.Path, dst types.Path) error { // discard the temporary file. 
func MoveFileWithHashCheck(tmpDir types.Path, mediaMetadata *types.MediaMetadata, absBasePath types.Path, logger *log.Entry) (string, bool, error) { duplicate := false - finalPath, err := GetPathFromMediaMetadata(mediaMetadata, absBasePath) + finalPath, err := GetPathFromMediaMetadata(mediaMetadata.Base64Hash, absBasePath) if err != nil { RemoveDir(tmpDir, logger) return "", duplicate, fmt.Errorf("failed to get file path from metadata: %q", err) diff --git a/src/github.com/matrix-org/dendrite/mediaapi/writers/download.go b/src/github.com/matrix-org/dendrite/mediaapi/writers/download.go index 98083c756..ffb27cdee 100644 --- a/src/github.com/matrix-org/dendrite/mediaapi/writers/download.go +++ b/src/github.com/matrix-org/dendrite/mediaapi/writers/download.go @@ -184,7 +184,7 @@ func (r *downloadRequest) respondFromLocalFile(w http.ResponseWriter, absBasePat "Content-Disposition": r.MediaMetadata.ContentDisposition, }).Infof("Downloading file") - filePath, err := fileutils.GetPathFromMediaMetadata(r.MediaMetadata, absBasePath) + filePath, err := fileutils.GetPathFromMediaMetadata(r.MediaMetadata.Base64Hash, absBasePath) if err != nil { // FIXME: Remove erroneous file from database? r.Logger.WithError(err).Warn("Failed to get file path from metadata") From ac356721e6bb9b22b0c78c33d44de351c2540038 Mon Sep 17 00:00:00 2001 From: Robert Swain Date: Mon, 22 May 2017 16:24:08 +0200 Subject: [PATCH 100/108] mediaapi/fileutils: Rename and simplify documentation of WriteTempFile --- .../dendrite/mediaapi/fileutils/fileutils.go | 23 ++++++++----------- .../dendrite/mediaapi/writers/download.go | 2 +- .../dendrite/mediaapi/writers/upload.go | 2 +- 3 files changed, 12 insertions(+), 15 deletions(-) diff --git a/src/github.com/matrix-org/dendrite/mediaapi/fileutils/fileutils.go b/src/github.com/matrix-org/dendrite/mediaapi/fileutils/fileutils.go index e2bba003c..cf487a878 100644 --- a/src/github.com/matrix-org/dendrite/mediaapi/fileutils/fileutils.go +++ b/src/github.com/matrix-org/dendrite/mediaapi/fileutils/fileutils.go @@ -115,17 +115,15 @@ func writeToDiskAndHasher(tmpFileWriter *bufio.Writer, hasher hash.Hash, bytesWr return int64(bytesTemp), nil } -// ReadAndHashAndWriteWithLimit works like io.Copy except it copies from the reqReader to the -// optionally-supplied respWriter and a temporary file named 'content' using a bufio.Writer. -// The data written to disk is hashed using the SHA-256 algorithm. -// If there is an error with the reqReader or the respWriter, that is considered an error. -// If there is an error with the hasher or tmpFileWriter, that is non-critical and copying -// to the respWriter continues. -// maxFileSizeBytes limits the amount of data written to disk and the hasher. -// If a respWriter is provided, all the data will be proxied from the reqReader to -// the respWriter, regardless of errors or limits on writing to disk. -// Returns all of the hash sum, bytes written to disk, and temporary directory path, or an error. 
-func ReadAndHashAndWriteWithLimit(reqReader io.Reader, maxFileSizeBytes types.FileSizeBytes, absBasePath types.Path, respWriter http.ResponseWriter) (types.Base64Hash, types.FileSizeBytes, types.FileSizeBytes, types.Path, error) { +// WriteTempFile writes to a new temporary file +// * creates a temporary file +// * writes data from reqReader to disk and simultaneously hash it +// * the amount of data written to disk and hashed is limited by maxFileSizeBytes +// * if a respWriter is supplied, the data is also simultaneously written to that +// * data written to the respWriter is _not_ limited to maxFileSizeBytes such that +// the homeserver can proxy files larger than it is willing to cache +// Returns all of the hash sum, bytes written to disk, bytes proxied, and temporary directory path, or an error. +func WriteTempFile(reqReader io.Reader, maxFileSizeBytes types.FileSizeBytes, absBasePath types.Path, respWriter http.ResponseWriter) (types.Base64Hash, types.FileSizeBytes, types.FileSizeBytes, types.Path, error) { // create the temporary file writer tmpFileWriter, tmpFile, tmpDir, err := createTempFileWriter(absBasePath) if err != nil { @@ -135,8 +133,7 @@ func ReadAndHashAndWriteWithLimit(reqReader io.Reader, maxFileSizeBytes types.Fi // The file data is hashed and the hash is returned. The hash is useful as a // method of deduplicating files to save storage, as well as a way to conduct - // integrity checks on the file data in the repository. The hash gets used as - // the MediaID. + // integrity checks on the file data in the repository. hasher := sha256.New() // bytesResponded is the total number of bytes written to the response to the client request diff --git a/src/github.com/matrix-org/dendrite/mediaapi/writers/download.go b/src/github.com/matrix-org/dendrite/mediaapi/writers/download.go index ffb27cdee..2f8185f5f 100644 --- a/src/github.com/matrix-org/dendrite/mediaapi/writers/download.go +++ b/src/github.com/matrix-org/dendrite/mediaapi/writers/download.go @@ -448,7 +448,7 @@ func (r *downloadRequest) respondFromRemoteFile(w http.ResponseWriter, absBasePa // integrity checks on the file data in the repository. // bytesResponded is the total number of bytes written to the response to the client request // bytesWritten is the total number of bytes written to disk - hash, bytesResponded, bytesWritten, tmpDir, copyError := fileutils.ReadAndHashAndWriteWithLimit(resp.Body, maxFileSizeBytes, absBasePath, w) + hash, bytesResponded, bytesWritten, tmpDir, copyError := fileutils.WriteTempFile(resp.Body, maxFileSizeBytes, absBasePath, w) if copyError != nil { logFields := log.Fields{ diff --git a/src/github.com/matrix-org/dendrite/mediaapi/writers/upload.go b/src/github.com/matrix-org/dendrite/mediaapi/writers/upload.go index a515e09bc..0b7c15b10 100644 --- a/src/github.com/matrix-org/dendrite/mediaapi/writers/upload.go +++ b/src/github.com/matrix-org/dendrite/mediaapi/writers/upload.go @@ -198,7 +198,7 @@ func Upload(req *http.Request, cfg *config.MediaAPI, db *storage.Database) util. // method of deduplicating files to save storage, as well as a way to conduct // integrity checks on the file data in the repository. 
// bytesWritten is the total number of bytes written to disk - hash, _, bytesWritten, tmpDir, copyError := fileutils.ReadAndHashAndWriteWithLimit(req.Body, cfg.MaxFileSizeBytes, cfg.AbsBasePath, nil) + hash, _, bytesWritten, tmpDir, copyError := fileutils.WriteTempFile(req.Body, cfg.MaxFileSizeBytes, cfg.AbsBasePath, nil) if copyError != nil { logFields := log.Fields{ From f6f33e80684c08fbe87fe74febce85f83c37cd7d Mon Sep 17 00:00:00 2001 From: Robert Swain Date: Mon, 22 May 2017 16:24:41 +0200 Subject: [PATCH 101/108] mediaapi/fileutils: Fix missing error check lost-in-refactoring --- .../matrix-org/dendrite/mediaapi/fileutils/fileutils.go | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/github.com/matrix-org/dendrite/mediaapi/fileutils/fileutils.go b/src/github.com/matrix-org/dendrite/mediaapi/fileutils/fileutils.go index cf487a878..6779be537 100644 --- a/src/github.com/matrix-org/dendrite/mediaapi/fileutils/fileutils.go +++ b/src/github.com/matrix-org/dendrite/mediaapi/fileutils/fileutils.go @@ -152,6 +152,9 @@ func WriteTempFile(reqReader io.Reader, maxFileSizeBytes types.FileSizeBytes, ab if respWriter != nil { bytesTemp, copyError = writeToResponse(respWriter, buffer, bytesRead) bytesResponded += bytesTemp + if copyError != nil { + break + } } if copyError == nil { // Note: if we get here then copyError != ErrFileIsTooLarge && copyError != errWrite From 0fcf74014d623a359ea49d6685be8e6ee18f6869 Mon Sep 17 00:00:00 2001 From: Robert Swain Date: Mon, 22 May 2017 16:41:07 +0200 Subject: [PATCH 102/108] mediaapi/fileutils: Improve function name and API docs --- .../matrix-org/dendrite/mediaapi/fileutils/fileutils.go | 9 ++++----- .../matrix-org/dendrite/mediaapi/writers/download.go | 2 +- 2 files changed, 5 insertions(+), 6 deletions(-) diff --git a/src/github.com/matrix-org/dendrite/mediaapi/fileutils/fileutils.go b/src/github.com/matrix-org/dendrite/mediaapi/fileutils/fileutils.go index 6779be537..64f4e46da 100644 --- a/src/github.com/matrix-org/dendrite/mediaapi/fileutils/fileutils.go +++ b/src/github.com/matrix-org/dendrite/mediaapi/fileutils/fileutils.go @@ -186,12 +186,11 @@ func WriteTempFile(reqReader io.Reader, maxFileSizeBytes types.FileSizeBytes, ab return types.Base64Hash(base64.URLEncoding.EncodeToString(hash[:])), types.FileSizeBytes(bytesResponded), types.FileSizeBytes(bytesWritten), tmpDir, nil } -// GetPathFromMediaMetadata validates and constructs the on-disk path to the media -// based on its Base64Hash +// GetPathFromBase64Hash evaluates the path to a media file from its Base64Hash // If the Base64Hash is long enough, we split it into pieces, creating up to 2 subdirectories // for more manageable browsing and use the remainder as the file name. // For example, if Base64Hash is 'qwerty', the path will be 'q/w/erty'. -func GetPathFromMediaMetadata(base64Hash types.Base64Hash, absBasePath types.Path) (string, error) { +func GetPathFromBase64Hash(base64Hash types.Base64Hash, absBasePath types.Path) (string, error) { var subPath, fileName string hashLen := len(base64Hash) @@ -249,14 +248,14 @@ func moveFile(src types.Path, dst types.Path) error { return nil } -// MoveFileWithHashCheck attempts to move the file src to dst and checks for hash collisions based on metadata +// MoveFileWithHashCheck checks for hash collisions when moving a temporary file to its destination based on metadata // Check if destination file exists. 
As the destination is based on a hash of the file data, // if it exists and the file size does not match then there is a hash collision for two different files. If // it exists and the file size matches, it is believable that it is the same file and we can just // discard the temporary file. func MoveFileWithHashCheck(tmpDir types.Path, mediaMetadata *types.MediaMetadata, absBasePath types.Path, logger *log.Entry) (string, bool, error) { duplicate := false - finalPath, err := GetPathFromMediaMetadata(mediaMetadata.Base64Hash, absBasePath) + finalPath, err := GetPathFromBase64Hash(mediaMetadata.Base64Hash, absBasePath) if err != nil { RemoveDir(tmpDir, logger) return "", duplicate, fmt.Errorf("failed to get file path from metadata: %q", err) diff --git a/src/github.com/matrix-org/dendrite/mediaapi/writers/download.go b/src/github.com/matrix-org/dendrite/mediaapi/writers/download.go index 2f8185f5f..7eae64620 100644 --- a/src/github.com/matrix-org/dendrite/mediaapi/writers/download.go +++ b/src/github.com/matrix-org/dendrite/mediaapi/writers/download.go @@ -184,7 +184,7 @@ func (r *downloadRequest) respondFromLocalFile(w http.ResponseWriter, absBasePat "Content-Disposition": r.MediaMetadata.ContentDisposition, }).Infof("Downloading file") - filePath, err := fileutils.GetPathFromMediaMetadata(r.MediaMetadata.Base64Hash, absBasePath) + filePath, err := fileutils.GetPathFromBase64Hash(r.MediaMetadata.Base64Hash, absBasePath) if err != nil { // FIXME: Remove erroneous file from database? r.Logger.WithError(err).Warn("Failed to get file path from metadata") From 8626308dac5fafb7458d8d55f98032b6913e1c2a Mon Sep 17 00:00:00 2001 From: Robert Swain Date: Mon, 22 May 2017 16:41:46 +0200 Subject: [PATCH 103/108] mediaapi/writers/download: getMatrixUrls -> getMatrixURLs --- .../matrix-org/dendrite/mediaapi/writers/download.go | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/github.com/matrix-org/dendrite/mediaapi/writers/download.go b/src/github.com/matrix-org/dendrite/mediaapi/writers/download.go index 7eae64620..910341dee 100644 --- a/src/github.com/matrix-org/dendrite/mediaapi/writers/download.go +++ b/src/github.com/matrix-org/dendrite/mediaapi/writers/download.go @@ -247,7 +247,7 @@ func (r *downloadRequest) respondFromLocalFile(w http.ResponseWriter, absBasePat } func (r *downloadRequest) createRemoteRequest() (*http.Response, *util.JSONResponse) { - urls := getMatrixUrls(r.MediaMetadata.Origin) + urls := getMatrixURLs(r.MediaMetadata.Origin) r.Logger.WithField("URL", urls[0]).Info("Connecting to remote") @@ -504,7 +504,7 @@ func (r *downloadRequest) respondFromRemoteFile(w http.ResponseWriter, absBasePa // Given a matrix server name, attempt to discover URLs to contact the server // on. -func getMatrixUrls(serverName gomatrixserverlib.ServerName) []string { +func getMatrixURLs(serverName gomatrixserverlib.ServerName) []string { _, srvs, err := net.LookupSRV("matrix", "tcp", string(serverName)) if err != nil { return []string{"https://" + string(serverName) + ":8448"} From 48c3a505cd7edcc50dee64e1c19a1ea235a71530 Mon Sep 17 00:00:00 2001 From: Robert Swain Date: Mon, 22 May 2017 17:05:45 +0200 Subject: [PATCH 104/108] mediaapi/writers/upload: Use Logger and MediaMetadata from uploadRequest Also make storeFileAndMetadata a method on uploadRequest. 
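
In reduced form (stub types standing in for types.MediaMetadata and *log.Entry, not the real mediaapi ones), the shape of the change is roughly:

    package main

    import "fmt"

    // Stub stand-ins for types.MediaMetadata and *log.Entry; illustrative only.
    type mediaMetadata struct{ UploadName string }

    type entry struct{}

    func (e *entry) Info(msg string) { fmt.Println(msg) }

    // uploadRequest carries the per-request state that every helper needs.
    type uploadRequest struct {
        MediaMetadata *mediaMetadata
        Logger        *entry
    }

    // Before: a free function with the shared state threaded through it:
    //
    //     func storeFileAndMetadata(tmpDir string, m *mediaMetadata, l *entry) error
    //
    // After: a method, so the shared state comes from the receiver.
    func (r *uploadRequest) storeFileAndMetadata(tmpDir string) error {
        r.Logger.Info("storing " + r.MediaMetadata.UploadName + " from " + tmpDir)
        return nil
    }

    func main() {
        r := &uploadRequest{
            MediaMetadata: &mediaMetadata{UploadName: "cat.png"},
            Logger:        &entry{},
        }
        _ = r.storeFileAndMetadata("/tmp/dendrite-upload-123")
    }

Callers of storeFileAndMetadata then only pass what varies per call; the request's logger and metadata travel with the receiver.
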
--- .../dendrite/mediaapi/writers/upload.go | 34 +++++++++---------- 1 file changed, 17 insertions(+), 17 deletions(-) diff --git a/src/github.com/matrix-org/dendrite/mediaapi/writers/upload.go b/src/github.com/matrix-org/dendrite/mediaapi/writers/upload.go index 0b7c15b10..02e24e77c 100644 --- a/src/github.com/matrix-org/dendrite/mediaapi/writers/upload.go +++ b/src/github.com/matrix-org/dendrite/mediaapi/writers/upload.go @@ -37,10 +37,11 @@ import ( // NOTE: The members come from HTTP request metadata such as headers, query parameters or can be derived from such type uploadRequest struct { MediaMetadata *types.MediaMetadata + Logger *log.Entry } // Validate validates the uploadRequest fields -func (r uploadRequest) Validate(maxFileSizeBytes types.FileSizeBytes) *util.JSONResponse { +func (r *uploadRequest) Validate(maxFileSizeBytes types.FileSizeBytes) *util.JSONResponse { // TODO: Any validation to be done on ContentDisposition? if r.MediaMetadata.FileSizeBytes < 1 { @@ -118,6 +119,7 @@ func parseAndValidateRequest(req *http.Request, cfg *config.MediaAPI) (*uploadRe UploadName: types.Filename(req.FormValue("filename")), UserID: types.MatrixUserID(userID), }, + Logger: util.GetLogger(req.Context()), } if resErr = r.Validate(cfg.MaxFileSizeBytes); resErr != nil { @@ -142,26 +144,26 @@ func parseAndValidateRequest(req *http.Request, cfg *config.MediaAPI) (*uploadRe // is ready and if we fail to move the file, it never gets added to the database. // In case of any error, appropriate files and directories are cleaned up a // util.JSONResponse error is returned. -func storeFileAndMetadata(tmpDir types.Path, absBasePath types.Path, mediaMetadata *types.MediaMetadata, db *storage.Database, logger *log.Entry) *util.JSONResponse { - finalPath, duplicate, err := fileutils.MoveFileWithHashCheck(tmpDir, mediaMetadata, absBasePath, logger) +func (r *uploadRequest) storeFileAndMetadata(tmpDir types.Path, absBasePath types.Path, db *storage.Database) *util.JSONResponse { + finalPath, duplicate, err := fileutils.MoveFileWithHashCheck(tmpDir, r.MediaMetadata, absBasePath, r.Logger) if err != nil { - logger.WithError(err).Error("Failed to move file.") + r.Logger.WithError(err).Error("Failed to move file.") return &util.JSONResponse{ Code: 400, JSON: jsonerror.Unknown(fmt.Sprintf("Failed to upload")), } } if duplicate { - logger.WithField("dst", finalPath).Info("File was stored previously - discarding duplicate") + r.Logger.WithField("dst", finalPath).Info("File was stored previously - discarding duplicate") } - if err = db.StoreMediaMetadata(mediaMetadata); err != nil { - logger.WithError(err).Warn("Failed to store metadata") + if err = db.StoreMediaMetadata(r.MediaMetadata); err != nil { + r.Logger.WithError(err).Warn("Failed to store metadata") // If the file is a duplicate (has the same hash as an existing file) then // there is valid metadata in the database for that file. As such we only // remove the file if it is not a duplicate. if duplicate == false { - fileutils.RemoveDir(types.Path(path.Dir(finalPath)), logger) + fileutils.RemoveDir(types.Path(path.Dir(finalPath)), r.Logger) } return &util.JSONResponse{ Code: 400, @@ -179,14 +181,12 @@ func storeFileAndMetadata(tmpDir types.Path, absBasePath types.Path, mediaMetada // Uploaded files are processed piece-wise to avoid DoS attacks which would starve the server of memory. // TODO: We should time out requests if they have not received any data within a configured timeout period. 
func Upload(req *http.Request, cfg *config.MediaAPI, db *storage.Database) util.JSONResponse { - logger := util.GetLogger(req.Context()) - r, resErr := parseAndValidateRequest(req, cfg) if resErr != nil { return *resErr } - logger.WithFields(log.Fields{ + r.Logger.WithFields(log.Fields{ "Origin": r.MediaMetadata.Origin, "UploadName": r.MediaMetadata.UploadName, "FileSizeBytes": r.MediaMetadata.FileSizeBytes, @@ -208,8 +208,8 @@ func Upload(req *http.Request, cfg *config.MediaAPI, db *storage.Database) util. if copyError == fileutils.ErrFileIsTooLarge { logFields["MaxFileSizeBytes"] = cfg.MaxFileSizeBytes } - logger.WithError(copyError).WithFields(logFields).Warn("Error while transferring file") - fileutils.RemoveDir(tmpDir, logger) + r.Logger.WithError(copyError).WithFields(logFields).Warn("Error while transferring file") + fileutils.RemoveDir(tmpDir, r.Logger) return util.JSONResponse{ Code: 400, JSON: jsonerror.Unknown(fmt.Sprintf("Failed to upload")), @@ -220,7 +220,7 @@ func Upload(req *http.Request, cfg *config.MediaAPI, db *storage.Database) util. r.MediaMetadata.Base64Hash = hash r.MediaMetadata.MediaID = types.MediaID(hash) - logger.WithFields(log.Fields{ + r.Logger.WithFields(log.Fields{ "MediaID": r.MediaMetadata.MediaID, "Origin": r.MediaMetadata.Origin, "Base64Hash": r.MediaMetadata.Base64Hash, @@ -234,7 +234,7 @@ func Upload(req *http.Request, cfg *config.MediaAPI, db *storage.Database) util. mediaMetadata, err := db.GetMediaMetadata(r.MediaMetadata.MediaID, r.MediaMetadata.Origin) if err == nil { r.MediaMetadata = mediaMetadata - fileutils.RemoveDir(tmpDir, logger) + fileutils.RemoveDir(tmpDir, r.Logger) return util.JSONResponse{ Code: 200, JSON: uploadResponse{ @@ -242,12 +242,12 @@ func Upload(req *http.Request, cfg *config.MediaAPI, db *storage.Database) util. 
}, } } else if err != sql.ErrNoRows { - logger.WithError(err).WithField("MediaID", r.MediaMetadata.MediaID).Warn("Failed to query database") + r.Logger.WithError(err).WithField("MediaID", r.MediaMetadata.MediaID).Warn("Failed to query database") } // TODO: generate thumbnails - resErr = storeFileAndMetadata(tmpDir, cfg.AbsBasePath, r.MediaMetadata, db, logger) + resErr = r.storeFileAndMetadata(tmpDir, cfg.AbsBasePath, db) if resErr != nil { return *resErr } From 5e338cc80513e65cabf3dd9956172766326b9f6d Mon Sep 17 00:00:00 2001 From: Robert Swain Date: Mon, 22 May 2017 17:08:54 +0200 Subject: [PATCH 105/108] mediaapi/writers/download: Fix reference to spec for /download --- src/github.com/matrix-org/dendrite/mediaapi/writers/download.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/github.com/matrix-org/dendrite/mediaapi/writers/download.go b/src/github.com/matrix-org/dendrite/mediaapi/writers/download.go index 910341dee..2c1065a3b 100644 --- a/src/github.com/matrix-org/dendrite/mediaapi/writers/download.go +++ b/src/github.com/matrix-org/dendrite/mediaapi/writers/download.go @@ -38,7 +38,7 @@ import ( ) // downloadRequest metadata included in or derivable from an download request -// https://matrix.org/docs/spec/client_server/r0.2.0.html#post-matrix-media-r0-download +// https://matrix.org/docs/spec/client_server/r0.2.0.html#get-matrix-media-r0-download-servername-mediaid type downloadRequest struct { MediaMetadata *types.MediaMetadata Logger *log.Entry From 3d8aec28b144ee571f4740ab216b5997a1de15e4 Mon Sep 17 00:00:00 2001 From: Robert Swain Date: Mon, 22 May 2017 20:54:03 +0200 Subject: [PATCH 106/108] mediaapi/writers/download: Remove unnecessary retry loop --- .../dendrite/mediaapi/writers/download.go | 56 +++++++++---------- 1 file changed, 28 insertions(+), 28 deletions(-) diff --git a/src/github.com/matrix-org/dendrite/mediaapi/writers/download.go b/src/github.com/matrix-org/dendrite/mediaapi/writers/download.go index 2c1065a3b..bdb63d4a7 100644 --- a/src/github.com/matrix-org/dendrite/mediaapi/writers/download.go +++ b/src/github.com/matrix-org/dendrite/mediaapi/writers/download.go @@ -79,8 +79,6 @@ func (r *downloadRequest) jsonErrorResponse(w http.ResponseWriter, res util.JSON w.Write(resBytes) } -var nTries = 5 - // Download implements /download // Files from this server (i.e. origin == cfg.ServerName) are served directly // Files from remote servers (i.e. origin != cfg.ServerName) are cached locally. 
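
The hunk below replaces the bounded retry loop (and the now-unused nTries) with a single wait on a per-URL sync.Cond: the first request for a remote file registers the condition variable and fetches, later requests wait on it and then re-check the database. A reduced, self-contained sketch of that pattern, with hypothetical names rather than the dendrite types:

    package main

    import (
        "fmt"
        "sync"
    )

    // Sketch of per-key fetch deduplication with sync.Cond; names and types
    // are illustrative, not the mediaapi implementation.
    type fetchDeduplicator struct {
        sync.Mutex
        inFlight map[string]*sync.Cond
    }

    func newFetchDeduplicator() *fetchDeduplicator {
        return &fetchDeduplicator{inFlight: make(map[string]*sync.Cond)}
    }

    // fetchOnce runs fetch for key unless a fetch for the same key is already
    // in flight, in which case it blocks until that fetch finishes. Real code
    // would re-check its cache or database after waking up.
    func (d *fetchDeduplicator) fetchOnce(key string, fetch func()) {
        d.Lock()
        if cond, ok := d.inFlight[key]; ok {
            cond.Wait() // releases the lock while waiting, re-acquires on wake
            d.Unlock()
            return
        }
        cond := sync.NewCond(&d.Mutex)
        d.inFlight[key] = cond
        d.Unlock()

        fetch()

        d.Lock()
        delete(d.inFlight, key)
        cond.Broadcast() // wake every waiter for this key
        d.Unlock()
    }

    func main() {
        d := newFetchDeduplicator()
        var wg sync.WaitGroup
        for i := 0; i < 3; i++ {
            wg.Add(1)
            go func() {
                defer wg.Done()
                d.fetchOnce("mxc://remote.example/abc", func() { fmt.Println("fetching") })
            }()
        }
        wg.Wait()
    }

In the real handler the source of truth stays in the database, so a woken waiter re-runs GetMediaMetadata rather than trusting the wake-up alone; that also covers the case where the in-flight fetch failed, which now responds with a 500 instead of retrying.
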
@@ -126,41 +124,43 @@ func Download(w http.ResponseWriter, req *http.Request, origin gomatrixserverlib mxcURL := "mxc://" + string(r.MediaMetadata.Origin) + "/" + string(r.MediaMetadata.MediaID) - for tries := 0; ; tries++ { + activeRemoteRequests.Lock() + mediaMetadata, err = db.GetMediaMetadata(r.MediaMetadata.MediaID, r.MediaMetadata.Origin) + if err == nil { + // If we have a record, we can respond from the local file + r.MediaMetadata = mediaMetadata + r.respondFromLocalFile(w, cfg.AbsBasePath) + activeRemoteRequests.Unlock() + return + } + if activeRemoteRequestCondition, ok := activeRemoteRequests.Set[mxcURL]; ok { + r.Logger.WithFields(log.Fields{ + "Origin": r.MediaMetadata.Origin, + "MediaID": r.MediaMetadata.MediaID, + }).Info("Waiting for another goroutine to fetch the remote file.") + activeRemoteRequestCondition.Wait() + activeRemoteRequests.Unlock() activeRemoteRequests.Lock() mediaMetadata, err = db.GetMediaMetadata(r.MediaMetadata.MediaID, r.MediaMetadata.Origin) if err == nil { // If we have a record, we can respond from the local file r.MediaMetadata = mediaMetadata r.respondFromLocalFile(w, cfg.AbsBasePath) - activeRemoteRequests.Unlock() - return - } - if activeRemoteRequestCondition, ok := activeRemoteRequests.Set[mxcURL]; ok { - if tries >= nTries { - r.Logger.WithFields(log.Fields{ - "MediaID": r.MediaMetadata.MediaID, - "Origin": r.MediaMetadata.Origin, - }).Warn("Other goroutines are trying to download the remote file and failing.") - r.jsonErrorResponse(w, util.JSONResponse{ - Code: 500, - JSON: jsonerror.Unknown(fmt.Sprintf("File with media ID %q could not be downloaded from %q", r.MediaMetadata.MediaID, r.MediaMetadata.Origin)), - }) - activeRemoteRequests.Unlock() - return - } - r.Logger.WithFields(log.Fields{ - "Origin": r.MediaMetadata.Origin, - "MediaID": r.MediaMetadata.MediaID, - }).Info("Waiting for another goroutine to fetch the remote file.") - activeRemoteRequestCondition.Wait() - activeRemoteRequests.Unlock() } else { - activeRemoteRequests.Set[mxcURL] = &sync.Cond{L: activeRemoteRequests} - activeRemoteRequests.Unlock() - break + r.Logger.WithFields(log.Fields{ + "MediaID": r.MediaMetadata.MediaID, + "Origin": r.MediaMetadata.Origin, + }).Warn("Other goroutine failed to fetch remote file.") + r.jsonErrorResponse(w, util.JSONResponse{ + Code: 500, + JSON: jsonerror.Unknown(fmt.Sprintf("File with media ID %q could not be downloaded from %q", r.MediaMetadata.MediaID, r.MediaMetadata.Origin)), + }) } + activeRemoteRequests.Unlock() + return } + activeRemoteRequests.Set[mxcURL] = &sync.Cond{L: activeRemoteRequests} + activeRemoteRequests.Unlock() r.respondFromRemoteFile(w, cfg.AbsBasePath, cfg.MaxFileSizeBytes, db, activeRemoteRequests) } else { From f88c537f4368d6d868f459959dc2b77f73fc74fd Mon Sep 17 00:00:00 2001 From: Robert Swain Date: Mon, 22 May 2017 21:03:57 +0200 Subject: [PATCH 107/108] mediaapi/writers/download: Separate 404 and 500 due to db query failure --- .../dendrite/mediaapi/writers/download.go | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/src/github.com/matrix-org/dendrite/mediaapi/writers/download.go b/src/github.com/matrix-org/dendrite/mediaapi/writers/download.go index bdb63d4a7..bc6fb6d98 100644 --- a/src/github.com/matrix-org/dendrite/mediaapi/writers/download.go +++ b/src/github.com/matrix-org/dendrite/mediaapi/writers/download.go @@ -163,13 +163,23 @@ func Download(w http.ResponseWriter, req *http.Request, origin gomatrixserverlib activeRemoteRequests.Unlock() r.respondFromRemoteFile(w, 
cfg.AbsBasePath, cfg.MaxFileSizeBytes, db, activeRemoteRequests) - } else { - // If we do not have a record and the origin is local, or if we have another error from the database, the file is not found + } else if err == sql.ErrNoRows && r.MediaMetadata.Origin == cfg.ServerName { + // If we do not have a record and the origin is local, the file is not found r.Logger.WithError(err).Warn("Failed to look up file in database") r.jsonErrorResponse(w, util.JSONResponse{ Code: 404, JSON: jsonerror.NotFound(fmt.Sprintf("File with media ID %q does not exist", r.MediaMetadata.MediaID)), }) + } else { + // Another error from the database + r.Logger.WithError(err).WithFields(log.Fields{ + "MediaID": r.MediaMetadata.MediaID, + "Origin": r.MediaMetadata.Origin, + }).Error("Error querying the database.") + r.jsonErrorResponse(w, util.JSONResponse{ + Code: 500, + JSON: jsonerror.Unknown("Internal server error"), + }) } } From cd7e04ec0d33029e08ed257de41fba96c4f9283d Mon Sep 17 00:00:00 2001 From: Robert Swain Date: Mon, 22 May 2017 21:06:14 +0200 Subject: [PATCH 108/108] mediaapi/writers/download: Move comment next to code to which it refers --- .../matrix-org/dendrite/mediaapi/writers/download.go | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/github.com/matrix-org/dendrite/mediaapi/writers/download.go b/src/github.com/matrix-org/dendrite/mediaapi/writers/download.go index bc6fb6d98..b49255562 100644 --- a/src/github.com/matrix-org/dendrite/mediaapi/writers/download.go +++ b/src/github.com/matrix-org/dendrite/mediaapi/writers/download.go @@ -118,12 +118,12 @@ func Download(w http.ResponseWriter, req *http.Request, origin gomatrixserverlib return } else if err == sql.ErrNoRows && r.MediaMetadata.Origin != cfg.ServerName { // If we do not have a record and the origin is remote, we need to fetch it and respond with that file - // The following code using activeRemoteRequests is avoiding duplication of fetches from the remote server in the case - // of multiple simultaneous incoming requests for the same remote file - it will be downloaded once, cached and served - // to all clients. mxcURL := "mxc://" + string(r.MediaMetadata.Origin) + "/" + string(r.MediaMetadata.MediaID) + // The following code using activeRemoteRequests is avoiding duplication of fetches from the remote server in the case + // of multiple simultaneous incoming requests for the same remote file - it will be downloaded once, cached and served + // to all clients. activeRemoteRequests.Lock() mediaMetadata, err = db.GetMediaMetadata(r.MediaMetadata.MediaID, r.MediaMetadata.Origin) if err == nil {