mirror of
https://github.com/matrix-org/dendrite.git
synced 2025-12-26 08:13:09 -06:00
401 lines
12 KiB
Go
401 lines
12 KiB
Go
// Copyright 2017 Vector Creations Ltd
|
|
//
|
|
// Licensed under the Apache License, Version 2.0 (the "License");
|
|
// you may not use this file except in compliance with the License.
|
|
// You may obtain a copy of the License at
|
|
//
|
|
// http://www.apache.org/licenses/LICENSE-2.0
|
|
//
|
|
// Unless required by applicable law or agreed to in writing, software
|
|
// distributed under the License is distributed on an "AS IS" BASIS,
|
|
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
// See the License for the specific language governing permissions and
|
|
// limitations under the License.
|
|
|
|
package routing
|
|
|
|
import (
|
|
"context"
|
|
"crypto/rand"
|
|
"encoding/hex"
|
|
"fmt"
|
|
"io/ioutil"
|
|
"net/http"
|
|
"net/url"
|
|
"path"
|
|
"strconv"
|
|
"strings"
|
|
"time"
|
|
|
|
"golang.org/x/net/html"
|
|
|
|
"github.com/matrix-org/dendrite/clientapi/jsonerror"
|
|
"github.com/matrix-org/dendrite/mediaapi/fileutils"
|
|
"github.com/matrix-org/dendrite/mediaapi/storage"
|
|
"github.com/matrix-org/dendrite/mediaapi/types"
|
|
"github.com/matrix-org/dendrite/setup/config"
|
|
"github.com/matrix-org/util"
|
|
log "github.com/sirupsen/logrus"
|
|
)
|
|
|
|
// the parameters included in the incoming preview_url request
|
|
type PreviewUrlRequest struct {
|
|
url url.URL
|
|
ts types.UnixMs
|
|
}
|
|
|
|
// metadata of the url (media/html) to be previewed
|
|
type mediaInfo struct {
|
|
MediaMetadata *types.MediaMetadata
|
|
Logger *log.Entry
|
|
}
|
|
|
|
// PreviewUrlResponse defines the format of the JSON response
// https://matrix.org/docs/spec/client_server/latest#get-matrix-media-r0-preview-url
//
// Every field is tagged omitempty so that a value which was not found is
// left out of the response instead of being reported as 0 or "". The spec
// states that matrix:image:size and og:image are omitted when there is no
// image attached.
// TODO: add more fields to the response
type PreviewUrlResponse struct {
	// MatrixImageSize is the byte-size of the preview image.
	MatrixImageSize int64 `json:"matrix:image:size,omitempty"`
	// OgImage is the value of the og:image property.
	OgImage string `json:"og:image,omitempty"`
	// OgSiteName is the value of the og:site_name property.
	OgSiteName string `json:"og:site_name,omitempty"`
	// OgType is the value of the og:type property.
	OgType string `json:"og:type,omitempty"`
	// OgTitle is the value of the og:title property.
	OgTitle string `json:"og:title,omitempty"`
	// OgUrl is the value of the og:url property.
	OgUrl string `json:"og:url,omitempty"`
	// OgDescription is the value of the og:description property.
	OgDescription string `json:"og:description,omitempty"`
}
|
|
|
|
// PreviewUrl implements GET /preview_url.
|
|
// Current implementation gets the url, parses the meta tags to obtain OG
|
|
// data and returns a JSONResponse for the client to process.
|
|
// TODO: avoid this endpoint in encrypted rooms
|
|
func PreviewUrl(
|
|
req *http.Request, cfg *config.MediaAPI, db storage.Database) util.JSONResponse {
|
|
|
|
// parsing the request
|
|
r, err := parseRequest(req)
|
|
if err != nil {
|
|
return *err
|
|
}
|
|
|
|
// downloading metadata for the url
|
|
mediaInfo, err := r.downloadUrl(cfg, req.Context(), db)
|
|
if err != nil {
|
|
return *err
|
|
}
|
|
|
|
// preparing the response
|
|
response, err := mediaInfo.prepareResponse(cfg)
|
|
if err != nil {
|
|
return *err
|
|
}
|
|
|
|
return util.JSONResponse{
|
|
Code: http.StatusOK,
|
|
JSON: response,
|
|
}
|
|
}
|
|
|
|
// parseRequest parses the incoming preview request to extract the url and ts.
|
|
// Returns either a parsed PreviewUrlRequest or error formatted as
|
|
// util.JSONResponse
|
|
func parseRequest(req *http.Request) (*PreviewUrlRequest, *util.JSONResponse) {
|
|
|
|
// get the url to be previewed from the request
|
|
urlToPreview := req.URL.Query().Get("url")
|
|
if len(urlToPreview) == 0 {
|
|
return nil, &util.JSONResponse{
|
|
Code: http.StatusBadRequest,
|
|
JSON: jsonerror.MissingArgument("Missing URL"),
|
|
}
|
|
}
|
|
|
|
// parse the url
|
|
parsedUrl, err := url.Parse(urlToPreview)
|
|
if err != nil {
|
|
return nil, &util.JSONResponse{
|
|
Code: http.StatusBadRequest,
|
|
JSON: jsonerror.InvalidArgumentValue(
|
|
"Unable to parse url " + err.Error()),
|
|
}
|
|
}
|
|
|
|
// get the ts if provided in the request
|
|
if tsStr := req.URL.Query().Get("ts"); len(tsStr) > 0 {
|
|
ts, err := strconv.ParseInt(tsStr, 10, 64)
|
|
if err != nil {
|
|
return nil, &util.JSONResponse{
|
|
Code: http.StatusBadRequest,
|
|
JSON: jsonerror.InvalidArgumentValue(
|
|
"Couldn't parse 'ts' to a valid integer" + err.Error()),
|
|
}
|
|
}
|
|
|
|
request := &PreviewUrlRequest{
|
|
url: *parsedUrl,
|
|
// Convert timestamp to ms
|
|
ts: types.UnixMs(ts / 1000000),
|
|
}
|
|
|
|
return request, nil
|
|
}
|
|
|
|
// set ts to current time if none provided
|
|
ts := time.Now().UnixNano()
|
|
|
|
request := &PreviewUrlRequest{
|
|
url: *parsedUrl,
|
|
ts: types.UnixMs(ts / 1000000),
|
|
}
|
|
|
|
return request, nil
|
|
}
|
|
|
|
// downloadUrl downloads the url and saves the response body.
|
|
// Returns either mediaInfo (metadata about the file to preview) or error
|
|
// formatted as util.JSONResponse.
|
|
// Current implementation for saving response is heavily similar to /upload,
|
|
// need to work on this.
|
|
// TODO: better logging
|
|
func (previewReq *PreviewUrlRequest) downloadUrl(cfg *config.MediaAPI, ctx context.Context, db storage.Database) (*mediaInfo, *util.JSONResponse) {
|
|
|
|
urlString := previewReq.url.String()
|
|
|
|
// Get the URL
|
|
response, err := http.Get(urlString)
|
|
if err != nil {
|
|
return nil, &util.JSONResponse{
|
|
Code: http.StatusBadRequest,
|
|
JSON: jsonerror.InvalidArgumentValue("Couldn't get the URL " + err.Error()),
|
|
}
|
|
}
|
|
|
|
// TODO: check for other status codes too
|
|
if response.StatusCode == 404 {
|
|
return nil, &util.JSONResponse{
|
|
Code: http.StatusBadRequest,
|
|
JSON: jsonerror.NotFound("Given url returned 404"),
|
|
}
|
|
}
|
|
|
|
// Save the response body to the temporary dir
|
|
// TODO: should be a new directory like url_cache maybe and
|
|
hash, bytesWritten, tmpDir, err := fileutils.WriteTempFile(ctx, response.Body, cfg.AbsBasePath)
|
|
if err != nil {
|
|
return nil, &util.JSONResponse{
|
|
Code: http.StatusBadRequest,
|
|
JSON: jsonerror.Unknown("Failed to store URL response " + err.Error()),
|
|
}
|
|
}
|
|
|
|
// metadata of the media/webpage to preview
|
|
urlMetadata := &mediaInfo{
|
|
MediaMetadata: &types.MediaMetadata{
|
|
Origin: cfg.Matrix.ServerName,
|
|
FileSizeBytes: bytesWritten,
|
|
Base64Hash: hash,
|
|
ContentType: types.ContentType(response.Header["Content-Type"][0]),
|
|
},
|
|
Logger: util.GetLogger(ctx).WithField("Origin", cfg.Matrix.ServerName),
|
|
}
|
|
|
|
// :-\
|
|
// Look up the media by the file hash. If we already have the file but under a
|
|
// different media ID then we won't upload the file again - instead we'll just
|
|
// add a new metadata entry that refers to the same file.
|
|
existingMetadata, err := db.GetMediaMetadataByHash(
|
|
ctx, hash, urlMetadata.MediaMetadata.Origin,
|
|
)
|
|
if err != nil {
|
|
fileutils.RemoveDir(tmpDir, urlMetadata.Logger)
|
|
urlMetadata.Logger.WithError(err).Error("Error querying the database by hash.")
|
|
resErr := jsonerror.InternalServerError()
|
|
return nil, &resErr
|
|
}
|
|
if existingMetadata != nil {
|
|
// The file already exists, delete the uploaded temporary file.
|
|
defer fileutils.RemoveDir(tmpDir, urlMetadata.Logger)
|
|
// The file already exists. Make a new media ID up for it.
|
|
mediaID, merr := urlMetadata.generateMediaID(ctx, db)
|
|
if merr != nil {
|
|
urlMetadata.Logger.WithError(merr).Error("Failed to generate media ID for existing file")
|
|
resErr := jsonerror.InternalServerError()
|
|
return nil, &resErr
|
|
}
|
|
|
|
// Then amend the upload metadata.
|
|
urlMetadata.MediaMetadata = &types.MediaMetadata{
|
|
MediaID: mediaID,
|
|
Origin: urlMetadata.MediaMetadata.Origin,
|
|
ContentType: urlMetadata.MediaMetadata.ContentType,
|
|
FileSizeBytes: urlMetadata.MediaMetadata.FileSizeBytes,
|
|
CreationTimestamp: urlMetadata.MediaMetadata.CreationTimestamp,
|
|
UploadName: urlMetadata.MediaMetadata.UploadName,
|
|
Base64Hash: hash,
|
|
UserID: urlMetadata.MediaMetadata.UserID,
|
|
}
|
|
} else {
|
|
// The file doesn't exist. Update the request metadata.
|
|
urlMetadata.MediaMetadata.FileSizeBytes = bytesWritten
|
|
urlMetadata.MediaMetadata.Base64Hash = hash
|
|
urlMetadata.MediaMetadata.MediaID, err = urlMetadata.generateMediaID(ctx, db)
|
|
if err != nil {
|
|
fileutils.RemoveDir(tmpDir, urlMetadata.Logger)
|
|
urlMetadata.Logger.WithError(err).Error("Failed to generate media ID for new download")
|
|
resErr := jsonerror.InternalServerError()
|
|
return nil, &resErr
|
|
}
|
|
}
|
|
|
|
urlMetadata.Logger = urlMetadata.Logger.WithField("media_id", urlMetadata.MediaMetadata.MediaID)
|
|
urlMetadata.Logger.WithFields(log.Fields{
|
|
"Base64Hash": urlMetadata.MediaMetadata.Base64Hash,
|
|
"UploadName": urlMetadata.MediaMetadata.UploadName,
|
|
"FileSizeBytes": urlMetadata.MediaMetadata.FileSizeBytes,
|
|
"ContentType": urlMetadata.MediaMetadata.ContentType,
|
|
}).Info("File downloaded")
|
|
|
|
if resErr := urlMetadata.storeFileAndMetadata(ctx, tmpDir, cfg.AbsBasePath, db); resErr != nil {
|
|
return nil, resErr
|
|
}
|
|
|
|
return urlMetadata, nil
|
|
}
|
|
|
|
func (m *mediaInfo) generateMediaID(ctx context.Context, db storage.Database) (types.MediaID, error) {
|
|
for {
|
|
// First try generating a media ID. We'll do this by
|
|
// generating some random bytes and then hex-encoding.
|
|
mediaIDBytes := make([]byte, 32)
|
|
_, err := rand.Read(mediaIDBytes)
|
|
if err != nil {
|
|
return "", fmt.Errorf("rand.Read: %w", err)
|
|
}
|
|
mediaID := types.MediaID(hex.EncodeToString(mediaIDBytes))
|
|
// Then we will check if this media ID already exists in
|
|
// our database. If it does then we had best generate a
|
|
// new one.
|
|
existingMetadata, err := db.GetMediaMetadata(ctx, mediaID, m.MediaMetadata.Origin)
|
|
if err != nil {
|
|
return "", fmt.Errorf("db.GetMediaMetadata: %w", err)
|
|
}
|
|
if existingMetadata != nil {
|
|
// The media ID was already used - repeat the process
|
|
// and generate a new one instead.
|
|
continue
|
|
}
|
|
// The media ID was not already used - let's return that.
|
|
return mediaID, nil
|
|
}
|
|
}
|
|
|
|
func (r *mediaInfo) storeFileAndMetadata(
|
|
ctx context.Context,
|
|
tmpDir types.Path,
|
|
absBasePath config.Path,
|
|
db storage.Database,
|
|
) *util.JSONResponse {
|
|
finalPath, duplicate, err := fileutils.MoveFileWithHashCheck(tmpDir, r.MediaMetadata, absBasePath, r.Logger)
|
|
if err != nil {
|
|
r.Logger.WithError(err).Error("Failed to move file.")
|
|
return &util.JSONResponse{
|
|
Code: http.StatusBadRequest,
|
|
JSON: jsonerror.Unknown("Failed to upload"),
|
|
}
|
|
}
|
|
if duplicate {
|
|
r.Logger.WithField("dst", finalPath).Info("File was stored previously - discarding duplicate")
|
|
}
|
|
|
|
if err = db.StoreMediaMetadata(ctx, r.MediaMetadata); err != nil {
|
|
r.Logger.WithError(err).Warn("Failed to store metadata")
|
|
// If the file is a duplicate (has the same hash as an existing file) then
|
|
// there is valid metadata in the database for that file. As such we only
|
|
// remove the file if it is not a duplicate.
|
|
if !duplicate {
|
|
fileutils.RemoveDir(types.Path(path.Dir(string(finalPath))), r.Logger)
|
|
}
|
|
return &util.JSONResponse{
|
|
Code: http.StatusBadRequest,
|
|
JSON: jsonerror.Unknown("Failed to upload"),
|
|
}
|
|
}
|
|
|
|
return nil
|
|
}
|
|
|
|
// parseContent returns the data stored in content attributes
|
|
func parseContent(node *html.Node) string {
|
|
// iterating the attributes of the tag to get content
|
|
for _, attr := range node.Attr {
|
|
if attr.Key == "content" {
|
|
content := attr.Val
|
|
return content
|
|
}
|
|
}
|
|
return ""
|
|
}
|
|
|
|
// prepareResponse prepares the response to be returned to the client
|
|
func (mediaInfo *mediaInfo) prepareResponse(cfg *config.MediaAPI) (*PreviewUrlResponse, *util.JSONResponse) {
|
|
|
|
// Reading the file in which the response body was stored and parsing the html.
|
|
pathToFile, err := fileutils.GetPathFromBase64Hash(mediaInfo.MediaMetadata.Base64Hash, cfg.AbsBasePath)
|
|
if err != nil {
|
|
return nil, &util.JSONResponse{
|
|
Code: http.StatusInternalServerError,
|
|
JSON: jsonerror.Unknown("Couldn't get the path to the stored response" + err.Error()),
|
|
}
|
|
}
|
|
|
|
fileString, err := ioutil.ReadFile(pathToFile)
|
|
if err != nil {
|
|
return nil, &util.JSONResponse{
|
|
Code: http.StatusInternalServerError,
|
|
JSON: jsonerror.Unknown("Couldn't read the stored response body" + err.Error()),
|
|
}
|
|
}
|
|
|
|
file := strings.NewReader(string(fileString))
|
|
|
|
tree, err := html.Parse(file)
|
|
if err != nil {
|
|
return nil, &util.JSONResponse{
|
|
Code: http.StatusInternalServerError,
|
|
JSON: jsonerror.Unknown("Couldn't get the path to the stored file " + err.Error()),
|
|
}
|
|
}
|
|
|
|
// map for storing the content values
|
|
m := make(map[string]string)
|
|
|
|
// Iterating the *html.Node, looking for og data
|
|
var f func(*html.Node)
|
|
f = func(n *html.Node) {
|
|
//check if meta tag
|
|
if n.Type == html.ElementNode && n.Data == "meta" {
|
|
//parse attributes of the tag
|
|
for _, a := range n.Attr {
|
|
if a.Key == "property" && strings.HasPrefix(a.Val, "og:") {
|
|
slice := strings.Split(a.Val, ":")
|
|
m[slice[1]] = parseContent(n)
|
|
}
|
|
}
|
|
}
|
|
for c := n.FirstChild; c != nil; c = c.NextSibling {
|
|
f(c)
|
|
}
|
|
}
|
|
|
|
f(tree)
|
|
|
|
response := &PreviewUrlResponse{
|
|
OgImage: m["image"],
|
|
OgSiteName: m["site_name"],
|
|
OgType: m["type"],
|
|
OgTitle: m["title"],
|
|
OgUrl: m["url"],
|
|
OgDescription: m["description"],
|
|
}
|
|
|
|
return response, nil
|
|
}
|