mirror of
https://github.com/matrix-org/dendrite.git
synced 2025-12-26 08:13:09 -06:00
basic url previews
Signed-off-by: Rudraksh Pareek <54525605+DelusionalOptimist@users.noreply.github.com>
This commit is contained in:
parent
59e1b96114
commit
e5a59b7580
400
mediaapi/routing/preview_url.go
Normal file
400
mediaapi/routing/preview_url.go
Normal file
|
|
@ -0,0 +1,400 @@
|
|||
// Copyright 2017 Vector Creations Ltd
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
package routing
|
||||
|
||||
import (
|
||||
"context"
|
||||
"crypto/rand"
|
||||
"encoding/hex"
|
||||
"fmt"
|
||||
"io/ioutil"
|
||||
"net/http"
|
||||
"net/url"
|
||||
"path"
|
||||
"strconv"
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
"golang.org/x/net/html"
|
||||
|
||||
"github.com/matrix-org/dendrite/clientapi/jsonerror"
|
||||
"github.com/matrix-org/dendrite/mediaapi/fileutils"
|
||||
"github.com/matrix-org/dendrite/mediaapi/storage"
|
||||
"github.com/matrix-org/dendrite/mediaapi/types"
|
||||
"github.com/matrix-org/dendrite/setup/config"
|
||||
"github.com/matrix-org/util"
|
||||
log "github.com/sirupsen/logrus"
|
||||
)
|
||||
|
||||
// the parameters included in the incoming preview_url request
|
||||
type PreviewUrlRequest struct {
|
||||
url url.URL
|
||||
ts types.UnixMs
|
||||
}
|
||||
|
||||
// metadata of the url (media/html) to be previewed
|
||||
type mediaInfo struct {
|
||||
MediaMetadata *types.MediaMetadata
|
||||
Logger *log.Entry
|
||||
}
|
||||
|
||||
// PreviewUrlResponse is the JSON body returned by the preview_url endpoint.
// Each field is serialised under the key given in its json tag.
// https://matrix.org/docs/spec/client_server/latest#get-matrix-media-r0-preview-url
// TODO: add more fields to the response
type PreviewUrlResponse struct {
	// MatrixImageSize is emitted as "matrix:image:size".
	MatrixImageSize int64 `json:"matrix:image:size"`
	// OgImage is emitted as "og:image".
	OgImage string `json:"og:image"`
	// OgSiteName is emitted as "og:site_name".
	OgSiteName string `json:"og:site_name"`
	// OgType is emitted as "og:type".
	OgType string `json:"og:type"`
	// OgTitle is emitted as "og:title".
	OgTitle string `json:"og:title"`
	// OgUrl is emitted as "og:url".
	OgUrl string `json:"og:url"`
	// OgDescription is emitted as "og:description".
	OgDescription string `json:"og:description"`
}
|
||||
|
||||
// PreviewUrl implements GET /preview_url.
|
||||
// Current implementation gets the url, parses the meta tags to obtain OG
|
||||
// data and returns a JSONResponse for the client to process.
|
||||
// TODO: avoid this endpoint in encrypted rooms
|
||||
func PreviewUrl(
|
||||
req *http.Request, cfg *config.MediaAPI, db storage.Database) util.JSONResponse {
|
||||
|
||||
// parsing the request
|
||||
r, err := parseRequest(req)
|
||||
if err != nil {
|
||||
return *err
|
||||
}
|
||||
|
||||
// downloading metadata for the url
|
||||
mediaInfo, err := r.downloadUrl(cfg, req.Context(), db)
|
||||
if err != nil {
|
||||
return *err
|
||||
}
|
||||
|
||||
// preparing the response
|
||||
response, err := mediaInfo.prepareResponse(cfg)
|
||||
if err != nil {
|
||||
return *err
|
||||
}
|
||||
|
||||
return util.JSONResponse{
|
||||
Code: http.StatusOK,
|
||||
JSON: response,
|
||||
}
|
||||
}
|
||||
|
||||
// parseRequest parses the incoming preview request to extract the url and ts.
|
||||
// Returns either a parsed PreviewUrlRequest or error formatted as
|
||||
// util.JSONResponse
|
||||
func parseRequest(req *http.Request) (*PreviewUrlRequest, *util.JSONResponse) {
|
||||
|
||||
// get the url to be previewed from the request
|
||||
urlToPreview := req.URL.Query().Get("url")
|
||||
if len(urlToPreview) == 0 {
|
||||
return nil, &util.JSONResponse{
|
||||
Code: http.StatusBadRequest,
|
||||
JSON: jsonerror.MissingArgument("Missing URL"),
|
||||
}
|
||||
}
|
||||
|
||||
// parse the url
|
||||
parsedUrl, err := url.Parse(urlToPreview)
|
||||
if err != nil {
|
||||
return nil, &util.JSONResponse{
|
||||
Code: http.StatusBadRequest,
|
||||
JSON: jsonerror.InvalidArgumentValue(
|
||||
"Unable to parse url " + err.Error()),
|
||||
}
|
||||
}
|
||||
|
||||
// get the ts if provided in the request
|
||||
if tsStr := req.URL.Query().Get("ts"); len(tsStr) > 0 {
|
||||
ts, err := strconv.ParseInt(tsStr, 10, 64)
|
||||
if err != nil {
|
||||
return nil, &util.JSONResponse{
|
||||
Code: http.StatusBadRequest,
|
||||
JSON: jsonerror.InvalidArgumentValue(
|
||||
"Couldn't parse 'ts' to a valid integer" + err.Error()),
|
||||
}
|
||||
}
|
||||
|
||||
request := &PreviewUrlRequest{
|
||||
url: *parsedUrl,
|
||||
// Convert timestamp to ms
|
||||
ts: types.UnixMs(ts / 1000000),
|
||||
}
|
||||
|
||||
return request, nil
|
||||
}
|
||||
|
||||
// set ts to current time if none provided
|
||||
ts := time.Now().UnixNano()
|
||||
|
||||
request := &PreviewUrlRequest{
|
||||
url: *parsedUrl,
|
||||
ts: types.UnixMs(ts / 1000000),
|
||||
}
|
||||
|
||||
return request, nil
|
||||
}
|
||||
|
||||
// downloadUrl downloads the url and saves the response body.
|
||||
// Returns either mediaInfo (metadata about the file to preview) or error
|
||||
// formatted as util.JSONResponse.
|
||||
// Current implementation for saving response is heavily similar to /upload,
|
||||
// need to work on this.
|
||||
// TODO: better logging
|
||||
func (previewReq *PreviewUrlRequest) downloadUrl(cfg *config.MediaAPI, ctx context.Context, db storage.Database) (*mediaInfo, *util.JSONResponse) {
|
||||
|
||||
urlString := previewReq.url.String()
|
||||
|
||||
// Get the URL
|
||||
response, err := http.Get(urlString)
|
||||
if err != nil {
|
||||
return nil, &util.JSONResponse{
|
||||
Code: http.StatusBadRequest,
|
||||
JSON: jsonerror.InvalidArgumentValue("Couldn't get the URL " + err.Error()),
|
||||
}
|
||||
}
|
||||
|
||||
// TODO: check for other status codes too
|
||||
if response.StatusCode == 404 {
|
||||
return nil, &util.JSONResponse{
|
||||
Code: http.StatusBadRequest,
|
||||
JSON: jsonerror.NotFound("Given url returned 404"),
|
||||
}
|
||||
}
|
||||
|
||||
// Save the response body to the temporary dir
|
||||
// TODO: should be a new directory like url_cache maybe and
|
||||
hash, bytesWritten, tmpDir, err := fileutils.WriteTempFile(ctx, response.Body, cfg.AbsBasePath)
|
||||
if err != nil {
|
||||
return nil, &util.JSONResponse{
|
||||
Code: http.StatusBadRequest,
|
||||
JSON: jsonerror.Unknown("Failed to store URL response " + err.Error()),
|
||||
}
|
||||
}
|
||||
|
||||
// metadata of the media/webpage to preview
|
||||
urlMetadata := &mediaInfo{
|
||||
MediaMetadata: &types.MediaMetadata{
|
||||
Origin: cfg.Matrix.ServerName,
|
||||
FileSizeBytes: bytesWritten,
|
||||
Base64Hash: hash,
|
||||
ContentType: types.ContentType(response.Header["Content-Type"][0]),
|
||||
},
|
||||
Logger: util.GetLogger(ctx).WithField("Origin", cfg.Matrix.ServerName),
|
||||
}
|
||||
|
||||
// :-\
|
||||
// Look up the media by the file hash. If we already have the file but under a
|
||||
// different media ID then we won't upload the file again - instead we'll just
|
||||
// add a new metadata entry that refers to the same file.
|
||||
existingMetadata, err := db.GetMediaMetadataByHash(
|
||||
ctx, hash, urlMetadata.MediaMetadata.Origin,
|
||||
)
|
||||
if err != nil {
|
||||
fileutils.RemoveDir(tmpDir, urlMetadata.Logger)
|
||||
urlMetadata.Logger.WithError(err).Error("Error querying the database by hash.")
|
||||
resErr := jsonerror.InternalServerError()
|
||||
return nil, &resErr
|
||||
}
|
||||
if existingMetadata != nil {
|
||||
// The file already exists, delete the uploaded temporary file.
|
||||
defer fileutils.RemoveDir(tmpDir, urlMetadata.Logger)
|
||||
// The file already exists. Make a new media ID up for it.
|
||||
mediaID, merr := urlMetadata.generateMediaID(ctx, db)
|
||||
if merr != nil {
|
||||
urlMetadata.Logger.WithError(merr).Error("Failed to generate media ID for existing file")
|
||||
resErr := jsonerror.InternalServerError()
|
||||
return nil, &resErr
|
||||
}
|
||||
|
||||
// Then amend the upload metadata.
|
||||
urlMetadata.MediaMetadata = &types.MediaMetadata{
|
||||
MediaID: mediaID,
|
||||
Origin: urlMetadata.MediaMetadata.Origin,
|
||||
ContentType: urlMetadata.MediaMetadata.ContentType,
|
||||
FileSizeBytes: urlMetadata.MediaMetadata.FileSizeBytes,
|
||||
CreationTimestamp: urlMetadata.MediaMetadata.CreationTimestamp,
|
||||
UploadName: urlMetadata.MediaMetadata.UploadName,
|
||||
Base64Hash: hash,
|
||||
UserID: urlMetadata.MediaMetadata.UserID,
|
||||
}
|
||||
} else {
|
||||
// The file doesn't exist. Update the request metadata.
|
||||
urlMetadata.MediaMetadata.FileSizeBytes = bytesWritten
|
||||
urlMetadata.MediaMetadata.Base64Hash = hash
|
||||
urlMetadata.MediaMetadata.MediaID, err = urlMetadata.generateMediaID(ctx, db)
|
||||
if err != nil {
|
||||
fileutils.RemoveDir(tmpDir, urlMetadata.Logger)
|
||||
urlMetadata.Logger.WithError(err).Error("Failed to generate media ID for new download")
|
||||
resErr := jsonerror.InternalServerError()
|
||||
return nil, &resErr
|
||||
}
|
||||
}
|
||||
|
||||
urlMetadata.Logger = urlMetadata.Logger.WithField("media_id", urlMetadata.MediaMetadata.MediaID)
|
||||
urlMetadata.Logger.WithFields(log.Fields{
|
||||
"Base64Hash": urlMetadata.MediaMetadata.Base64Hash,
|
||||
"UploadName": urlMetadata.MediaMetadata.UploadName,
|
||||
"FileSizeBytes": urlMetadata.MediaMetadata.FileSizeBytes,
|
||||
"ContentType": urlMetadata.MediaMetadata.ContentType,
|
||||
}).Info("File downloaded")
|
||||
|
||||
if resErr := urlMetadata.storeFileAndMetadata(ctx, tmpDir, cfg.AbsBasePath, db); resErr != nil {
|
||||
return nil, resErr
|
||||
}
|
||||
|
||||
return urlMetadata, nil
|
||||
}
|
||||
|
||||
func (m *mediaInfo) generateMediaID(ctx context.Context, db storage.Database) (types.MediaID, error) {
|
||||
for {
|
||||
// First try generating a media ID. We'll do this by
|
||||
// generating some random bytes and then hex-encoding.
|
||||
mediaIDBytes := make([]byte, 32)
|
||||
_, err := rand.Read(mediaIDBytes)
|
||||
if err != nil {
|
||||
return "", fmt.Errorf("rand.Read: %w", err)
|
||||
}
|
||||
mediaID := types.MediaID(hex.EncodeToString(mediaIDBytes))
|
||||
// Then we will check if this media ID already exists in
|
||||
// our database. If it does then we had best generate a
|
||||
// new one.
|
||||
existingMetadata, err := db.GetMediaMetadata(ctx, mediaID, m.MediaMetadata.Origin)
|
||||
if err != nil {
|
||||
return "", fmt.Errorf("db.GetMediaMetadata: %w", err)
|
||||
}
|
||||
if existingMetadata != nil {
|
||||
// The media ID was already used - repeat the process
|
||||
// and generate a new one instead.
|
||||
continue
|
||||
}
|
||||
// The media ID was not already used - let's return that.
|
||||
return mediaID, nil
|
||||
}
|
||||
}
|
||||
|
||||
func (r *mediaInfo) storeFileAndMetadata(
|
||||
ctx context.Context,
|
||||
tmpDir types.Path,
|
||||
absBasePath config.Path,
|
||||
db storage.Database,
|
||||
) *util.JSONResponse {
|
||||
finalPath, duplicate, err := fileutils.MoveFileWithHashCheck(tmpDir, r.MediaMetadata, absBasePath, r.Logger)
|
||||
if err != nil {
|
||||
r.Logger.WithError(err).Error("Failed to move file.")
|
||||
return &util.JSONResponse{
|
||||
Code: http.StatusBadRequest,
|
||||
JSON: jsonerror.Unknown("Failed to upload"),
|
||||
}
|
||||
}
|
||||
if duplicate {
|
||||
r.Logger.WithField("dst", finalPath).Info("File was stored previously - discarding duplicate")
|
||||
}
|
||||
|
||||
if err = db.StoreMediaMetadata(ctx, r.MediaMetadata); err != nil {
|
||||
r.Logger.WithError(err).Warn("Failed to store metadata")
|
||||
// If the file is a duplicate (has the same hash as an existing file) then
|
||||
// there is valid metadata in the database for that file. As such we only
|
||||
// remove the file if it is not a duplicate.
|
||||
if !duplicate {
|
||||
fileutils.RemoveDir(types.Path(path.Dir(string(finalPath))), r.Logger)
|
||||
}
|
||||
return &util.JSONResponse{
|
||||
Code: http.StatusBadRequest,
|
||||
JSON: jsonerror.Unknown("Failed to upload"),
|
||||
}
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
// parseContent returns the data stored in content attributes
|
||||
func parseContent(node *html.Node) string {
|
||||
// iterating the attributes of the tag to get content
|
||||
for _, attr := range node.Attr {
|
||||
if attr.Key == "content" {
|
||||
content := attr.Val
|
||||
return content
|
||||
}
|
||||
}
|
||||
return ""
|
||||
}
|
||||
|
||||
// prepareResponse prepares the response to be returned to the client
|
||||
func (mediaInfo *mediaInfo) prepareResponse(cfg *config.MediaAPI) (*PreviewUrlResponse, *util.JSONResponse) {
|
||||
|
||||
// Reading the file in which the response body was stored and parsing the html.
|
||||
pathToFile, err := fileutils.GetPathFromBase64Hash(mediaInfo.MediaMetadata.Base64Hash, cfg.AbsBasePath)
|
||||
if err != nil {
|
||||
return nil, &util.JSONResponse{
|
||||
Code: http.StatusInternalServerError,
|
||||
JSON: jsonerror.Unknown("Couldn't get the path to the stored response" + err.Error()),
|
||||
}
|
||||
}
|
||||
|
||||
fileString, err := ioutil.ReadFile(pathToFile)
|
||||
if err != nil {
|
||||
return nil, &util.JSONResponse{
|
||||
Code: http.StatusInternalServerError,
|
||||
JSON: jsonerror.Unknown("Couldn't read the stored response body" + err.Error()),
|
||||
}
|
||||
}
|
||||
|
||||
file := strings.NewReader(string(fileString))
|
||||
|
||||
tree, err := html.Parse(file)
|
||||
if err != nil {
|
||||
return nil, &util.JSONResponse{
|
||||
Code: http.StatusInternalServerError,
|
||||
JSON: jsonerror.Unknown("Couldn't get the path to the stored file " + err.Error()),
|
||||
}
|
||||
}
|
||||
|
||||
// map for storing the content values
|
||||
m := make(map[string]string)
|
||||
|
||||
// Iterating the *html.Node, looking for og data
|
||||
var f func(*html.Node)
|
||||
f = func(n *html.Node) {
|
||||
//check if meta tag
|
||||
if n.Type == html.ElementNode && n.Data == "meta" {
|
||||
//parse attributes of the tag
|
||||
for _, a := range n.Attr {
|
||||
if a.Key == "property" && strings.HasPrefix(a.Val, "og:") {
|
||||
slice := strings.Split(a.Val, ":")
|
||||
m[slice[1]] = parseContent(n)
|
||||
}
|
||||
}
|
||||
}
|
||||
for c := n.FirstChild; c != nil; c = c.NextSibling {
|
||||
f(c)
|
||||
}
|
||||
}
|
||||
|
||||
f(tree)
|
||||
|
||||
response := &PreviewUrlResponse{
|
||||
OgImage: m["image"],
|
||||
OgSiteName: m["site_name"],
|
||||
OgType: m["type"],
|
||||
OgTitle: m["title"],
|
||||
OgUrl: m["url"],
|
||||
OgDescription: m["description"],
|
||||
}
|
||||
|
||||
return response, nil
|
||||
}
|
||||
|
|
@ -80,6 +80,12 @@ func Setup(
|
|||
return Config(req, cfg)
|
||||
}),
|
||||
).Methods(http.MethodGet, http.MethodOptions)
|
||||
|
||||
r0mux.Handle("/preview_url",
|
||||
httputil.MakeAuthAPI("preview_url", userAPI, func(req *http.Request, dev *userapi.Device) util.JSONResponse {
|
||||
return PreviewUrl(req, cfg, db)
|
||||
}),
|
||||
).Methods(http.MethodGet, http.MethodOptions)
|
||||
}
|
||||
|
||||
func makeDownloadAPI(
|
||||
|
|
|
|||
Loading…
Reference in a new issue