Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Companion: Use GET instead of HEAD for getURLMeta + Cut off length of file names #3048

Merged
merged 11 commits into from Sep 30, 2021
7 changes: 6 additions & 1 deletion packages/@uppy/companion/src/server/Uploader.js
Expand Up @@ -16,6 +16,9 @@ const logger = require('./logger')
const headerSanitize = require('./header-blacklist')
const redis = require('./redis')

// Need to limit length or we can get
// "MetadataTooLarge: Your metadata headers exceed the maximum allowed metadata size" in tus / S3
const MAX_FILENAME_LENGTH = 500
const DEFAULT_FIELD_NAME = 'files[]'
const PROTOCOLS = Object.freeze({
multipart: 'multipart',
Expand Down Expand Up @@ -58,7 +61,9 @@ class Uploader {
this.path = `${this.options.pathPrefix}/${Uploader.FILE_NAME_PREFIX}-${this.token}`
this.options.metadata = this.options.metadata || {}
this.options.fieldname = this.options.fieldname || DEFAULT_FIELD_NAME
this.uploadFileName = this.options.metadata.name || path.basename(this.path)
this.uploadFileName = this.options.metadata.name
? this.options.metadata.name.substring(0, MAX_FILENAME_LENGTH)
: path.basename(this.path)
this.streamsEnded = false
this.uploadStopped = false
this.writeStream = fs.createWriteStream(this.path, { mode: 0o666 }) // no executable files
Expand Down
154 changes: 79 additions & 75 deletions packages/@uppy/companion/src/server/controllers/url.js
Expand Up @@ -2,81 +2,11 @@ const router = require('express').Router
const request = require('request')
const { URL } = require('url')
const validator = require('validator')

const Uploader = require('../Uploader')
const reqUtil = require('../helpers/request')
const { getURLMeta, getRedirectEvaluator, getProtectedHttpAgent } = require('../helpers/request')
const logger = require('../logger')

module.exports = () => {
return router()
.post('/meta', meta)
.post('/get', get)
}

/**
* Fteches the size and content type of a URL
*
* @param {object} req expressJS request object
* @param {object} res expressJS response object
*/
const meta = (req, res) => {
logger.debug('URL file import handler running', null, req.id)
const { debug } = req.companion.options
if (!validateURL(req.body.url, debug)) {
logger.debug('Invalid request body detected. Exiting url meta handler.', null, req.id)
return res.status(400).json({ error: 'Invalid request body' })
}

reqUtil.getURLMeta(req.body.url, !debug)
.then((meta) => res.json(meta))
.catch((err) => {
logger.error(err, 'controller.url.meta.error', req.id)
// @todo send more meaningful error message and status code to client if possible
return res.status(err.status || 500).json({ message: 'failed to fetch URL metadata' })
})
}

/**
* Handles the reques of import a file from a remote URL, and then
* subsequently uploading it to the specified destination.
*
* @param {object} req expressJS request object
* @param {object} res expressJS response object
*/
const get = (req, res) => {
logger.debug('URL file import handler running', null, req.id)
const { debug } = req.companion.options
if (!validateURL(req.body.url, debug)) {
logger.debug('Invalid request body detected. Exiting url import handler.', null, req.id)
return res.status(400).json({ error: 'Invalid request body' })
}

reqUtil.getURLMeta(req.body.url, !debug)
.then(({ size }) => {
// @ts-ignore
logger.debug('Instantiating uploader.', null, req.id)
const uploader = new Uploader(Uploader.reqToOptions(req, size))

if (uploader.hasError()) {
const response = uploader.getResponse()
res.status(response.status).json(response.body)
return
}

logger.debug('Waiting for socket connection before beginning remote download.', null, req.id)
uploader.onSocketReady(() => {
logger.debug('Socket connection received. Starting remote download.', null, req.id)
downloadURL(req.body.url, uploader.handleChunk.bind(uploader), !debug, req.id)
})

const response = uploader.getResponse()
res.status(response.status).json(response.body)
}).catch((err) => {
logger.error(err, 'controller.url.get.error', req.id)
// @todo send more meaningful error message and status code to client if possible
return res.status(err.status || 500).json({ message: 'failed to fetch URL metadata' })
})
}

/**
* Validates that the download URL is secure
*
Expand Down Expand Up @@ -113,14 +43,14 @@ const validateURL = (url, debug) => {
* @param {string} url
* @param {downloadCallback} onDataChunk
* @param {boolean} blockLocalIPs
* @param {string=} traceId
* @param {string} traceId
*/
const downloadURL = (url, onDataChunk, blockLocalIPs, traceId) => {
const opts = {
uri: url,
method: 'GET',
followRedirect: reqUtil.getRedirectEvaluator(url, blockLocalIPs),
agentClass: reqUtil.getProtectedHttpAgent((new URL(url)).protocol, blockLocalIPs),
followRedirect: getRedirectEvaluator(url, blockLocalIPs),
agentClass: getProtectedHttpAgent((new URL(url)).protocol, blockLocalIPs),
}

request(opts)
Expand All @@ -138,3 +68,77 @@ const downloadURL = (url, onDataChunk, blockLocalIPs, traceId) => {
onDataChunk(err, null)
})
}

/**
* Fteches the size and content type of a URL
*
* @param {object} req expressJS request object
* @param {object} res expressJS response object
*/
const meta = async (req, res) => {
try {
logger.debug('URL file import handler running', null, req.id)
const { debug } = req.companion.options
if (!validateURL(req.body.url, debug)) {
logger.debug('Invalid request body detected. Exiting url meta handler.', null, req.id)
return res.status(400).json({ error: 'Invalid request body' })
}

const urlMeta = await getURLMeta(req.body.url, !debug)
return res.json(urlMeta)
} catch (err) {
logger.error(err, 'controller.url.meta.error', req.id)
// @todo send more meaningful error message and status code to client if possible
return res.status(err.status || 500).json({ message: 'failed to fetch URL metadata' })
}
}

/**
* Handles the reques of import a file from a remote URL, and then
* subsequently uploading it to the specified destination.
*
* @param {object} req expressJS request object
* @param {object} res expressJS response object
*/
const get = async (req, res) => {
try {
logger.debug('URL file import handler running', null, req.id)
const { debug } = req.companion.options
if (!validateURL(req.body.url, debug)) {
logger.debug('Invalid request body detected. Exiting url import handler.', null, req.id)
res.status(400).json({ error: 'Invalid request body' })
return
}

const { size } = await getURLMeta(req.body.url, !debug)

// @ts-ignore
logger.debug('Instantiating uploader.', null, req.id)
const uploader = new Uploader(Uploader.reqToOptions(req, size))

if (uploader.hasError()) {
const response = uploader.getResponse()
res.status(response.status).json(response.body)
return
}

logger.debug('Waiting for socket connection before beginning remote download.', null, req.id)
uploader.onSocketReady(() => {
logger.debug('Socket connection received. Starting remote download.', null, req.id)
downloadURL(req.body.url, uploader.handleChunk.bind(uploader), !debug, req.id)
})

const response = uploader.getResponse()

// NOTE: Uploader will continue running after the http request is responded
res.status(response.status).json(response.body)
} catch (err) {
logger.error(err, 'controller.url.get.error', req.id)
// @todo send more meaningful error message and status code to client if possible
res.status(err.status || 500).json({ message: 'failed to fetch URL metadata' })
}
}

module.exports = () => router()
.post('/meta', meta)
.post('/get', get)
55 changes: 28 additions & 27 deletions packages/@uppy/companion/src/server/helpers/request.js
Expand Up @@ -106,20 +106,6 @@ module.exports.getRedirectEvaluator = (rawRequestURL, blockPrivateIPs) => {
}
}

/**
* Returns http Agent that will prevent requests to private IPs (to preven SSRF)
*
* @param {string} protocol http or http: or https: or https protocol needed for the request
* @param {boolean} blockPrivateIPs if set to false, this protection will be disabled
*/
module.exports.getProtectedHttpAgent = (protocol, blockPrivateIPs) => {
if (blockPrivateIPs) {
return protocol.startsWith('https') ? HttpsAgent : HttpAgent
}

return protocol.startsWith('https') ? https.Agent : http.Agent
}

function dnsLookup (hostname, options, callback) {
dns.lookup(hostname, options, (err, addresses, maybeFamily) => {
if (err) {
Expand All @@ -141,54 +127,69 @@ function dnsLookup (hostname, options, callback) {

class HttpAgent extends http.Agent {
createConnection (options, callback) {
options.lookup = dnsLookup
if (isIPAddress(options.host) && isPrivateIP(options.host)) {
callback(new Error(FORBIDDEN_IP_ADDRESS))
return
return undefined
}
// @ts-ignore
return super.createConnection(options, callback)
return super.createConnection({ ...options, lookup: dnsLookup }, callback)
}
}

class HttpsAgent extends https.Agent {
createConnection (options, callback) {
options.lookup = dnsLookup
if (isIPAddress(options.host) && isPrivateIP(options.host)) {
callback(new Error(FORBIDDEN_IP_ADDRESS))
return
return undefined
}
// @ts-ignore
return super.createConnection(options, callback)
return super.createConnection({ ...options, lookup: dnsLookup }, callback)
}
}

/**
* Returns http Agent that will prevent requests to private IPs (to preven SSRF)
*
* @param {string} protocol http or http: or https: or https protocol needed for the request
* @param {boolean} blockPrivateIPs if set to false, this protection will be disabled
*/
module.exports.getProtectedHttpAgent = (protocol, blockPrivateIPs) => {
if (blockPrivateIPs) {
return protocol.startsWith('https') ? HttpsAgent : HttpAgent
}

return protocol.startsWith('https') ? https.Agent : http.Agent
}

/**
* Gets the size and content type of a url's content
*
* @param {string} url
* @param {boolean=} blockLocalIPs
* @param {boolean} blockLocalIPs
* @returns {Promise<{type: string, size: number}>}
*/
exports.getURLMeta = (url, blockLocalIPs = false) => {
return new Promise((resolve, reject) => {
const opts = {
uri: url,
method: 'HEAD',
method: 'GET',
followRedirect: exports.getRedirectEvaluator(url, blockLocalIPs),
agentClass: exports.getProtectedHttpAgent((new URL(url)).protocol, blockLocalIPs),
}

request(opts, (err, response) => {
if (err || response.statusCode >= 300) {
const req = request(opts, (err) => {
if (err) reject(err)
})
req.on('response', (response) => {
if (response.statusCode >= 300) {
// @todo possibly set a status code in the error object to get a more helpful
// hint at what the cause of error is.
err = err || new Error(`URL server responded with status: ${response.statusCode}`)
reject(err)
reject(new Error(`URL server responded with status: ${response.statusCode}`))
} else {
req.abort() // No need to get the rest of the response, as we only want header
resolve({
type: response.headers['content-type'],
size: parseInt(response.headers['content-length']),
size: parseInt(response.headers['content-length'], 10),
})
}
})
Expand Down
4 changes: 2 additions & 2 deletions packages/@uppy/utils/src/delay.test.js
Expand Up @@ -30,7 +30,7 @@ describe('delay', () => {

// should have rejected before the timer is done
const time = Date.now() - start
expect(time).toBeGreaterThanOrEqual(50)
expect(time).toBeLessThan(100)
expect(time).toBeGreaterThanOrEqual(30)
expect(time).toBeLessThan(70)
Comment on lines -33 to +34
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

tbh using Date API is probably not a great idea for this kind of tests, it could use a refactor to performance.now(). But a change for another time, it should not block this PR.

})
})