Skip to content

Commit fc5af64

Browse files
committed Mar 24, 2024
fix: force protocol to be lowercase for better protocol filtering
* fix: force protocol to be lowercase for better protocol filtering

  See https://datatracker.ietf.org/doc/html/rfc3986#section-3.1
  "Although schemes are case-insensitive, the canonical form is lowercase"
* Update snapshots
* Rewrite to use `new URL` if available, and reject weird/invalid protocols
* Fix prefix pattern, `?` handling, add comments and tests
* Remove `new URL` to simplify code
* Lint fixes
1 parent 085e21b commit fc5af64

File tree

5 files changed

+108
-7
lines changed

5 files changed

+108
-7
lines changed
 

‎docs/options.md

+2
Original file line numberDiff line numberDiff line change
@@ -50,6 +50,8 @@ You can provide an object of options as the last argument to [`katex.render` and
5050
- `trust`: `boolean` or `function` (default: `false`). If `false` (do not trust input), prevent any commands like `\includegraphics` that could enable adverse behavior, rendering them instead in `errorColor`. If `true` (trust input), allow all such commands. Provide a custom function `handler(context)` to customize behavior depending on the context (command, arguments e.g. a URL, etc.). A list of possible contexts:
5151

5252
- `{command: "\\url", url, protocol}`
53+
where `protocol` is a lowercased string like `"http"` or `"https"`
54+
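that appears before a colon, or `"_relative"` for relative URLs. URLs whose protocol is malformed (invalid scheme characters, or a colon written as an HTML entity) are rejected outright, without calling the handler.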
5355
- `{command: "\\href", url, protocol}`
5456
- `{command: "\\includegraphics", url, protocol}`
5557
- `{command: "\\htmlClass", class}`

‎src/Settings.js

+5-1
Original file line numberDiff line numberDiff line change
@@ -346,7 +346,11 @@ export default class Settings {
346346
*/
347347
isTrusted(context: AnyTrustContext): boolean {
348348
if (context.url && !context.protocol) {
349-
context.protocol = utils.protocolFromUrl(context.url);
349+
const protocol = utils.protocolFromUrl(context.url);
350+
if (protocol == null) {
351+
return false;
352+
}
353+
context.protocol = protocol;
350354
}
351355
const trust = typeof this.trust === "function"
352356
? this.trust(context)

‎src/utils.js

+23-4
Original file line numberDiff line numberDiff line change
@@ -93,11 +93,30 @@ export const assert = function<T>(value: ?T): T {
9393

9494
/**
9595
* Return the protocol of a URL, or "_relative" if the URL does not specify a
96-
* protocol (and thus is relative).
96+
* protocol (and thus is relative), or `null` if the URL has an invalid protocol
97+
* (so should be outright rejected).
9798
*/
98-
export const protocolFromUrl = function(url: string): string {
99-
const protocol = /^\s*([^\\/#]*?)(?::|&#0*58|&#x0*3a)/i.exec(url);
100-
return (protocol != null ? protocol[1] : "_relative");
99+
export const protocolFromUrl = function(url: string): string | null {
100+
// Check for possible leading protocol.
101+
// https://url.spec.whatwg.org/#url-parsing strips leading whitespace
102+
// (U+20) or C0 control (U+00-U+1F) characters.
103+
// eslint-disable-next-line no-control-regex
104+
const protocol = /^[\x00-\x20]*([^\\/#?]*?)(:|&#0*58|&#x0*3a|&colon)/i
105+
.exec(url);
106+
if (!protocol) {
107+
return "_relative";
108+
}
109+
// Reject colons written as HTML entities (`&#58`, `&#x3a`, `&colon`)
110+
if (protocol[2] !== ":") {
111+
return null;
112+
}
113+
// Reject invalid characters in scheme according to
114+
// https://datatracker.ietf.org/doc/html/rfc3986#section-3.1
115+
if (!/^[a-zA-Z][a-zA-Z0-9+\-.]*$/.test(protocol[1])) {
116+
return null;
117+
}
118+
// Lowercase the protocol
119+
return protocol[1].toLowerCase();
101120
};
102121

103122
export default {

‎test/__snapshots__/katex-spec.js.snap

+44-1
Original file line numberDiff line numberDiff line change
@@ -1468,7 +1468,50 @@ exports[`href and url commands should not affect spacing around 1`] = `
14681468
]
14691469
`;
14701470

1471-
exports[`href and url commands should not allow explicitly disallow protocols 1`] = `
1471+
exports[`href and url commands should not allow explicitly disallowed protocols 1`] = `
1472+
[
1473+
{
1474+
"type": "color",
1475+
"body": [
1476+
{
1477+
"type": "text",
1478+
"body": [
1479+
{
1480+
"type": "textord",
1481+
"mode": "text",
1482+
"text": "\\\\"
1483+
},
1484+
{
1485+
"type": "textord",
1486+
"mode": "text",
1487+
"text": "h"
1488+
},
1489+
{
1490+
"type": "textord",
1491+
"mode": "text",
1492+
"text": "r"
1493+
},
1494+
{
1495+
"type": "textord",
1496+
"mode": "text",
1497+
"text": "e"
1498+
},
1499+
{
1500+
"type": "textord",
1501+
"mode": "text",
1502+
"text": "f"
1503+
}
1504+
],
1505+
"mode": "math"
1506+
}
1507+
],
1508+
"color": "#cc0000",
1509+
"mode": "math"
1510+
}
1511+
]
1512+
`;
1513+
1514+
exports[`href and url commands should not allow explicitly uppercased disallowed protocols 1`] = `
14721515
[
14731516
{
14741517
"type": "color",

‎test/katex-spec.js

+34-1
Original file line numberDiff line numberDiff line change
@@ -3005,13 +3005,46 @@ describe("href and url commands", function() {
30053005
expect(parsed).toMatchSnapshot();
30063006
});
30073007

3008-
it("should not allow explicitly disallow protocols", () => {
3008+
it("should not allow explicitly disallowed protocols", () => {
30093009
const parsed = getParsed(
30103010
"\\href{javascript:alert('x')}{foo}",
30113011
new Settings({trust: context => context.protocol !== "javascript"}),
30123012
);
30133013
expect(parsed).toMatchSnapshot();
30143014
});
3015+
3016+
it("should not allow explicitly uppercased disallowed protocols", () => {
3017+
const parsed = getParsed(
3018+
"\\href{JavaScript:alert('x')}{foo}",
3019+
new Settings({trust: context => context.protocol !== "javascript"}),
3020+
);
3021+
expect(parsed).toMatchSnapshot();
3022+
});
3023+
3024+
function getProtocolViaTrust(url) {
3025+
let protocol;
3026+
getParsed(`\\url{${url}}`, new Settings({
3027+
trust: context => protocol = context.protocol,
3028+
}));
3029+
return protocol;
3030+
}
3031+
3032+
it("should get protocols correctly", () => {
3033+
expect(getProtocolViaTrust("foo")).toBe("_relative");
3034+
expect(getProtocolViaTrust("Foo:")).toBe("foo");
3035+
expect(getProtocolViaTrust("Foo:bar")).toBe("foo");
3036+
expect(getProtocolViaTrust("JavaScript:")).toBe("javascript");
3037+
expect(getProtocolViaTrust("JavaScript:code")).toBe("javascript");
3038+
expect(getProtocolViaTrust("!:")).toBeUndefined();
3039+
expect(getProtocolViaTrust("foo&colon;")).toBeUndefined();
3040+
expect(getProtocolViaTrust("?query=string&colon=")).toBe("_relative");
3041+
expect(getProtocolViaTrust("#query=string&colon=")).toBe("_relative");
3042+
expect(getProtocolViaTrust("dir/file&colon")).toBe("_relative");
3043+
expect(getProtocolViaTrust("//foo")).toBe("_relative");
3044+
expect(getProtocolViaTrust("://foo")).toBeUndefined();
3045+
expect(getProtocolViaTrust(" \t http://")).toBe("http");
3046+
expect(getProtocolViaTrust(" \t http://foo")).toBe("http");
3047+
});
30153048
});
30163049

30173050
describe("A raw text parser", function() {

0 commit comments

Comments
 (0)
Please sign in to comment.