Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Regexps are quite leaky for complex parsing. Regression tests caught a few issues related to the path parser. In this diff I implemented a new spec-compliant parser, which fixes 2 regression cases and covers many edge cases that are hard to handle with regexps.
- Loading branch information
Showing
4 changed files
with
307 additions
and
83 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,210 @@ | ||
'use strict'; | ||
|
||
// Based on https://www.w3.org/TR/SVG11/paths.html#PathDataBNF | ||
|
||
// Number of numeric arguments consumed by each path command, keyed by the
// command letter. The upper-case and lower-case forms of a letter always
// take the same number of arguments, so the table is built from one entry
// per letter.
const argsCountPerCommand = {};
for (const [letter, count] of [
  ['M', 2],
  ['Z', 0],
  ['L', 2],
  ['H', 1],
  ['V', 1],
  ['C', 6],
  ['S', 4],
  ['Q', 4],
  ['T', 2],
  ['A', 7],
]) {
  argsCountPerCommand[letter] = count;
  argsCountPerCommand[letter.toLowerCase()] = count;
}
|
||
// A character is a command when it is a key of the argument-count table.
const isCommand = (c) => c in argsCountPerCommand;
|
||
// True when the first code point of `c` is one of the four whitespace
// characters the parser skips: space, tab, carriage return or line feed.
const isWsp = (c) => {
  switch (c.codePointAt(0)) {
    case 0x20: // space
    case 0x9: // tab
    case 0xd: // carriage return
    case 0xa: // line feed
      return true;
    default:
      return false;
  }
};
|
||
// True when the first code point of `c` lies in the ASCII range '0'..'9'.
const isDigit = (c) => {
  const code = c.codePointAt(0);
  return code >= 0x30 && code <= 0x39;
};
|
||
// Reads a single number from `string`, starting at index `cursor`.
//
// The scanner is a small state machine: optional sign, whole digits,
// optional decimal point and fraction digits, optional exponent marker with
// its own optional sign and digits. Scanning stops at the first character
// that does not fit the current state.
//
// Returns a `[index, value]` pair:
// - on success `index` is the position of the last consumed character, so
//   the caller's own `i += 1` lands on the first unread character;
// - when the collected text does not parse as a number the pair is
//   `[cursor, null]`.
const readNumber = (string, cursor) => {
  let i = cursor;
  let value = '';
  // none | sign | whole | decimal_point | decimal | e | exponent_sign | exponent
  let state = 'none';
  for (; i < string.length; i += 1) {
    const c = string[i];
    if (c === '+' || c === '-') {
      if (state === 'none') {
        state = 'sign';
        value += c;
        continue;
      }
      if (state === 'e') {
        // must be an assignment: leaving the state at 'e' would accept any
        // number of consecutive signs after the exponent marker
        state = 'exponent_sign';
        value += c;
        continue;
      }
    }
    if (isDigit(c)) {
      if (state === 'none' || state === 'sign' || state === 'whole') {
        state = 'whole';
        value += c;
        continue;
      }
      if (state === 'decimal_point' || state === 'decimal') {
        state = 'decimal';
        value += c;
        continue;
      }
      if (state === 'e' || state === 'exponent_sign' || state === 'exponent') {
        state = 'exponent';
        value += c;
        continue;
      }
    }
    if (c === '.') {
      // a second decimal point is not consumed; it ends the current number
      if (state === 'none' || state === 'sign' || state === 'whole') {
        state = 'decimal_point';
        value += c;
        continue;
      }
    }
    if (c === 'E' || c === 'e') {
      if (
        state === 'whole' ||
        state === 'decimal_point' ||
        state === 'decimal'
      ) {
        state = 'e';
        value += c;
        continue;
      }
    }
    break;
  }
  const number = Number.parseFloat(value);
  if (Number.isNaN(number)) {
    return [cursor, null];
  }
  // step back to delegate iteration to parent loop
  return [i - 1, number];
};
|
||
// Parses an SVG path data string into a list of `{ command, args }` items.
//
// `command` is the command letter and `args` the numbers read for it. When
// more numbers follow a completed command they are emitted as repeated
// items of the same command (moveto continues as lineto).
//
// Parsing is tolerant: on the first invalid construct it stops and returns
// the items completed so far, so the result may be shorter than the input
// (or empty) but never contains a partially filled command.
const parsePathData = (string) => {
  const pathData = [];
  let i = 0;
  let command = null;
  let args;
  let argsCount;
  let canHaveComma = false;
  let hadComma = false;
  for (; i < string.length; i += 1) {
    const c = string.charAt(i);
    if (isWsp(c)) {
      continue;
    }
    // allow comma only between arguments
    if (canHaveComma && c === ',') {
      if (hadComma) {
        break;
      }
      hadComma = true;
      continue;
    }
    if (isCommand(c)) {
      if (hadComma) {
        return pathData;
      }
      if (command == null) {
        // moveto should be leading command
        if (c !== 'M' && c !== 'm') {
          return pathData;
        }
      } else {
        // stop if previous command arguments are not flushed
        if (args.length !== 0) {
          return pathData;
        }
      }
      command = c;
      args = [];
      argsCount = argsCountPerCommand[command];
      canHaveComma = false;
      // flush command without arguments
      if (argsCount === 0) {
        pathData.push({ command, args });
      }
      continue;
    }
    // avoid parsing arguments if no command detected
    if (command == null) {
      return pathData;
    }
    // a command without arguments has already been flushed together with
    // its `args` array; reading a number now would push into that emitted
    // array, so anything other than a new command is treated as an error
    if (argsCount === 0) {
      return pathData;
    }
    // read next argument
    let newCursor = i;
    let number = null;
    if (command === 'A' || command === 'a') {
      const position = args.length;
      if (position === 0 || position === 1) {
        // allow only positive number without sign as first two arguments
        if (c !== '+' && c !== '-') {
          [newCursor, number] = readNumber(string, i);
        }
      }
      if (position === 2 || position === 5 || position === 6) {
        [newCursor, number] = readNumber(string, i);
      }
      if (position === 3 || position === 4) {
        // read flags: exactly one character, so the cursor stays on `i`
        if (c === '0') {
          number = 0;
        }
        if (c === '1') {
          number = 1;
        }
      }
    } else {
      [newCursor, number] = readNumber(string, i);
    }
    if (number == null) {
      return pathData;
    }
    args.push(number);
    canHaveComma = true;
    hadComma = false;
    i = newCursor;
    // flush arguments when necessary count is reached
    if (args.length === argsCount) {
      pathData.push({ command, args });
      // subsequent moveto coordinates are treated as implicit lineto commands
      if (command === 'M') {
        command = 'L';
      }
      if (command === 'm') {
        command = 'l';
      }
      args = [];
    }
  }
  return pathData;
};
exports.parsePathData = parsePathData; |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,76 @@ | ||
'use strict'; | ||
|
||
const { expect } = require('chai'); | ||
const { parsePathData } = require('./path.js'); | ||
|
||
// Behavioural tests for parsePathData: accepted separators, the places
// where commas are rejected, and the points at which parsing stops.
describe('parse path data', () => {
  it('should allow spaces between commands', () => {
    expect(parsePathData('M0 10 L \n\r\t20 30')).to.deep.equal([
      { command: 'M', args: [0, 10] },
      { command: 'L', args: [20, 30] },
    ]);
  });
  it('should allow spaces and commas between arguments', () => {
    expect(parsePathData('M0 , 10 L 20 \n\r\t30,40,50')).to.deep.equal([
      { command: 'M', args: [0, 10] },
      { command: 'L', args: [20, 30] },
      { command: 'L', args: [40, 50] },
    ]);
  });
  it('should forbid commas before commands', () => {
    expect(parsePathData(', M0 10')).to.deep.equal([]);
  });
  it('should forbid commas between commands', () => {
    expect(parsePathData('M0,10 , L 20,30')).to.deep.equal([
      { command: 'M', args: [0, 10] },
    ]);
  });
  it('should forbid commas between command name and argument', () => {
    expect(parsePathData('M0,10 L,20,30')).to.deep.equal([
      { command: 'M', args: [0, 10] },
    ]);
  });
  it('should forbid multipe commas in a row', () => {
    expect(parsePathData('M0 , , 10')).to.deep.equal([]);
  });
  it('should stop when unknown char appears', () => {
    // no comma before "L": parsing must get as far as "#" so that the
    // unknown character, not the comma rule, is what stops it
    expect(parsePathData('M0 10 L 20 #40')).to.deep.equal([
      { command: 'M', args: [0, 10] },
    ]);
  });
  it('should stop when not enough arguments', () => {
    expect(parsePathData('M0 10 L 20 L 30 40')).to.deep.equal([
      { command: 'M', args: [0, 10] },
    ]);
  });
  it('should stop if moveto not the first command', () => {
    expect(parsePathData('L 10 20')).to.deep.equal([]);
    expect(parsePathData('10 20')).to.deep.equal([]);
  });
  it('should stop on invalid numbers', () => {
    expect(parsePathData('M ...')).to.deep.equal([]);
  });
  it('should handle arcs', () => {
    // the first "a" is followed by two further argument sets without a
    // command letter; each must be emitted as its own "a" item
    expect(
      parsePathData(
        `
          M600,350
          l 50,-25
          a25,25 -30 0,1 50,-25
          25,50 -30 0,1 50,-25
          25,75 -30 0,1 50,-25
          a25,100 -30 0,1 50,-25
          l 50,-25
        `
      )
    ).to.deep.equal([
      { command: 'M', args: [600, 350] },
      { command: 'l', args: [50, -25] },
      { command: 'a', args: [25, 25, -30, 0, 1, 50, -25] },
      { command: 'a', args: [25, 50, -30, 0, 1, 50, -25] },
      { command: 'a', args: [25, 75, -30, 0, 1, 50, -25] },
      { command: 'a', args: [25, 100, -30, 0, 1, 50, -25] },
      { command: 'l', args: [50, -25] },
    ]);
  });
});
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.