From 4086e313346ce918dac5d82dc18c9176edc7def5 Mon Sep 17 00:00:00 2001 From: Bogdan Chadkin Date: Sun, 28 Feb 2021 22:10:57 +0300 Subject: [PATCH] Implement new path parser (#1378) Regexps are quite leaky for complex parsing. Regression tests caught a few issues related to path parser. In this diff I implemented the new spec-compliant parser which solves 2 regression cases and covers many edge cases hard to handle with regexps. --- lib/path.js | 210 +++++++++++++++++++++++++++++++++++++++++++++++ lib/path.test.js | 76 +++++++++++++++++ package.json | 5 +- plugins/_path.js | 99 ++++------------------ 4 files changed, 307 insertions(+), 83 deletions(-) create mode 100644 lib/path.js create mode 100644 lib/path.test.js diff --git a/lib/path.js b/lib/path.js new file mode 100644 index 000000000..9b79550b0 --- /dev/null +++ b/lib/path.js @@ -0,0 +1,210 @@ +'use strict'; + +// Based on https://www.w3.org/TR/SVG11/paths.html#PathDataBNF + +const argsCountPerCommand = { + M: 2, + m: 2, + Z: 0, + z: 0, + L: 2, + l: 2, + H: 1, + h: 1, + V: 1, + v: 1, + C: 6, + c: 6, + S: 4, + s: 4, + Q: 4, + q: 4, + T: 2, + t: 2, + A: 7, + a: 7, +}; + +const isCommand = (c) => { + return c in argsCountPerCommand; +}; + +const isWsp = (c) => { + const codePoint = c.codePointAt(0); + return ( + codePoint === 0x20 || + codePoint === 0x9 || + codePoint === 0xd || + codePoint === 0xa + ); +}; + +const isDigit = (c) => { + const codePoint = c.codePointAt(0); + return 48 <= codePoint && codePoint <= 57; +}; + +const readNumber = (string, cursor) => { + let i = cursor; + let value = ''; + // none | sign | whole | decimal_point | decimal | e | exponent_sign | exponent + let state = 'none'; + for (; i < string.length; i += 1) { + const c = string[i]; + if (c === '+' || c === '-') { + if (state === 'none') { + state = 'sign'; + value += c; + continue; + } + if (state === 'e') { + state === 'exponent_sign'; + value += c; + continue; + } + } + if (isDigit(c)) { + if (state === 'none' || state === 'sign' || state === 'whole') { + state = 'whole'; + value += c; + continue; + } + if (state === 'decimal_point' || state === 'decimal') { + state = 'decimal'; + value += c; + continue; + } + if (state === 'e' || state === 'exponent_sign' || state === 'exponent') { + state = 'exponent'; + value += c; + continue; + } + } + if (c === '.') { + if (state === 'none' || state === 'sign' || state === 'whole') { + state = 'decimal_point'; + value += c; + continue; + } + } + if (c === 'E' || c == 'e') { + if ( + state === 'whole' || + state === 'decimal_point' || + state === 'decimal' + ) { + state = 'e'; + value += c; + continue; + } + } + break; + } + const number = Number.parseFloat(value); + if (Number.isNaN(number)) { + return [cursor, null]; + } else { + // step back to delegate iteration to parent loop + return [i - 1, number]; + } +}; + +const parsePathData = (string) => { + const pathData = []; + let i = 0; + let command = null; + let args; + let argsCount; + let canHaveComma = false; + let hadComma = false; + for (; i < string.length; i += 1) { + const c = string.charAt(i); + if (isWsp(c)) { + continue; + } + // allow comma only between arguments + if (canHaveComma && c === ',') { + if (hadComma) { + break; + } + hadComma = true; + continue; + } + if (isCommand(c)) { + if (hadComma) { + return pathData; + } + if (command == null) { + // moveto should be leading command + if (c !== 'M' && c !== 'm') { + return pathData; + } + } else { + // stop if previous command arguments are not flushed + if (args.length !== 0) { + return pathData; + } + } + command = c; + args = []; + argsCount = argsCountPerCommand[command]; + canHaveComma = false; + // flush command without arguments + if (argsCount === 0) { + pathData.push({ command, args }); + } + continue; + } + // avoid parsing arguments if no command detected + if (command == null) { + return pathData; + } + // read next argument + let newCursor = i; + let number = null; + if (command === 'A' || command === 'a') { + const position = args.length; + if (position === 0 || position === 1) { + // allow only positive number without sign as first two arguments + if (c !== '+' && c !== '-') { + [newCursor, number] = readNumber(string, i); + } + } + if (position === 2 || position === 5 || position === 6) { + [newCursor, number] = readNumber(string, i); + } + if (position === 3 || position === 4) { + // read flags + if (c === '0') { + number = 0; + } + if (c === '1') { + number = 1; + } + } + } else { + [newCursor, number] = readNumber(string, i); + } + if (number == null) { + return pathData; + } + args.push(number); + canHaveComma = true; + hadComma = false; + i = newCursor; + // flush arguments when necessary count is reached + if (args.length === argsCount) { + pathData.push({ command, args }); + // subsequent moveto coordinates are threated as implicit lineto commands + if (command === 'M') { + command = 'L'; + } + if (command === 'm') { + command = 'l'; + } + args = []; + } + } + return pathData; +}; +exports.parsePathData = parsePathData; diff --git a/lib/path.test.js b/lib/path.test.js new file mode 100644 index 000000000..bcb1dbd48 --- /dev/null +++ b/lib/path.test.js @@ -0,0 +1,76 @@ +'use strict'; + +const { expect } = require('chai'); +const { parsePathData } = require('./path.js'); + +describe('parse path data', () => { + it('should allow spaces between commands', () => { + expect(parsePathData('M0 10 L \n\r\t20 30')).to.deep.equal([ + { command: 'M', args: [0, 10] }, + { command: 'L', args: [20, 30] }, + ]); + }); + it('should allow spaces and commas between arguments', () => { + expect(parsePathData('M0 , 10 L 20 \n\r\t30,40,50')).to.deep.equal([ + { command: 'M', args: [0, 10] }, + { command: 'L', args: [20, 30] }, + { command: 'L', args: [40, 50] }, + ]); + }); + it('should forbid commas before commands', () => { + expect(parsePathData(', M0 10')).to.deep.equal([]); + }); + it('should forbid commas between commands', () => { + expect(parsePathData('M0,10 , L 20,30')).to.deep.equal([ + { command: 'M', args: [0, 10] }, + ]); + }); + it('should forbid commas between command name and argument', () => { + expect(parsePathData('M0,10 L,20,30')).to.deep.equal([ + { command: 'M', args: [0, 10] }, + ]); + }); + it('should forbid multipe commas in a row', () => { + expect(parsePathData('M0 , , 10')).to.deep.equal([]); + }); + it('should stop when unknown char appears', () => { + expect(parsePathData('M0 10 , L 20 #40')).to.deep.equal([ + { command: 'M', args: [0, 10] }, + ]); + }); + it('should stop when not enough arguments', () => { + expect(parsePathData('M0 10 L 20 L 30 40')).to.deep.equal([ + { command: 'M', args: [0, 10] }, + ]); + }); + it('should stop if moveto not the first command', () => { + expect(parsePathData('L 10 20')).to.deep.equal([]); + expect(parsePathData('10 20')).to.deep.equal([]); + }); + it('should stop on invalid numbers', () => { + expect(parsePathData('M ...')).to.deep.equal([]); + }); + it('should handle arcs', () => { + expect( + parsePathData( + ` + M600,350 + l 50,-25 + a25,25 -30 0,1 50,-25 + 25,50 -30 0,1 50,-25 + 25,75 -30 0,1 50,-25 + a25,100 -30 0,1 50,-25 + l 50,-25 + ` + ) + ).to.deep.equal([ + { command: 'M', args: [600, 350] }, + { command: 'l', args: [50, -25] }, + { command: 'a', args: [25, 25, -30, 0, 1, 50, -25] }, + { command: 'a', args: [25, 50, -30, 0, 1, 50, -25] }, + { command: 'a', args: [25, 75, -30, 0, 1, 50, -25] }, + { command: 'a', args: [25, 100, -30, 0, 1, 50, -25] }, + { command: 'l', args: [50, -25] }, + ]); + }); +}); diff --git a/package.json b/package.json index 4d19409b1..2512b6ce0 100644 --- a/package.json +++ b/package.json @@ -49,7 +49,7 @@ "dist" ], "scripts": { - "test": "c8 --reporter=html --reporter=text mocha \"test/*/_index.js\"", + "test": "c8 --reporter=html --reporter=text mocha \"test/*/_index.js\" \"**/*.test.js\" --ignore=\"node_modules/**\"", "lint": "eslint .", "test-browser": "rollup -c && node ./test/browser.js", "prepublishOnly": "rm -rf dist && rollup -c" @@ -83,7 +83,8 @@ }, { "files": [ - "test/**/*.js" + "test/**/*.js", + "**/*.test.js" ], "env": { "mocha": true diff --git a/plugins/_path.js b/plugins/_path.js index 01c9fa7f7..00ecf5a4e 100644 --- a/plugins/_path.js +++ b/plugins/_path.js @@ -1,16 +1,8 @@ 'use strict'; -var rNumber = String.raw`[-+]?(?:\d*\.\d+|\d+\.?)(?:[eE][-+]?\d+)?\s*`, - rCommaWsp = String.raw`(?:\s,?\s*|,\s*)`, - rNumberCommaWsp = `(${rNumber})` + rCommaWsp, - rFlagCommaWsp = `([01])${rCommaWsp}?`, - rCoordinatePair = String.raw`(${rNumber})${rCommaWsp}?(${rNumber})`, - rArcSeq = (rNumberCommaWsp + '?').repeat(2) + rNumberCommaWsp + rFlagCommaWsp.repeat(2) + rCoordinatePair; - -var regPathInstructions = /([MmLlHhVvCcSsQqTtAaZz])\s*/, - regCoordinateSequence = new RegExp(rNumber, 'g'), - regArcArgumentSequence = new RegExp(rArcSeq, 'g'), - regNumericValues = /[-+]?(\d*\.\d+|\d+\.?)(?:[eE][-+]?\d+)?/g, +const { parsePathData } = require('../lib/path.js'); + +var regNumericValues = /[-+]?(\d*\.\d+|\d+\.?)(?:[eE][-+]?\d+)?/g, transform2js = require('./_transforms').transform2js, transformsMultiply = require('./_transforms').transformsMultiply, transformArc = require('./_transforms').transformArc, @@ -29,77 +21,22 @@ var regPathInstructions = /([MmLlHhVvCcSsQqTtAaZz])\s*/, * @return {Array} output array */ exports.path2js = function(path) { - if (path.pathJS) return path.pathJS; - - var paramsLength = { // Number of parameters of every path command - H: 1, V: 1, M: 2, L: 2, T: 2, Q: 4, S: 4, C: 6, A: 7, - h: 1, v: 1, m: 2, l: 2, t: 2, q: 4, s: 4, c: 6, a: 7 - }, - pathData = [], // JS representation of the path data - instruction, // current instruction context - startMoveto = false; - - // splitting path string into array like ['M', '10 50', 'L', '20 30'] - path.attr('d').value.split(regPathInstructions).forEach(function(data) { - if (!data) return; - if (!startMoveto) { - if (data == 'M' || data == 'm') { - startMoveto = true; - } else return; - } - - // instruction item - if (regPathInstructions.test(data)) { - instruction = data; - - // z - instruction w/o data - if (instruction == 'Z' || instruction == 'z') { - pathData.push({ - instruction: 'z' - }); - } - // data item - } else { - if (instruction == 'A' || instruction == 'a') { - var newData = []; - for (var args; (args = regArcArgumentSequence.exec(data));) { - for (var i = 1; i < args.length; i++) { - newData.push(args[i]); - } - } - data = newData; - } else { - data = data.match(regCoordinateSequence); - } - if (!data) return; - - data = data.map(Number); - // Subsequent moveto pairs of coordinates are threated as implicit lineto commands - // https://www.w3.org/TR/SVG11/paths.html#PathDataMovetoCommands - if (instruction == 'M' || instruction == 'm') { - pathData.push({ - instruction: pathData.length == 0 ? 'M' : instruction, - data: data.splice(0, 2) - }); - instruction = instruction == 'M' ? 'L' : 'l'; - } - - for (var pair = paramsLength[instruction]; data.length;) { - pathData.push({ - instruction: instruction, - data: data.splice(0, pair) - }); - } - } - }); - - // First moveto is actually absolute. Subsequent coordinates were separated above. - if (pathData.length && pathData[0].instruction == 'm') { - pathData[0].instruction = 'M'; + if (path.pathJS) return path.pathJS; + const pathData = []; // JS representation of the path data + const newPathData = parsePathData(path.attr('d').value); + for (const { command, args } of newPathData) { + if (command === 'Z' || command === 'z') { + pathData.push({ instruction: 'z' }); + } else { + pathData.push({ instruction: command, data: args }); } - path.pathJS = pathData; - - return pathData; + } + // First moveto is actually absolute. Subsequent coordinates were separated above. + if (pathData.length && pathData[0].instruction == 'm') { + pathData[0].instruction = 'M'; + } + path.pathJS = pathData; + return pathData; }; /**