Skip to content

Commit

Permalink
Remove inlining logic in AST (de-)serializer (#5455)
Browse files: browse the repository at this point in the history
* Do not inline attributes to reduce complexity

* If the reference of a list is 0, treat it as an empty list

* Extend zero logic to stateful lists

* Inline reference position update into string conversions

* Unify how flags are added

* Lint imports
  • Loading branch information
lukastaegert committed Apr 7, 2024
1 parent 1b85663 commit edc1504
Show file tree
Hide file tree
Showing 14 changed files with 1,015 additions and 884 deletions.
622 changes: 373 additions & 249 deletions rust/parse_ast/src/convert_ast/converter.rs

Large diffs are not rendered by default.

388 changes: 223 additions & 165 deletions rust/parse_ast/src/convert_ast/converter/ast_constants.rs

Large diffs are not rendered by default.

15 changes: 10 additions & 5 deletions rust/parse_ast/src/error_emit.rs
Original file line number Diff line number Diff line change
Expand Up @@ -5,9 +5,10 @@ use parking_lot::Mutex;
use swc_common::errors::{DiagnosticBuilder, Emitter, Handler, Level, HANDLER};
use swc_ecma_ast::Program;

use crate::convert_ast::converter::ast_constants::PARSE_ERROR_MESSAGE_OFFSET;
use crate::convert_ast::converter::{
ast_constants::{PARSE_ERROR_RESERVED_BYTES, TYPE_PARSE_ERROR_INLINED_MESSAGE},
convert_string,
ast_constants::{PARSE_ERROR_RESERVED_BYTES, TYPE_PARSE_ERROR},
convert_string, update_reference_position,
};

#[derive(Clone, Default)]
Expand Down Expand Up @@ -68,22 +69,26 @@ where
}

fn create_error_buffer(wr: &Writer, code: &str) -> Vec<u8> {
let mut buffer = TYPE_PARSE_ERROR_INLINED_MESSAGE.to_vec();
let mut lock = wr.0.lock();
let error_buffer = take(&mut *lock);
let pos = u32::from_ne_bytes(error_buffer[0..4].try_into().unwrap());
let mut utf_16_pos: u32 = 0;
// convert utf-8 to utf-16 inline
for (utf_8_pos, char) in code.char_indices() {
if (utf_8_pos as u32) == pos {
break;
}
utf_16_pos += char.len_utf16() as u32;
}
// type
let mut buffer = TYPE_PARSE_ERROR.to_vec();
// start
buffer.extend_from_slice(&utf_16_pos.to_ne_bytes());
// end
buffer.resize(buffer.len() + PARSE_ERROR_RESERVED_BYTES, 0);
// message
let end_position = buffer.len();
buffer.resize(end_position + PARSE_ERROR_RESERVED_BYTES, 0);
// message, the string is already converted to a buffer via convert_string
update_reference_position(&mut buffer, end_position + PARSE_ERROR_MESSAGE_OFFSET);
buffer.extend_from_slice(&error_buffer[4..]);
buffer
}
20 changes: 12 additions & 8 deletions rust/parse_ast/src/lib.rs
Original file line number Diff line number Diff line change
@@ -1,22 +1,22 @@
use std::panic::{catch_unwind, AssertUnwindSafe};

use convert_ast::converter::ast_constants::{
PANIC_ERROR_RESERVED_BYTES, TYPE_PANIC_ERROR_INLINED_MESSAGE,
};
use convert_ast::converter::{convert_string, AstConverter};
use swc_common::sync::Lrc;
use swc_common::{FileName, FilePathMapping, Globals, SourceMap, GLOBALS};
use swc_compiler_base::parse_js;
use swc_compiler_base::IsModule;
use swc_ecma_ast::EsVersion;
use swc_ecma_parser::{EsConfig, Syntax};

use convert_ast::converter::ast_constants::{PANIC_ERROR_RESERVED_BYTES, TYPE_PANIC_ERROR};
use convert_ast::converter::{convert_string, AstConverter};
use error_emit::try_with_handler;

use crate::convert_ast::annotations::SequentialComments;
use crate::convert_ast::converter::ast_constants::PANIC_ERROR_MESSAGE_OFFSET;
use crate::convert_ast::converter::update_reference_position;

mod convert_ast;

use error_emit::try_with_handler;

mod error_emit;

pub fn parse_ast(code: String, allow_return_outside_function: bool) -> Vec<u8> {
Expand Down Expand Up @@ -63,9 +63,13 @@ pub fn parse_ast(code: String, allow_return_outside_function: bool) -> Vec<u8> {
} else {
"Unknown rust panic message"
};
let mut buffer = TYPE_PANIC_ERROR_INLINED_MESSAGE.to_vec();
// type
let mut buffer = TYPE_PANIC_ERROR.to_vec();
// reserve for start and end even though they are unused
buffer.resize(buffer.len() + 4 + PANIC_ERROR_RESERVED_BYTES, 0);
let end_position = buffer.len() + 4;
buffer.resize(end_position + PANIC_ERROR_RESERVED_BYTES, 0);
// message
update_reference_position(&mut buffer, end_position + PANIC_ERROR_MESSAGE_OFFSET);
convert_string(&mut buffer, msg);
buffer
})
Expand Down
84 changes: 23 additions & 61 deletions scripts/ast-types.js
Original file line number Diff line number Diff line change
Expand Up @@ -158,19 +158,22 @@ export const AST_NODES = {
ClassBody: {
fields: [['body', 'NodeList']],
scriptedFields: {
body: `const length = buffer[$position];
body: ` const bodyPosition = $position;
const body: (MethodDefinition | PropertyDefinition)[] = (node.body = []);
for (let index = 0; index < length; index++) {
const nodePosition = buffer[$position + 1 + index];
body.push(
convertNode(
node,
(buffer[nodePosition + 3] & 1) === 0 ? scope.instanceScope : scope,
nodePosition,
buffer,
readString
)
);
if (bodyPosition) {
const length = buffer[bodyPosition];
for (let index = 0; index < length; index++) {
const nodePosition = buffer[bodyPosition + 1 + index];
body.push(
convertNode(
node,
(buffer[nodePosition + 3] & 1) === 0 ? scope.instanceScope : scope,
nodePosition,
buffer,
readString
)
);
}
}`
}
},
Expand Down Expand Up @@ -638,56 +641,15 @@ export const AST_NODES = {
}
};

export const astNodeNamesWithFieldOrder = Object.entries(AST_NODES).map(([name, node]) => {
/** @type {FieldWithType[]} */
const fields =
(node.hasSameFieldsAs ? AST_NODES[node.hasSameFieldsAs].fields : node.fields) || [];
/** @type {FieldWithType[]} */
const allFields = [];
/** @type {FieldWithType[]} */
const reservedFields = [];
/** @type {FieldWithType|null|undefined} */
let inlinedVariableField = undefined;
for (const field of fields) {
allFields.push(field);
switch (field[1]) {
case 'Annotations':
case 'InvalidAnnotations':
case 'String':
case 'NodeList':
case 'Node': {
if (inlinedVariableField === undefined) {
inlinedVariableField = field;
} else {
reservedFields.push(field);
}
break;
}
case 'OptionalNode': {
// Optional nodes cannot be inlined, but they also cannot be parsed
// out-of-order, so nothing is inlined as the inlined node is always
// parsed first.
if (inlinedVariableField === undefined) {
inlinedVariableField = null;
}
reservedFields.push(field);
break;
}
case 'OptionalString':
case 'FixedString':
case 'Float': {
reservedFields.push(field);
break;
}
default: {
throw new Error(`Unknown field type ${field[0]}`);
}
}
}
/** @type { {name: string; fields: FieldWithType[]; node: NodeDescription; originalNode: NodeDescription;}[] } */
export const astNodeNamesWithFieldOrder = Object.entries(AST_NODES).map(([name, originalNode]) => {
const node = originalNode.hasSameFieldsAs
? AST_NODES[originalNode.hasSameFieldsAs]
: originalNode;
return {
allFields,
inlinedVariableField,
fields: node.fields || [],
name,
reservedFields
node,
originalNode
};
});
146 changes: 68 additions & 78 deletions scripts/generate-buffer-parsers.js
Original file line number Diff line number Diff line change
@@ -1,96 +1,84 @@
import { writeFile } from 'node:fs/promises';
import { AST_NODES, astNodeNamesWithFieldOrder } from './ast-types.js';
import { getNode } from './generate-buffer-to-ast.js';
import { astNodeNamesWithFieldOrder } from './ast-types.js';
import { firstLetterLowercase, lintTsFile } from './helpers.js';

const bufferParsersFile = new URL('../src/ast/bufferParsers.ts', import.meta.url);

const nodeTypes = astNodeNamesWithFieldOrder.map(({ name }) => getNode(name).astType || name);
const nodeTypes = astNodeNamesWithFieldOrder.map(({ name, node }) => node.astType || name);

const nodeTypeImports = nodeTypes.map(name => `import ${name} from './nodes/${name}';`);
const nodeTypeStrings = nodeTypes.map(name => `\t'${name}'`);

const jsConverters = astNodeNamesWithFieldOrder.map(
({ name, inlinedVariableField, reservedFields, allFields }) => {
const node = getNode(name);
const readStringArgument = allFields.some(([, fieldType]) =>
['Node', 'OptionalNode', 'NodeList', 'String', 'FixedString', 'OptionalString'].includes(
fieldType
)
const jsConverters = astNodeNamesWithFieldOrder.map(({ name, fields, node, originalNode }) => {
const readStringArgument = fields.some(([, fieldType]) =>
['Node', 'OptionalNode', 'NodeList', 'String', 'FixedString', 'OptionalString'].includes(
fieldType
)
? ', readString'
: '';
/** @type {string[]} */
const definitions = [];
let offset = 0;
let needsBuffer = false;
let needsScope = false;
if (node.flags) {
offset++;
needsBuffer = true;
definitions.push(
'const flags = buffer[position];\n',
...node.flags.map((flagName, index) => {
let assignmentLeftHand = node.baseForAdditionalFields?.includes(flagName)
? `const ${flagName} = `
: '';
if (!node.hiddenFields?.includes(flagName)) {
assignmentLeftHand += `node.${flagName} = `;
}
return `${assignmentLeftHand}(flags & ${1 << index}) === ${1 << index};`;
})
);
}
for (const [index, field] of reservedFields.entries()) {
const fieldDefinition = getFieldDefinition(field, name, offset + index, false);
needsBuffer = true;
needsScope ||= fieldDefinition.needsScope;
definitions.push(`${fieldDefinition.definition}\n`);
}
offset += reservedFields.length;
if (inlinedVariableField) {
const fieldDefinition = getFieldDefinition(inlinedVariableField, name, offset, true);
needsBuffer = true;
needsScope ||= fieldDefinition.needsScope;
definitions.push(`${fieldDefinition.definition}\n`);
}
for (const [fieldName, fieldValue] of Object.entries(node.additionalFields || {})) {
definitions.push(`node.${fieldName} = ${fieldValue};\n`);
}
for (const [fieldName, fallbackName] of Object.entries(node.optionalFallback || {})) {
needsScope = true;
definitions.push(
`node.${fieldName} = ${fieldName}Position === 0 ? node.${fallbackName} : convertNode(node, scope, ${fieldName}Position, buffer, readString);\n`
);
}
if (needsScope) {
definitions.unshift('const {scope} = node;');
}
/** @type {string[]} */
const parameters = [];
if (definitions.length > 0) {
parameters.push(`node: ${node.astType || name}`);
if (needsBuffer) {
parameters.push(`position, buffer${readStringArgument}`);
}
)
? ', readString'
: '';
/** @type {string[]} */
const definitions = [];
let offset = 0;
let needsBuffer = false;
let needsScope = false;
if (node.flags) {
offset++;
needsBuffer = true;
definitions.push(
'const flags = buffer[position];\n',
...node.flags.map((flagName, index) => {
let assignmentLeftHand = node.baseForAdditionalFields?.includes(flagName)
? `const ${flagName} = `
: '';
if (!node.hiddenFields?.includes(flagName)) {
assignmentLeftHand += `node.${flagName} = `;
}
return `${assignmentLeftHand}(flags & ${1 << index}) === ${1 << index};`;
})
);
}
for (const [index, field] of fields.entries()) {
const fieldDefinition = getFieldDefinition(field, node, originalNode, offset + index);
needsBuffer = true;
needsScope ||= fieldDefinition.needsScope;
definitions.push(`${fieldDefinition.definition}\n`);
}
offset += fields.length;
for (const [fieldName, fieldValue] of Object.entries(node.additionalFields || {})) {
definitions.push(`node.${fieldName} = ${fieldValue};\n`);
}
for (const [fieldName, fallbackName] of Object.entries(node.optionalFallback || {})) {
needsScope = true;
definitions.push(
`node.${fieldName} = ${fieldName}Position === 0 ? node.${fallbackName} : convertNode(node, scope, ${fieldName}Position, buffer, readString);\n`
);
}
if (needsScope) {
definitions.unshift('const {scope} = node;');
}
/** @type {string[]} */
const parameters = [];
if (definitions.length > 0) {
parameters.push(`node: ${node.astType || name}`);
if (needsBuffer) {
parameters.push(`position, buffer${readStringArgument}`);
}
return `function ${firstLetterLowercase(name)} (${parameters.join(', ')}) {
${definitions.join('')}}`;
}
);
return `function ${firstLetterLowercase(name)} (${parameters.join(', ')}) {
${definitions.join('')}}`;
});

/**
* @param {import('./ast-types.js').FieldWithType} field
* @param {string} name
* @param {import("./ast-types.js").FieldWithType} field
* @param {import("./ast-types.js").NodeDescription} node
* @param {import("./ast-types.js").NodeDescription} originalNode
* @param {number} offset
* @param {boolean} isInlined
* @returns {{definition: string, needsScope: boolean}}
*/
function getFieldDefinition([fieldName, fieldType], name, offset, isInlined) {
const originalNode = AST_NODES[name];
const node = getNode(name);
function getFieldDefinition([fieldName, fieldType], node, originalNode, offset) {
const getPosition = offset > 0 ? `position + ${offset}` : 'position';
const dataStart = isInlined ? getPosition : `buffer[${getPosition}]`;
const dataStart = `buffer[${getPosition}]`;
if (node.scriptedFields?.[fieldName]) {
return {
definition: node.scriptedFields?.[fieldName].replace(/\$position/g, dataStart),
Expand All @@ -114,7 +102,7 @@ function getFieldDefinition([fieldName, fieldType], name, offset, isInlined) {
};
}
case 'OptionalNode': {
let definition = `const ${fieldName}Position = buffer[${getPosition}];`;
let definition = `const ${fieldName}Position = ${dataStart};`;
let needsScope = false;
if (!node.optionalFallback?.[fieldName]) {
needsScope = true;
Expand Down Expand Up @@ -158,13 +146,13 @@ function getFieldDefinition([fieldName, fieldType], name, offset, isInlined) {
}
case 'OptionalString': {
return {
definition: `const ${fieldName}Position = buffer[${getPosition}];\n${assignmentLeftHand}${fieldName}Position === 0 ? undefined : convertString(${fieldName}Position, buffer, readString)${typeCastString};`,
definition: `const ${fieldName}Position = ${dataStart};\n${assignmentLeftHand}${fieldName}Position === 0 ? undefined : convertString(${fieldName}Position, buffer, readString)${typeCastString};`,
needsScope: false
};
}
case 'FixedString': {
return {
definition: `${assignmentLeftHand}FIXED_STRINGS[buffer[${getPosition}]]${typeCastString};`,
definition: `${assignmentLeftHand}FIXED_STRINGS[${dataStart}]${typeCastString};`,
needsScope: false
};
}
Expand All @@ -186,6 +174,7 @@ const bufferParsers = `// This file is generated by scripts/generate-ast-convert
import type * as estree from 'estree';
import type { AstContext } from '../Module';
import { convertAnnotations, convertString } from '../utils/astConverterHelpers';
import { EMPTY_ARRAY } from '../utils/blank';
import { convertNode as convertJsonNode } from '../utils/bufferToAst';
import FIXED_STRINGS from '../utils/convert-ast-strings';
import type { ReadString } from '../utils/getReadStringFunction';
Expand Down Expand Up @@ -242,6 +231,7 @@ function convertNode(parent: Node | { context: AstContext; type: string }, paren
}
function convertNodeList(parent: Node | { context: AstContext; type: string }, parentScope: ChildScope, position: number, buffer: Uint32Array, readString: ReadString): any[] {
if (position === 0) return EMPTY_ARRAY as never[];
const length = buffer[position++];
const list: any[] = [];
for (let index = 0; index < length; index++) {
Expand Down

0 comments on commit edc1504

Please sign in to comment.