Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

refactor(tokenizer): Re-use object for current location #397

Closed
wants to merge 2 commits into from
Closed
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
56 changes: 42 additions & 14 deletions packages/parse5/lib/tokenizer/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -231,16 +231,21 @@ export class Tokenizer {
this.addLocationInfo = !!options.sourceCodeLocationInfo;
this.onParseError = options.onParseError ?? null;
this.preprocessor = new Preprocessor(options);
this.ctLoc = this.getCurrentLocation();
}

//Errors
private _err(code: ERR): void {
this.onParseError?.(this.preprocessor.getError(code));
}

// Locations
private currentAttrLocation: Location | null = null;
private ctLoc: Location | null = null;
private _getCurrentLocation(): Location | null {

/** The start location of the current section. Object is updated; use `getStartLocation` to get a copy. */
private readonly ctLoc: Location | null;

private getCurrentLocation(): Location | null {
if (!this.addLocationInfo) {
return null;
}
Expand All @@ -255,6 +260,29 @@ export class Tokenizer {
};
}

private updateStartLocation(): void {
if (this.ctLoc !== null) {
this.ctLoc.startLine = this.preprocessor.line;
this.ctLoc.startCol = this.preprocessor.col;
this.ctLoc.startOffset = this.preprocessor.offset;
}
}

private getStartLocation(): Location | null {
if (this.ctLoc === null) {
return null;
}

return {
startLine: this.ctLoc.startLine,
startCol: this.ctLoc.startCol,
startOffset: this.ctLoc.startOffset,
endLine: -1,
endCol: -1,
endOffset: -1,
};
}

//API
public getNextToken(): Token {
while (this.tokenQueue.length === 0 && this.active) {
Expand Down Expand Up @@ -334,7 +362,7 @@ export class Tokenizer {
selfClosing: false,
ackSelfClosing: false,
attrs: [],
location: this.ctLoc,
location: this.getStartLocation(),
};
}

Expand All @@ -346,15 +374,15 @@ export class Tokenizer {
selfClosing: false,
ackSelfClosing: false,
attrs: [],
location: this.ctLoc,
location: this.getStartLocation(),
};
}

private _createCommentToken(): void {
this.currentToken = {
type: TokenType.COMMENT,
data: '',
location: this.ctLoc,
location: this.getStartLocation(),
};
}

Expand All @@ -365,20 +393,20 @@ export class Tokenizer {
forceQuirks: false,
publicId: null,
systemId: null,
location: this.ctLoc,
location: this.getStartLocation(),
};
}

private _createCharacterToken(type: CharacterToken['type'], chars: string): void {
this.currentCharacterToken = {
type,
chars,
location: this.ctLoc,
location: this.getStartLocation(),
};
}

private _createEOFToken(): void {
const ctLoc = this._getCurrentLocation();
const ctLoc = this.getCurrentLocation();

if (ctLoc) {
ctLoc.endLine = ctLoc.startLine;
Expand All @@ -395,7 +423,7 @@ export class Tokenizer {
name: attrNameFirstCh,
value: '',
};
this.currentAttrLocation = this._getCurrentLocation();
this.currentAttrLocation = this.getCurrentLocation();
}

private _leaveAttrName(): void {
Expand Down Expand Up @@ -925,7 +953,7 @@ export class Tokenizer {
//------------------------------------------------------------------
private _stateData(cp: number): void {
this.preprocessor.dropParsedChunk();
this.ctLoc = this._getCurrentLocation();
this.updateStartLocation();
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

feels a bit weird having to do this book-keeping — its existence seems to indicate that the data might be out of date and before you access it you’d have to call this function?

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I played around with this some more. Turns out we can actually get rid of location updates in the *DATA states altogether: #402


switch (cp) {
case $.LESS_THAN_SIGN: {
Expand Down Expand Up @@ -956,7 +984,7 @@ export class Tokenizer {
//------------------------------------------------------------------
private _stateRcdata(cp: number): void {
this.preprocessor.dropParsedChunk();
this.ctLoc = this._getCurrentLocation();
this.updateStartLocation();

switch (cp) {
case $.AMPERSAND: {
Expand Down Expand Up @@ -987,7 +1015,7 @@ export class Tokenizer {
//------------------------------------------------------------------
private _stateRawtext(cp: number): void {
this.preprocessor.dropParsedChunk();
this.ctLoc = this._getCurrentLocation();
this.updateStartLocation();

switch (cp) {
case $.LESS_THAN_SIGN: {
Expand All @@ -1013,7 +1041,7 @@ export class Tokenizer {
//------------------------------------------------------------------
private _stateScriptData(cp: number): void {
this.preprocessor.dropParsedChunk();
this.ctLoc = this._getCurrentLocation();
this.updateStartLocation();

switch (cp) {
case $.LESS_THAN_SIGN: {
Expand All @@ -1039,7 +1067,7 @@ export class Tokenizer {
//------------------------------------------------------------------
private _statePlaintext(cp: number): void {
this.preprocessor.dropParsedChunk();
this.ctLoc = this._getCurrentLocation();
this.updateStartLocation();

switch (cp) {
case $.NULL: {
Expand Down