NEW: REBASE THE ENTIRE WORKING PROJECT
885  node_modules/chevrotain/lib_esm/src/scan/lexer.js  generated  vendored  Normal file
@@ -0,0 +1,885 @@
var __extends = (this && this.__extends) || (function () {
    var extendStatics = function (d, b) {
        extendStatics = Object.setPrototypeOf ||
            ({ __proto__: [] } instanceof Array && function (d, b) { d.__proto__ = b; }) ||
            function (d, b) { for (var p in b) if (b.hasOwnProperty(p)) d[p] = b[p]; };
        return extendStatics(d, b);
    };
    return function (d, b) {
        extendStatics(d, b);
        function __() { this.constructor = d; }
        d.prototype = b === null ? Object.create(b) : (__.prototype = b.prototype, new __());
    };
})();
import { BaseRegExpVisitor } from "regexp-to-ast";
import { Lexer, LexerDefinitionErrorType } from "./lexer_public";
import { compact, contains, defaults, difference, filter, find, first, flatten, forEach, has, indexOf, isArray, isEmpty, isFunction, isRegExp, isString, isUndefined, keys, map, mapValues, packArray, PRINT_ERROR, reduce, reject } from "../utils/utils";
import { canMatchCharCode, failedOptimizationPrefixMsg, getOptimizedStartCodesIndices } from "./reg_exp";
import { getRegExpAst } from "./reg_exp_parser";
var PATTERN = "PATTERN";
export var DEFAULT_MODE = "defaultMode";
export var MODES = "modes";
export var SUPPORT_STICKY = typeof new RegExp("(?:)").sticky === "boolean";
export function disableSticky() {
    SUPPORT_STICKY = false;
}
export function enableSticky() {
    SUPPORT_STICKY = true;
}
export function analyzeTokenTypes(tokenTypes, options) {
    options = defaults(options, {
        useSticky: SUPPORT_STICKY,
        debug: false,
        safeMode: false,
        positionTracking: "full",
        lineTerminatorCharacters: ["\r", "\n"],
        tracer: function (msg, action) { return action(); }
    });
    var tracer = options.tracer;
    tracer("initCharCodeToOptimizedIndexMap", function () {
        initCharCodeToOptimizedIndexMap();
    });
    var onlyRelevantTypes;
    tracer("Reject Lexer.NA", function () {
        onlyRelevantTypes = reject(tokenTypes, function (currType) {
            return currType[PATTERN] === Lexer.NA;
        });
    });
    var hasCustom = false;
    var allTransformedPatterns;
    tracer("Transform Patterns", function () {
        hasCustom = false;
        allTransformedPatterns = map(onlyRelevantTypes, function (currType) {
            var currPattern = currType[PATTERN];
            /* istanbul ignore else */
            if (isRegExp(currPattern)) {
                var regExpSource = currPattern.source;
                if (regExpSource.length === 1 &&
                    // only these regExp meta characters which can appear in a length one regExp
                    regExpSource !== "^" &&
                    regExpSource !== "$" &&
                    regExpSource !== ".") {
                    return regExpSource;
                }
                else if (regExpSource.length === 2 &&
                    regExpSource[0] === "\\" &&
                    // not a meta character
                    !contains([
                        "d",
                        "D",
                        "s",
                        "S",
                        "t",
                        "r",
                        "n",
                        "t",
                        "0",
                        "c",
                        "b",
                        "B",
                        "f",
                        "v",
                        "w",
                        "W"
                    ], regExpSource[1])) {
                    // escaped meta Characters: /\+/ /\[/
                    // or redundant escaping: /\a/
                    // without the escaping "\"
                    return regExpSource[1];
                }
                else {
                    return options.useSticky
                        ? addStickyFlag(currPattern)
                        : addStartOfInput(currPattern);
                }
            }
            else if (isFunction(currPattern)) {
                hasCustom = true;
                // CustomPatternMatcherFunc - custom patterns do not require any transformations, only wrapping in a RegExp Like object
                return { exec: currPattern };
            }
            else if (has(currPattern, "exec")) {
                hasCustom = true;
                // ICustomPattern
                return currPattern;
            }
            else if (typeof currPattern === "string") {
                if (currPattern.length === 1) {
                    return currPattern;
                }
                else {
                    var escapedRegExpString = currPattern.replace(/[\\^$.*+?()[\]{}|]/g, "\\$&");
                    var wrappedRegExp = new RegExp(escapedRegExpString);
                    return options.useSticky
                        ? addStickyFlag(wrappedRegExp)
                        : addStartOfInput(wrappedRegExp);
                }
            }
            else {
                throw Error("non exhaustive match");
            }
        });
    });
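    /*
     * Illustrative sketch (not part of the original file; the token patterns are
     * hypothetical): with useSticky === false the "Transform Patterns" step above turns
     *   /ab+/  into  /^(?:ab+)/   (addStartOfInput)
     *   /;/    into  ";"          (length-one source kept as a plain string)
     *   "if"   into  /^(?:if)/    (escaped, wrapped in a RegExp, then anchored)
     * and a function pattern is merely wrapped as { exec: fn }.
     */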
    var patternIdxToType;
    var patternIdxToGroup;
    var patternIdxToLongerAltIdx;
    var patternIdxToPushMode;
    var patternIdxToPopMode;
    tracer("misc mapping", function () {
        patternIdxToType = map(onlyRelevantTypes, function (currType) { return currType.tokenTypeIdx; });
        patternIdxToGroup = map(onlyRelevantTypes, function (clazz) {
            var groupName = clazz.GROUP;
            /* istanbul ignore next */
            if (groupName === Lexer.SKIPPED) {
                return undefined;
            }
            else if (isString(groupName)) {
                return groupName;
            }
            else if (isUndefined(groupName)) {
                return false;
            }
            else {
                throw Error("non exhaustive match");
            }
        });
        patternIdxToLongerAltIdx = map(onlyRelevantTypes, function (clazz) {
            var longerAltType = clazz.LONGER_ALT;
            if (longerAltType) {
                var longerAltIdx = indexOf(onlyRelevantTypes, longerAltType);
                return longerAltIdx;
            }
        });
        patternIdxToPushMode = map(onlyRelevantTypes, function (clazz) { return clazz.PUSH_MODE; });
        patternIdxToPopMode = map(onlyRelevantTypes, function (clazz) {
            return has(clazz, "POP_MODE");
        });
    });
    var patternIdxToCanLineTerminator;
    tracer("Line Terminator Handling", function () {
        var lineTerminatorCharCodes = getCharCodes(options.lineTerminatorCharacters);
        patternIdxToCanLineTerminator = map(onlyRelevantTypes, function (tokType) { return false; });
        if (options.positionTracking !== "onlyOffset") {
            patternIdxToCanLineTerminator = map(onlyRelevantTypes, function (tokType) {
                if (has(tokType, "LINE_BREAKS")) {
                    return tokType.LINE_BREAKS;
                }
                else {
                    if (checkLineBreaksIssues(tokType, lineTerminatorCharCodes) === false) {
                        return canMatchCharCode(lineTerminatorCharCodes, tokType.PATTERN);
                    }
                }
            });
        }
    });
    var patternIdxToIsCustom;
    var patternIdxToShort;
    var emptyGroups;
    var patternIdxToConfig;
    tracer("Misc Mapping #2", function () {
        patternIdxToIsCustom = map(onlyRelevantTypes, isCustomPattern);
        patternIdxToShort = map(allTransformedPatterns, isShortPattern);
        emptyGroups = reduce(onlyRelevantTypes, function (acc, clazz) {
            var groupName = clazz.GROUP;
            if (isString(groupName) && !(groupName === Lexer.SKIPPED)) {
                acc[groupName] = [];
            }
            return acc;
        }, {});
        patternIdxToConfig = map(allTransformedPatterns, function (x, idx) {
            return {
                pattern: allTransformedPatterns[idx],
                longerAlt: patternIdxToLongerAltIdx[idx],
                canLineTerminator: patternIdxToCanLineTerminator[idx],
                isCustom: patternIdxToIsCustom[idx],
                short: patternIdxToShort[idx],
                group: patternIdxToGroup[idx],
                push: patternIdxToPushMode[idx],
                pop: patternIdxToPopMode[idx],
                tokenTypeIdx: patternIdxToType[idx],
                tokenType: onlyRelevantTypes[idx]
            };
        });
    });
    var canBeOptimized = true;
    var charCodeToPatternIdxToConfig = [];
    if (!options.safeMode) {
        tracer("First Char Optimization", function () {
            charCodeToPatternIdxToConfig = reduce(onlyRelevantTypes, function (result, currTokType, idx) {
                if (typeof currTokType.PATTERN === "string") {
                    var charCode = currTokType.PATTERN.charCodeAt(0);
                    var optimizedIdx = charCodeToOptimizedIndex(charCode);
                    addToMapOfArrays(result, optimizedIdx, patternIdxToConfig[idx]);
                }
                else if (isArray(currTokType.START_CHARS_HINT)) {
                    var lastOptimizedIdx_1;
                    forEach(currTokType.START_CHARS_HINT, function (charOrInt) {
                        var charCode = typeof charOrInt === "string"
                            ? charOrInt.charCodeAt(0)
                            : charOrInt;
                        var currOptimizedIdx = charCodeToOptimizedIndex(charCode);
                        // Avoid adding the config multiple times
                        if (lastOptimizedIdx_1 !== currOptimizedIdx) {
                            lastOptimizedIdx_1 = currOptimizedIdx;
                            addToMapOfArrays(result, currOptimizedIdx, patternIdxToConfig[idx]);
                        }
                    });
                }
                else if (isRegExp(currTokType.PATTERN)) {
                    if (currTokType.PATTERN.unicode) {
                        canBeOptimized = false;
                        if (options.ensureOptimizations) {
                            PRINT_ERROR("" + failedOptimizationPrefixMsg +
                                ("\tUnable to analyze < " + currTokType.PATTERN.toString() + " > pattern.\n") +
                                "\tThe regexp unicode flag is not currently supported by the regexp-to-ast library.\n" +
                                "\tThis will disable the lexer's first char optimizations.\n" +
                                "\tFor details See: https://sap.github.io/chevrotain/docs/guide/resolving_lexer_errors.html#UNICODE_OPTIMIZE");
                        }
                    }
                    else {
                        var optimizedCodes = getOptimizedStartCodesIndices(currTokType.PATTERN, options.ensureOptimizations);
                        /* istanbul ignore if */
                        // start code will only be empty given an empty regExp or failure of regexp-to-ast library
                        // the first should be a different validation and the second cannot be tested.
                        if (isEmpty(optimizedCodes)) {
                            // we cannot understand what codes may start possible matches
                            // The optimization correctness requires knowing start codes for ALL patterns.
                            // Not actually sure this is an error, no debug message
                            canBeOptimized = false;
                        }
                        forEach(optimizedCodes, function (code) {
                            addToMapOfArrays(result, code, patternIdxToConfig[idx]);
                        });
                    }
                }
                else {
                    if (options.ensureOptimizations) {
                        PRINT_ERROR("" + failedOptimizationPrefixMsg +
                            ("\tTokenType: <" + currTokType.name + "> is using a custom token pattern without providing <start_chars_hint> parameter.\n") +
                            "\tThis will disable the lexer's first char optimizations.\n" +
                            "\tFor details See: https://sap.github.io/chevrotain/docs/guide/resolving_lexer_errors.html#CUSTOM_OPTIMIZE");
                    }
                    canBeOptimized = false;
                }
                return result;
            }, []);
        });
    }
    tracer("ArrayPacking", function () {
        charCodeToPatternIdxToConfig = packArray(charCodeToPatternIdxToConfig);
    });
    return {
        emptyGroups: emptyGroups,
        patternIdxToConfig: patternIdxToConfig,
        charCodeToPatternIdxToConfig: charCodeToPatternIdxToConfig,
        hasCustom: hasCustom,
        canBeOptimized: canBeOptimized
    };
}
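/*
 * Illustrative sketch (not part of the original file; the token types are
 * hypothetical): for a mode with a ";" token and an "if" token, analyzeTokenTypes
 * returns patternIdxToConfig with one config per token type (short: 59 for ";")
 * and charCodeToPatternIdxToConfig mapping the start char codes 59 (";") and
 * 105 ("i") to the configs that can possibly match at those codes.
 */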
export function validatePatterns(tokenTypes, validModesNames) {
    var errors = [];
    var missingResult = findMissingPatterns(tokenTypes);
    errors = errors.concat(missingResult.errors);
    var invalidResult = findInvalidPatterns(missingResult.valid);
    var validTokenTypes = invalidResult.valid;
    errors = errors.concat(invalidResult.errors);
    errors = errors.concat(validateRegExpPattern(validTokenTypes));
    errors = errors.concat(findInvalidGroupType(validTokenTypes));
    errors = errors.concat(findModesThatDoNotExist(validTokenTypes, validModesNames));
    errors = errors.concat(findUnreachablePatterns(validTokenTypes));
    return errors;
}
function validateRegExpPattern(tokenTypes) {
    var errors = [];
    var withRegExpPatterns = filter(tokenTypes, function (currTokType) {
        return isRegExp(currTokType[PATTERN]);
    });
    errors = errors.concat(findEndOfInputAnchor(withRegExpPatterns));
    errors = errors.concat(findStartOfInputAnchor(withRegExpPatterns));
    errors = errors.concat(findUnsupportedFlags(withRegExpPatterns));
    errors = errors.concat(findDuplicatePatterns(withRegExpPatterns));
    errors = errors.concat(findEmptyMatchRegExps(withRegExpPatterns));
    return errors;
}
export function findMissingPatterns(tokenTypes) {
    var tokenTypesWithMissingPattern = filter(tokenTypes, function (currType) {
        return !has(currType, PATTERN);
    });
    var errors = map(tokenTypesWithMissingPattern, function (currType) {
        return {
            message: "Token Type: ->" +
                currType.name +
                "<- missing static 'PATTERN' property",
            type: LexerDefinitionErrorType.MISSING_PATTERN,
            tokenTypes: [currType]
        };
    });
    var valid = difference(tokenTypes, tokenTypesWithMissingPattern);
    return { errors: errors, valid: valid };
}
export function findInvalidPatterns(tokenTypes) {
    var tokenTypesWithInvalidPattern = filter(tokenTypes, function (currType) {
        var pattern = currType[PATTERN];
        return (!isRegExp(pattern) &&
            !isFunction(pattern) &&
            !has(pattern, "exec") &&
            !isString(pattern));
    });
    var errors = map(tokenTypesWithInvalidPattern, function (currType) {
        return {
            message: "Token Type: ->" +
                currType.name +
                "<- static 'PATTERN' can only be a RegExp, a" +
                " Function matching the {CustomPatternMatcherFunc} type or an Object matching the {ICustomPattern} interface.",
            type: LexerDefinitionErrorType.INVALID_PATTERN,
            tokenTypes: [currType]
        };
    });
    var valid = difference(tokenTypes, tokenTypesWithInvalidPattern);
    return { errors: errors, valid: valid };
}
var end_of_input = /[^\\][\$]/;
export function findEndOfInputAnchor(tokenTypes) {
    var EndAnchorFinder = /** @class */ (function (_super) {
        __extends(EndAnchorFinder, _super);
        function EndAnchorFinder() {
            var _this = _super !== null && _super.apply(this, arguments) || this;
            _this.found = false;
            return _this;
        }
        EndAnchorFinder.prototype.visitEndAnchor = function (node) {
            this.found = true;
        };
        return EndAnchorFinder;
    }(BaseRegExpVisitor));
    var invalidRegex = filter(tokenTypes, function (currType) {
        var pattern = currType[PATTERN];
        try {
            var regexpAst = getRegExpAst(pattern);
            var endAnchorVisitor = new EndAnchorFinder();
            endAnchorVisitor.visit(regexpAst);
            return endAnchorVisitor.found;
        }
        catch (e) {
            // old behavior in case of runtime exceptions with regexp-to-ast.
            /* istanbul ignore next - cannot ensure an error in regexp-to-ast */
            return end_of_input.test(pattern.source);
        }
    });
    var errors = map(invalidRegex, function (currType) {
        return {
            message: "Unexpected RegExp Anchor Error:\n" +
                "\tToken Type: ->" +
                currType.name +
                "<- static 'PATTERN' cannot contain end of input anchor '$'\n" +
                "\tSee https://sap.github.io/chevrotain/docs/guide/resolving_lexer_errors.html#ANCHORS" +
                "\tfor details.",
            type: LexerDefinitionErrorType.EOI_ANCHOR_FOUND,
            tokenTypes: [currType]
        };
    });
    return errors;
}
export function findEmptyMatchRegExps(tokenTypes) {
    var matchesEmptyString = filter(tokenTypes, function (currType) {
        var pattern = currType[PATTERN];
        return pattern.test("");
    });
    var errors = map(matchesEmptyString, function (currType) {
        return {
            message: "Token Type: ->" +
                currType.name +
                "<- static 'PATTERN' must not match an empty string",
            type: LexerDefinitionErrorType.EMPTY_MATCH_PATTERN,
            tokenTypes: [currType]
        };
    });
    return errors;
}
var start_of_input = /[^\\[][\^]|^\^/;
export function findStartOfInputAnchor(tokenTypes) {
    var StartAnchorFinder = /** @class */ (function (_super) {
        __extends(StartAnchorFinder, _super);
        function StartAnchorFinder() {
            var _this = _super !== null && _super.apply(this, arguments) || this;
            _this.found = false;
            return _this;
        }
        StartAnchorFinder.prototype.visitStartAnchor = function (node) {
            this.found = true;
        };
        return StartAnchorFinder;
    }(BaseRegExpVisitor));
    var invalidRegex = filter(tokenTypes, function (currType) {
        var pattern = currType[PATTERN];
        try {
            var regexpAst = getRegExpAst(pattern);
            var startAnchorVisitor = new StartAnchorFinder();
            startAnchorVisitor.visit(regexpAst);
            return startAnchorVisitor.found;
        }
        catch (e) {
            // old behavior in case of runtime exceptions with regexp-to-ast.
            /* istanbul ignore next - cannot ensure an error in regexp-to-ast */
            return start_of_input.test(pattern.source);
        }
    });
    var errors = map(invalidRegex, function (currType) {
        return {
            message: "Unexpected RegExp Anchor Error:\n" +
                "\tToken Type: ->" +
                currType.name +
                "<- static 'PATTERN' cannot contain start of input anchor '^'\n" +
                "\tSee https://sap.github.io/chevrotain/docs/guide/resolving_lexer_errors.html#ANCHORS" +
                "\tfor details.",
            type: LexerDefinitionErrorType.SOI_ANCHOR_FOUND,
            tokenTypes: [currType]
        };
    });
    return errors;
}
export function findUnsupportedFlags(tokenTypes) {
    var invalidFlags = filter(tokenTypes, function (currType) {
        var pattern = currType[PATTERN];
        return (pattern instanceof RegExp && (pattern.multiline || pattern.global));
    });
    var errors = map(invalidFlags, function (currType) {
        return {
            message: "Token Type: ->" +
                currType.name +
                "<- static 'PATTERN' may NOT contain global('g') or multiline('m')",
            type: LexerDefinitionErrorType.UNSUPPORTED_FLAGS_FOUND,
            tokenTypes: [currType]
        };
    });
    return errors;
}
// This can only test for identical duplicate RegExps, not semantically equivalent ones.
export function findDuplicatePatterns(tokenTypes) {
    var found = [];
    var identicalPatterns = map(tokenTypes, function (outerType) {
        return reduce(tokenTypes, function (result, innerType) {
            if (outerType.PATTERN.source === innerType.PATTERN.source &&
                !contains(found, innerType) &&
                innerType.PATTERN !== Lexer.NA) {
                // this avoids duplicates in the result, each Token Type may only appear in one "set"
                // in essence we are creating Equivalence classes on equality relation.
                found.push(innerType);
                result.push(innerType);
                return result;
            }
            return result;
        }, []);
    });
    identicalPatterns = compact(identicalPatterns);
    var duplicatePatterns = filter(identicalPatterns, function (currIdenticalSet) {
        return currIdenticalSet.length > 1;
    });
    var errors = map(duplicatePatterns, function (setOfIdentical) {
        var tokenTypeNames = map(setOfIdentical, function (currType) {
            return currType.name;
        });
        var dupPatternSrc = first(setOfIdentical).PATTERN;
        return {
            message: "The same RegExp pattern ->" + dupPatternSrc + "<- " +
                ("has been used in all of the following Token Types: " + tokenTypeNames.join(", ") + " <-"),
            type: LexerDefinitionErrorType.DUPLICATE_PATTERNS_FOUND,
            tokenTypes: setOfIdentical
        };
    });
    return errors;
}
export function findInvalidGroupType(tokenTypes) {
    var invalidTypes = filter(tokenTypes, function (clazz) {
        if (!has(clazz, "GROUP")) {
            return false;
        }
        var group = clazz.GROUP;
        return group !== Lexer.SKIPPED && group !== Lexer.NA && !isString(group);
    });
    var errors = map(invalidTypes, function (currType) {
        return {
            message: "Token Type: ->" +
                currType.name +
                "<- static 'GROUP' can only be Lexer.SKIPPED/Lexer.NA/A String",
            type: LexerDefinitionErrorType.INVALID_GROUP_TYPE_FOUND,
            tokenTypes: [currType]
        };
    });
    return errors;
}
export function findModesThatDoNotExist(tokenTypes, validModes) {
    var invalidModes = filter(tokenTypes, function (clazz) {
        return (clazz.PUSH_MODE !== undefined &&
            !contains(validModes, clazz.PUSH_MODE));
    });
    var errors = map(invalidModes, function (tokType) {
        var msg = "Token Type: ->" + tokType.name + "<- static 'PUSH_MODE' value cannot refer to a Lexer Mode ->" + tokType.PUSH_MODE + "<- " +
            "which does not exist";
        return {
            message: msg,
            type: LexerDefinitionErrorType.PUSH_MODE_DOES_NOT_EXIST,
            tokenTypes: [tokType]
        };
    });
    return errors;
}
export function findUnreachablePatterns(tokenTypes) {
    var errors = [];
    var canBeTested = reduce(tokenTypes, function (result, tokType, idx) {
        var pattern = tokType.PATTERN;
        if (pattern === Lexer.NA) {
            return result;
        }
        // a more comprehensive validation for all forms of regExps would require
        // deeper regExp analysis capabilities
        if (isString(pattern)) {
            result.push({ str: pattern, idx: idx, tokenType: tokType });
        }
        else if (isRegExp(pattern) && noMetaChar(pattern)) {
            result.push({ str: pattern.source, idx: idx, tokenType: tokType });
        }
        return result;
    }, []);
    forEach(tokenTypes, function (tokType, testIdx) {
        forEach(canBeTested, function (_a) {
            var str = _a.str, idx = _a.idx, tokenType = _a.tokenType;
            if (testIdx < idx && testTokenType(str, tokType.PATTERN)) {
                var msg = "Token: ->" + tokenType.name + "<- can never be matched.\n" +
                    ("Because it appears AFTER the Token Type ->" + tokType.name + "<- ") +
                    "in the lexer's definition.\n" +
                    "See https://sap.github.io/chevrotain/docs/guide/resolving_lexer_errors.html#UNREACHABLE";
                errors.push({
                    message: msg,
                    type: LexerDefinitionErrorType.UNREACHABLE_PATTERN,
                    tokenTypes: [tokType, tokenType]
                });
            }
        });
    });
    return errors;
}
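/*
 * Illustrative sketch (not part of the original file; the token types are
 * hypothetical): findUnreachablePatterns flags orderings such as an Identifier
 * type /[a-zA-Z]+/ defined BEFORE a keyword type "while" -- the keyword can
 * never be matched because the broader pattern appears earlier in the mode's
 * definition.
 */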
function testTokenType(str, pattern) {
    /* istanbul ignore else */
    if (isRegExp(pattern)) {
        var regExpArray = pattern.exec(str);
        return regExpArray !== null && regExpArray.index === 0;
    }
    else if (isFunction(pattern)) {
        // maintain the API of custom patterns
        return pattern(str, 0, [], {});
    }
    else if (has(pattern, "exec")) {
        // maintain the API of custom patterns
        return pattern.exec(str, 0, [], {});
    }
    else if (typeof pattern === "string") {
        return pattern === str;
    }
    else {
        throw Error("non exhaustive match");
    }
}
function noMetaChar(regExp) {
    // https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/RegExp
    var metaChars = [
        ".",
        "\\",
        "[",
        "]",
        "|",
        "^",
        "$",
        "(",
        ")",
        "?",
        "*",
        "+",
        "{"
    ];
    return (find(metaChars, function (char) { return regExp.source.indexOf(char) !== -1; }) ===
        undefined);
}
export function addStartOfInput(pattern) {
    var flags = pattern.ignoreCase ? "i" : "";
    // always wrapping in a non-capturing group preceded by '^' to make sure matching can only work on start of input.
    // duplicate/redundant start of input markers have no meaning (/^^^^A/ === /^A/)
    return new RegExp("^(?:" + pattern.source + ")", flags);
}
export function addStickyFlag(pattern) {
    var flags = pattern.ignoreCase ? "iy" : "y";
    // the sticky ('y') flag restricts matching to begin at the .lastIndex offset,
    // so unlike addStartOfInput above no '^(?:...)' wrapping is needed here.
    return new RegExp("" + pattern.source, flags);
}
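/*
 * Illustrative sketch (not part of the original file):
 *   addStartOfInput(/ab+/)  ->  /^(?:ab+)/   (anchored by wrapping)
 *   addStickyFlag(/ab+/i)   ->  /ab+/iy      (anchored via the sticky flag)
 */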
export function performRuntimeChecks(lexerDefinition, trackLines, lineTerminatorCharacters) {
    var errors = [];
    // some run time checks to help the end users.
    if (!has(lexerDefinition, DEFAULT_MODE)) {
        errors.push({
            message: "A MultiMode Lexer cannot be initialized without a <" +
                DEFAULT_MODE +
                "> property in its definition\n",
            type: LexerDefinitionErrorType.MULTI_MODE_LEXER_WITHOUT_DEFAULT_MODE
        });
    }
    if (!has(lexerDefinition, MODES)) {
        errors.push({
            message: "A MultiMode Lexer cannot be initialized without a <" +
                MODES +
                "> property in its definition\n",
            type: LexerDefinitionErrorType.MULTI_MODE_LEXER_WITHOUT_MODES_PROPERTY
        });
    }
    if (has(lexerDefinition, MODES) &&
        has(lexerDefinition, DEFAULT_MODE) &&
        !has(lexerDefinition.modes, lexerDefinition.defaultMode)) {
        errors.push({
            message: "A MultiMode Lexer cannot be initialized with a " + DEFAULT_MODE + ": <" + lexerDefinition.defaultMode + "> " +
                "which does not exist\n",
            type: LexerDefinitionErrorType.MULTI_MODE_LEXER_DEFAULT_MODE_VALUE_DOES_NOT_EXIST
        });
    }
    if (has(lexerDefinition, MODES)) {
        forEach(lexerDefinition.modes, function (currModeValue, currModeName) {
            forEach(currModeValue, function (currTokType, currIdx) {
                if (isUndefined(currTokType)) {
                    errors.push({
                        message: "A Lexer cannot be initialized using an undefined Token Type. Mode:" +
                            ("<" + currModeName + "> at index: <" + currIdx + ">\n"),
                        type: LexerDefinitionErrorType.LEXER_DEFINITION_CANNOT_CONTAIN_UNDEFINED
                    });
                }
            });
        });
    }
    return errors;
}
export function performWarningRuntimeChecks(lexerDefinition, trackLines, lineTerminatorCharacters) {
    var warnings = [];
    var hasAnyLineBreak = false;
    var allTokenTypes = compact(flatten(mapValues(lexerDefinition.modes, function (tokTypes) { return tokTypes; })));
    var concreteTokenTypes = reject(allTokenTypes, function (currType) { return currType[PATTERN] === Lexer.NA; });
    var terminatorCharCodes = getCharCodes(lineTerminatorCharacters);
    if (trackLines) {
        forEach(concreteTokenTypes, function (tokType) {
            var currIssue = checkLineBreaksIssues(tokType, terminatorCharCodes);
            if (currIssue !== false) {
                var message = buildLineBreakIssueMessage(tokType, currIssue);
                var warningDescriptor = {
                    message: message,
                    type: currIssue.issue,
                    tokenType: tokType
                };
                warnings.push(warningDescriptor);
            }
            else {
                // we don't want to attempt to scan if the user explicitly specified the line_breaks option.
                if (has(tokType, "LINE_BREAKS")) {
                    if (tokType.LINE_BREAKS === true) {
                        hasAnyLineBreak = true;
                    }
                }
                else {
                    if (canMatchCharCode(terminatorCharCodes, tokType.PATTERN)) {
                        hasAnyLineBreak = true;
                    }
                }
            }
        });
    }
    if (trackLines && !hasAnyLineBreak) {
        warnings.push({
            message: "Warning: No LINE_BREAKS Found.\n" +
                "\tThis Lexer has been defined to track line and column information,\n" +
                "\tBut none of the Token Types can be identified as matching a line terminator.\n" +
                "\tSee https://sap.github.io/chevrotain/docs/guide/resolving_lexer_errors.html#LINE_BREAKS \n" +
                "\tfor details.",
            type: LexerDefinitionErrorType.NO_LINE_BREAKS_FLAGS
        });
    }
    return warnings;
}
export function cloneEmptyGroups(emptyGroups) {
    var clonedResult = {};
    var groupKeys = keys(emptyGroups);
    forEach(groupKeys, function (currKey) {
        var currGroupValue = emptyGroups[currKey];
        /* istanbul ignore else */
        if (isArray(currGroupValue)) {
            clonedResult[currKey] = [];
        }
        else {
            throw Error("non exhaustive match");
        }
    });
    return clonedResult;
}
// TODO: refactor to avoid duplication
export function isCustomPattern(tokenType) {
    var pattern = tokenType.PATTERN;
    /* istanbul ignore else */
    if (isRegExp(pattern)) {
        return false;
    }
    else if (isFunction(pattern)) {
        // CustomPatternMatcherFunc - custom patterns do not require any transformations, only wrapping in a RegExp Like object
        return true;
    }
    else if (has(pattern, "exec")) {
        // ICustomPattern
        return true;
    }
    else if (isString(pattern)) {
        return false;
    }
    else {
        throw Error("non exhaustive match");
    }
}
export function isShortPattern(pattern) {
    if (isString(pattern) && pattern.length === 1) {
        return pattern.charCodeAt(0);
    }
    else {
        return false;
    }
}
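/*
 * Illustrative sketch (not part of the original file): isShortPattern(";")
 * returns 59 -- the char code used for the fast single-character comparison
 * in the main lexer loop -- while isShortPattern("if") returns false.
 */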
/**
 * Faster than using a RegExp for default newline detection during lexing.
 */
export var LineTerminatorOptimizedTester = {
    // implements /\n|\r\n?/g.test
    test: function (text) {
        var len = text.length;
        for (var i = this.lastIndex; i < len; i++) {
            var c = text.charCodeAt(i);
            if (c === 10) {
                this.lastIndex = i + 1;
                return true;
            }
            else if (c === 13) {
                if (text.charCodeAt(i + 1) === 10) {
                    this.lastIndex = i + 2;
                }
                else {
                    this.lastIndex = i + 1;
                }
                return true;
            }
        }
        return false;
    },
    lastIndex: 0
};
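/*
 * Illustrative sketch (not part of the original file): with lastIndex reset
 * to 0, LineTerminatorOptimizedTester.test("a\r\nb") returns true and leaves
 * lastIndex at 3 (just past the "\r\n" pair), exactly as /\n|\r\n?/g would.
 */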
function checkLineBreaksIssues(tokType, lineTerminatorCharCodes) {
    if (has(tokType, "LINE_BREAKS")) {
        // if the user explicitly declared the line_breaks option we will respect their choice
        // and assume it is correct.
        return false;
    }
    else {
        /* istanbul ignore else */
        if (isRegExp(tokType.PATTERN)) {
            try {
                canMatchCharCode(lineTerminatorCharCodes, tokType.PATTERN);
            }
            catch (e) {
                /* istanbul ignore next - to test this we would have to mock <canMatchCharCode> to throw an error */
                return {
                    issue: LexerDefinitionErrorType.IDENTIFY_TERMINATOR,
                    errMsg: e.message
                };
            }
            return false;
        }
        else if (isString(tokType.PATTERN)) {
            // string literal patterns can always be analyzed to detect line terminator usage
            return false;
        }
        else if (isCustomPattern(tokType)) {
            // custom token types
            return { issue: LexerDefinitionErrorType.CUSTOM_LINE_BREAK };
        }
        else {
            throw Error("non exhaustive match");
        }
    }
}
export function buildLineBreakIssueMessage(tokType, details) {
    /* istanbul ignore else */
    if (details.issue === LexerDefinitionErrorType.IDENTIFY_TERMINATOR) {
        return ("Warning: unable to identify line terminator usage in pattern.\n" +
            ("\tThe problem is in the <" + tokType.name + "> Token Type\n") +
            ("\t Root cause: " + details.errMsg + ".\n") +
            "\tFor details See: https://sap.github.io/chevrotain/docs/guide/resolving_lexer_errors.html#IDENTIFY_TERMINATOR");
    }
    else if (details.issue === LexerDefinitionErrorType.CUSTOM_LINE_BREAK) {
        return ("Warning: A Custom Token Pattern should specify the <line_breaks> option.\n" +
            ("\tThe problem is in the <" + tokType.name + "> Token Type\n") +
            "\tFor details See: https://sap.github.io/chevrotain/docs/guide/resolving_lexer_errors.html#CUSTOM_LINE_BREAK");
    }
    else {
        throw Error("non exhaustive match");
    }
}
function getCharCodes(charsOrCodes) {
    var charCodes = map(charsOrCodes, function (numOrString) {
        if (isString(numOrString) && numOrString.length > 0) {
            return numOrString.charCodeAt(0);
        }
        else {
            return numOrString;
        }
    });
    return charCodes;
}
function addToMapOfArrays(map, key, value) {
    if (map[key] === undefined) {
        map[key] = [value];
    }
    else {
        map[key].push(value);
    }
}
export var minOptimizationVal = 256;
/**
 * We are mapping charCodes above ASCII (the first 256 codes) into buckets, each of size 256.
 * This is because ASCII chars are the most common start chars, so each one of those gets its own
 * possible token configs vector.
 *
 * Tokens starting with charCodes "above" ASCII are uncommon, so we can "afford"
 * to place these into buckets of possible token configs. What we gain from
 * this is avoiding the case of creating an optimization 'charCodeToPatternIdxToConfig'
 * which would contain 10,000+ arrays of small size (e.g. the unicode Identifiers scenario).
 * Our 'charCodeToPatternIdxToConfig' max size will now be:
 * 256 + (2^16 / 2^8) - 1 === 511
 *
 * note the hack for fast integer-part extraction of the division
 * See: https://stackoverflow.com/a/4228528
 */
export function charCodeToOptimizedIndex(charCode) {
    return charCode < minOptimizationVal
        ? charCode
        : charCodeToOptimizedIdxMap[charCode];
}
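/*
 * Worked example (illustrative, not part of the original file): for the CJK
 * charCode 20013 (0x4E2D), charCodeToOptimizedIndex returns
 * 255 + ~~(20013 / 255) === 255 + 78 === 333, while any charCode below 256
 * (e.g. "a" === 97) maps to itself.
 */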
/**
 * This is a compromise between cold start / hot running performance
 * Creating this array takes ~3ms on a modern machine,
 * But if we perform the computation at runtime as needed the CSS Lexer benchmark
 * performance degrades by ~10%
 *
 * TODO: Perhaps it should be lazy initialized only if a charCode > 255 is used.
 */
var charCodeToOptimizedIdxMap = [];
function initCharCodeToOptimizedIndexMap() {
    if (isEmpty(charCodeToOptimizedIdxMap)) {
        charCodeToOptimizedIdxMap = new Array(65536);
        for (var i = 0; i < 65536; i++) {
            /* tslint:disable */
            charCodeToOptimizedIdxMap[i] = i > 255 ? 255 + ~~(i / 255) : i;
            /* tslint:enable */
        }
    }
}
//# sourceMappingURL=lexer.js.map
9  node_modules/chevrotain/lib_esm/src/scan/lexer_errors_public.js  generated  vendored  Normal file
@@ -0,0 +1,9 @@
export var defaultLexerErrorProvider = {
    buildUnableToPopLexerModeMessage: function (token) {
        return "Unable to pop Lexer Mode after encountering Token ->" + token.image + "<- The Mode Stack is empty";
    },
    buildUnexpectedCharactersMessage: function (fullText, startOffset, length, line, column) {
        return ("unexpected character: ->" + fullText.charAt(startOffset) + "<- at offset: " + startOffset + "," + (" skipped " + length + " characters."));
    }
};
//# sourceMappingURL=lexer_errors_public.js.map
672  node_modules/chevrotain/lib_esm/src/scan/lexer_public.js  generated  vendored  Normal file
@@ -0,0 +1,672 @@
import { analyzeTokenTypes, charCodeToOptimizedIndex, cloneEmptyGroups, DEFAULT_MODE, LineTerminatorOptimizedTester, performRuntimeChecks, performWarningRuntimeChecks, SUPPORT_STICKY, validatePatterns } from "./lexer";
import { cloneArr, cloneObj, forEach, IDENTITY, isArray, isEmpty, isUndefined, keys, last, map, merge, NOOP, PRINT_WARNING, reduce, reject, timer, toFastProperties } from "../utils/utils";
import { augmentTokenTypes } from "./tokens";
import { defaultLexerErrorProvider } from "../scan/lexer_errors_public";
import { clearRegExpParserCache } from "./reg_exp_parser";
export var LexerDefinitionErrorType;
(function (LexerDefinitionErrorType) {
    LexerDefinitionErrorType[LexerDefinitionErrorType["MISSING_PATTERN"] = 0] = "MISSING_PATTERN";
    LexerDefinitionErrorType[LexerDefinitionErrorType["INVALID_PATTERN"] = 1] = "INVALID_PATTERN";
    LexerDefinitionErrorType[LexerDefinitionErrorType["EOI_ANCHOR_FOUND"] = 2] = "EOI_ANCHOR_FOUND";
    LexerDefinitionErrorType[LexerDefinitionErrorType["UNSUPPORTED_FLAGS_FOUND"] = 3] = "UNSUPPORTED_FLAGS_FOUND";
    LexerDefinitionErrorType[LexerDefinitionErrorType["DUPLICATE_PATTERNS_FOUND"] = 4] = "DUPLICATE_PATTERNS_FOUND";
    LexerDefinitionErrorType[LexerDefinitionErrorType["INVALID_GROUP_TYPE_FOUND"] = 5] = "INVALID_GROUP_TYPE_FOUND";
    LexerDefinitionErrorType[LexerDefinitionErrorType["PUSH_MODE_DOES_NOT_EXIST"] = 6] = "PUSH_MODE_DOES_NOT_EXIST";
    LexerDefinitionErrorType[LexerDefinitionErrorType["MULTI_MODE_LEXER_WITHOUT_DEFAULT_MODE"] = 7] = "MULTI_MODE_LEXER_WITHOUT_DEFAULT_MODE";
    LexerDefinitionErrorType[LexerDefinitionErrorType["MULTI_MODE_LEXER_WITHOUT_MODES_PROPERTY"] = 8] = "MULTI_MODE_LEXER_WITHOUT_MODES_PROPERTY";
    LexerDefinitionErrorType[LexerDefinitionErrorType["MULTI_MODE_LEXER_DEFAULT_MODE_VALUE_DOES_NOT_EXIST"] = 9] = "MULTI_MODE_LEXER_DEFAULT_MODE_VALUE_DOES_NOT_EXIST";
    LexerDefinitionErrorType[LexerDefinitionErrorType["LEXER_DEFINITION_CANNOT_CONTAIN_UNDEFINED"] = 10] = "LEXER_DEFINITION_CANNOT_CONTAIN_UNDEFINED";
    LexerDefinitionErrorType[LexerDefinitionErrorType["SOI_ANCHOR_FOUND"] = 11] = "SOI_ANCHOR_FOUND";
    LexerDefinitionErrorType[LexerDefinitionErrorType["EMPTY_MATCH_PATTERN"] = 12] = "EMPTY_MATCH_PATTERN";
    LexerDefinitionErrorType[LexerDefinitionErrorType["NO_LINE_BREAKS_FLAGS"] = 13] = "NO_LINE_BREAKS_FLAGS";
    LexerDefinitionErrorType[LexerDefinitionErrorType["UNREACHABLE_PATTERN"] = 14] = "UNREACHABLE_PATTERN";
    LexerDefinitionErrorType[LexerDefinitionErrorType["IDENTIFY_TERMINATOR"] = 15] = "IDENTIFY_TERMINATOR";
    LexerDefinitionErrorType[LexerDefinitionErrorType["CUSTOM_LINE_BREAK"] = 16] = "CUSTOM_LINE_BREAK";
})(LexerDefinitionErrorType || (LexerDefinitionErrorType = {}));
var DEFAULT_LEXER_CONFIG = {
    deferDefinitionErrorsHandling: false,
    positionTracking: "full",
    lineTerminatorsPattern: /\n|\r\n?/g,
    lineTerminatorCharacters: ["\n", "\r"],
    ensureOptimizations: false,
    safeMode: false,
    errorMessageProvider: defaultLexerErrorProvider,
    traceInitPerf: false,
    skipValidations: false
};
Object.freeze(DEFAULT_LEXER_CONFIG);
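/*
 * Illustrative usage sketch (not part of the original file; the token types
 * WhiteSpace and Identifier are hypothetical): omitted config keys fall back to
 * the frozen defaults above, e.g.
 *   new Lexer([WhiteSpace, Identifier], { positionTracking: "onlyStart" })
 * still uses /\n|\r\n?/g (via its optimized tester) for line terminators.
 */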
var Lexer = /** @class */ (function () {
    function Lexer(lexerDefinition, config) {
        var _this = this;
        if (config === void 0) { config = DEFAULT_LEXER_CONFIG; }
        this.lexerDefinition = lexerDefinition;
        this.lexerDefinitionErrors = [];
        this.lexerDefinitionWarning = [];
        this.patternIdxToConfig = {};
        this.charCodeToPatternIdxToConfig = {};
        this.modes = [];
        this.emptyGroups = {};
        this.config = undefined;
        this.trackStartLines = true;
        this.trackEndLines = true;
        this.hasCustom = false;
        this.canModeBeOptimized = {};
        if (typeof config === "boolean") {
            throw Error("The second argument to the Lexer constructor is now an ILexerConfig Object.\n" +
                "a boolean 2nd argument is no longer supported");
        }
        // todo: defaults func?
        this.config = merge(DEFAULT_LEXER_CONFIG, config);
        var traceInitVal = this.config.traceInitPerf;
        if (traceInitVal === true) {
            this.traceInitMaxIdent = Infinity;
            this.traceInitPerf = true;
        }
        else if (typeof traceInitVal === "number") {
            this.traceInitMaxIdent = traceInitVal;
            this.traceInitPerf = true;
        }
        this.traceInitIndent = -1;
        this.TRACE_INIT("Lexer Constructor", function () {
            var actualDefinition;
            var hasOnlySingleMode = true;
            _this.TRACE_INIT("Lexer Config handling", function () {
                if (_this.config.lineTerminatorsPattern ===
                    DEFAULT_LEXER_CONFIG.lineTerminatorsPattern) {
                    // optimized built-in implementation for the defaults definition of lineTerminators
                    _this.config.lineTerminatorsPattern = LineTerminatorOptimizedTester;
                }
                else {
                    if (_this.config.lineTerminatorCharacters ===
                        DEFAULT_LEXER_CONFIG.lineTerminatorCharacters) {
                        throw Error("Error: Missing <lineTerminatorCharacters> property on the Lexer config.\n" +
                            "\tFor details See: https://sap.github.io/chevrotain/docs/guide/resolving_lexer_errors.html#MISSING_LINE_TERM_CHARS");
                    }
                }
                if (config.safeMode && config.ensureOptimizations) {
                    throw Error('"safeMode" and "ensureOptimizations" flags are mutually exclusive.');
                }
                _this.trackStartLines = /full|onlyStart/i.test(_this.config.positionTracking);
                _this.trackEndLines = /full/i.test(_this.config.positionTracking);
                // Convert SingleModeLexerDefinition into a IMultiModeLexerDefinition.
                if (isArray(lexerDefinition)) {
                    actualDefinition = { modes: {} };
                    actualDefinition.modes[DEFAULT_MODE] = cloneArr(lexerDefinition);
                    actualDefinition[DEFAULT_MODE] = DEFAULT_MODE;
                }
                else {
                    // no conversion needed, input should already be an IMultiModeLexerDefinition
                    hasOnlySingleMode = false;
                    actualDefinition = cloneObj(lexerDefinition);
                }
            });
            if (_this.config.skipValidations === false) {
                _this.TRACE_INIT("performRuntimeChecks", function () {
                    _this.lexerDefinitionErrors = _this.lexerDefinitionErrors.concat(performRuntimeChecks(actualDefinition, _this.trackStartLines, _this.config.lineTerminatorCharacters));
                });
                _this.TRACE_INIT("performWarningRuntimeChecks", function () {
                    _this.lexerDefinitionWarning = _this.lexerDefinitionWarning.concat(performWarningRuntimeChecks(actualDefinition, _this.trackStartLines, _this.config.lineTerminatorCharacters));
                });
            }
            // for extra robustness, to avoid throwing a non-informative error message
            actualDefinition.modes = actualDefinition.modes
                ? actualDefinition.modes
                : {};
            // an error of undefined TokenTypes will be detected in "performRuntimeChecks" above.
            // this transformation is to increase robustness in the case of a partially invalid lexer definition.
            forEach(actualDefinition.modes, function (currModeValue, currModeName) {
                actualDefinition.modes[currModeName] = reject(currModeValue, function (currTokType) { return isUndefined(currTokType); });
            });
            var allModeNames = keys(actualDefinition.modes);
            forEach(actualDefinition.modes, function (currModDef, currModName) {
                _this.TRACE_INIT("Mode: <" + currModName + "> processing", function () {
                    _this.modes.push(currModName);
                    if (_this.config.skipValidations === false) {
                        _this.TRACE_INIT("validatePatterns", function () {
                            _this.lexerDefinitionErrors = _this.lexerDefinitionErrors.concat(validatePatterns(currModDef, allModeNames));
                        });
                    }
                    // If definition errors were encountered, the analysis phase may fail unexpectedly.
                    // Considering a lexer with definition errors may never be used, there is no point
                    // in performing the analysis anyhow...
                    if (isEmpty(_this.lexerDefinitionErrors)) {
                        augmentTokenTypes(currModDef);
                        var currAnalyzeResult_1;
                        _this.TRACE_INIT("analyzeTokenTypes", function () {
                            currAnalyzeResult_1 = analyzeTokenTypes(currModDef, {
                                lineTerminatorCharacters: _this.config
                                    .lineTerminatorCharacters,
                                positionTracking: config.positionTracking,
                                ensureOptimizations: config.ensureOptimizations,
                                safeMode: config.safeMode,
                                tracer: _this.TRACE_INIT.bind(_this)
                            });
                        });
                        _this.patternIdxToConfig[currModName] =
                            currAnalyzeResult_1.patternIdxToConfig;
                        _this.charCodeToPatternIdxToConfig[currModName] =
                            currAnalyzeResult_1.charCodeToPatternIdxToConfig;
                        _this.emptyGroups = merge(_this.emptyGroups, currAnalyzeResult_1.emptyGroups);
                        _this.hasCustom =
                            currAnalyzeResult_1.hasCustom || _this.hasCustom;
                        _this.canModeBeOptimized[currModName] =
                            currAnalyzeResult_1.canBeOptimized;
                    }
                });
            });
            _this.defaultMode = actualDefinition.defaultMode;
            if (!isEmpty(_this.lexerDefinitionErrors) &&
                !_this.config.deferDefinitionErrorsHandling) {
                var allErrMessages = map(_this.lexerDefinitionErrors, function (error) {
                    return error.message;
                });
                var allErrMessagesString = allErrMessages.join("-----------------------\n");
                throw new Error("Errors detected in definition of Lexer:\n" +
                    allErrMessagesString);
            }
            // Only print warnings if there are no errors; this avoids polluting the
            // output with warnings that may themselves be artifacts of the errors.
            forEach(_this.lexerDefinitionWarning, function (warningDescriptor) {
                PRINT_WARNING(warningDescriptor.message);
            });
            _this.TRACE_INIT("Choosing sub-methods implementations", function () {
                // Choose the relevant internal implementations for this specific parser.
                // These implementations should be in-lined by the JavaScript engine
                // to provide optimal performance in each scenario.
                if (SUPPORT_STICKY) {
                    _this.chopInput = IDENTITY;
                    _this.match = _this.matchWithTest;
                }
                else {
                    _this.updateLastIndex = NOOP;
                    _this.match = _this.matchWithExec;
                }
                if (hasOnlySingleMode) {
                    _this.handleModes = NOOP;
                }
                if (_this.trackStartLines === false) {
                    _this.computeNewColumn = IDENTITY;
                }
                if (_this.trackEndLines === false) {
                    _this.updateTokenEndLineColumnLocation = NOOP;
                }
                if (/full/i.test(_this.config.positionTracking)) {
                    _this.createTokenInstance = _this.createFullToken;
                }
                else if (/onlyStart/i.test(_this.config.positionTracking)) {
                    _this.createTokenInstance = _this.createStartOnlyToken;
                }
                else if (/onlyOffset/i.test(_this.config.positionTracking)) {
                    _this.createTokenInstance = _this.createOffsetOnlyToken;
                }
                else {
                    throw Error("Invalid <positionTracking> config option: \"" + _this.config.positionTracking + "\"");
                }
                if (_this.hasCustom) {
                    _this.addToken = _this.addTokenUsingPush;
                    _this.handlePayload = _this.handlePayloadWithCustom;
                }
                else {
                    _this.addToken = _this.addTokenUsingMemberAccess;
                    _this.handlePayload = _this.handlePayloadNoCustom;
                }
            });
            _this.TRACE_INIT("Failed Optimization Warnings", function () {
                var unOptimizedModes = reduce(_this.canModeBeOptimized, function (cannotBeOptimized, canBeOptimized, modeName) {
                    if (canBeOptimized === false) {
                        cannotBeOptimized.push(modeName);
                    }
                    return cannotBeOptimized;
                }, []);
                if (config.ensureOptimizations && !isEmpty(unOptimizedModes)) {
                    throw Error("Lexer Modes: < " + unOptimizedModes.join(", ") + " > cannot be optimized.\n" +
                        '\t Disable the "ensureOptimizations" lexer config flag to silently ignore this and run the lexer in an un-optimized mode.\n' +
                        "\t Or inspect the console log for details on how to resolve these issues.");
                }
            });
            _this.TRACE_INIT("clearRegExpParserCache", function () {
                clearRegExpParserCache();
            });
            _this.TRACE_INIT("toFastProperties", function () {
                toFastProperties(_this);
            });
        });
    }
    Lexer.prototype.tokenize = function (text, initialMode) {
        if (initialMode === void 0) { initialMode = this.defaultMode; }
        if (!isEmpty(this.lexerDefinitionErrors)) {
            var allErrMessages = map(this.lexerDefinitionErrors, function (error) {
                return error.message;
            });
            var allErrMessagesString = allErrMessages.join("-----------------------\n");
            throw new Error("Unable to Tokenize because Errors detected in definition of Lexer:\n" +
                allErrMessagesString);
        }
        var lexResult = this.tokenizeInternal(text, initialMode);
        return lexResult;
    };
    // There is quite a bit of duplication between this and "tokenizeInternalLazy"
    // This is intentional due to performance considerations.
    Lexer.prototype.tokenizeInternal = function (text, initialMode) {
        var _this = this;
        var i, j, matchAltImage, longerAltIdx, matchedImage, payload, altPayload, imageLength, group, tokType, newToken, errLength, droppedChar, msg, match;
        var orgText = text;
        var orgLength = orgText.length;
        var offset = 0;
        var matchedTokensIndex = 0;
        // initializing the tokensArray to the "guessed" size.
        // guessing too little will still reduce the number of array re-sizes on pushes.
        // guessing too large (tested by guessing x4 too large) may cost a bit more memory
        // but would still have a faster runtime by avoiding (all but one) array resizing.
        var guessedNumberOfTokens = this.hasCustom
            ? 0 // pre-sizing would break custom token pattern APIs: the matchedTokens array would contain undefined elements.
            : Math.floor(text.length / 10);
        var matchedTokens = new Array(guessedNumberOfTokens);
        var errors = [];
        var line = this.trackStartLines ? 1 : undefined;
        var column = this.trackStartLines ? 1 : undefined;
        var groups = cloneEmptyGroups(this.emptyGroups);
        var trackLines = this.trackStartLines;
        var lineTerminatorPattern = this.config.lineTerminatorsPattern;
        var currModePatternsLength = 0;
        var patternIdxToConfig = [];
        var currCharCodeToPatternIdxToConfig = [];
        var modeStack = [];
        var emptyArray = [];
        Object.freeze(emptyArray);
        var getPossiblePatterns = undefined;
        function getPossiblePatternsSlow() {
            return patternIdxToConfig;
        }
        function getPossiblePatternsOptimized(charCode) {
            var optimizedCharIdx = charCodeToOptimizedIndex(charCode);
            var possiblePatterns = currCharCodeToPatternIdxToConfig[optimizedCharIdx];
            if (possiblePatterns === undefined) {
                return emptyArray;
            }
            else {
                return possiblePatterns;
            }
        }
        var pop_mode = function (popToken) {
            // TODO: perhaps avoid this error in the edge case there is no more input?
            if (modeStack.length === 1 &&
                // if we have both a POP_MODE and a PUSH_MODE this is in-fact a "transition"
                // So no error should occur.
                popToken.tokenType.PUSH_MODE === undefined) {
                // if we try to pop the last mode the lexer will no longer have ANY mode.
                // thus the pop is ignored, an error will be created and the lexer will continue parsing in the previous mode.
                var msg_1 = _this.config.errorMessageProvider.buildUnableToPopLexerModeMessage(popToken);
                errors.push({
                    offset: popToken.startOffset,
                    line: popToken.startLine !== undefined
                        ? popToken.startLine
                        : undefined,
                    column: popToken.startColumn !== undefined
                        ? popToken.startColumn
                        : undefined,
                    length: popToken.image.length,
                    message: msg_1
                });
            }
            else {
                modeStack.pop();
                var newMode = last(modeStack);
                patternIdxToConfig = _this.patternIdxToConfig[newMode];
                currCharCodeToPatternIdxToConfig = _this
                    .charCodeToPatternIdxToConfig[newMode];
                currModePatternsLength = patternIdxToConfig.length;
                var modeCanBeOptimized = _this.canModeBeOptimized[newMode] &&
                    _this.config.safeMode === false;
                if (currCharCodeToPatternIdxToConfig && modeCanBeOptimized) {
                    getPossiblePatterns = getPossiblePatternsOptimized;
                }
                else {
                    getPossiblePatterns = getPossiblePatternsSlow;
                }
            }
        };
        function push_mode(newMode) {
            modeStack.push(newMode);
            currCharCodeToPatternIdxToConfig = this
                .charCodeToPatternIdxToConfig[newMode];
            patternIdxToConfig = this.patternIdxToConfig[newMode];
            currModePatternsLength = patternIdxToConfig.length;
            var modeCanBeOptimized = this.canModeBeOptimized[newMode] &&
                this.config.safeMode === false;
            if (currCharCodeToPatternIdxToConfig && modeCanBeOptimized) {
                getPossiblePatterns = getPossiblePatternsOptimized;
            }
            else {
                getPossiblePatterns = getPossiblePatternsSlow;
            }
        }
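        /*
         * Illustrative sketch (not part of the original file; the mode and token
         * names are hypothetical): a token type with PUSH_MODE: "string_mode"
         * makes the lexer enter that mode via push_mode() when matched, and a
         * token with POP_MODE: true returns to the previous mode via pop_mode();
         * a token carrying both performs a "transition" (pop immediately followed
         * by push), as handleModes below shows.
         */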
        // this pattern seems to avoid a V8 de-optimization, although that de-optimization does not
        // seem to matter performance wise.
        push_mode.call(this, initialMode);
        var currConfig;
        while (offset < orgLength) {
            matchedImage = null;
            var nextCharCode = orgText.charCodeAt(offset);
            var chosenPatternIdxToConfig = getPossiblePatterns(nextCharCode);
            var chosenPatternsLength = chosenPatternIdxToConfig.length;
            for (i = 0; i < chosenPatternsLength; i++) {
                currConfig = chosenPatternIdxToConfig[i];
                var currPattern = currConfig.pattern;
                payload = null;
                // manually in-lined because > 600 chars won't be in-lined in V8
                var singleCharCode = currConfig.short;
                if (singleCharCode !== false) {
                    if (nextCharCode === singleCharCode) {
                        // single character string
                        matchedImage = currPattern;
                    }
                }
                else if (currConfig.isCustom === true) {
                    match = currPattern.exec(orgText, offset, matchedTokens, groups);
                    if (match !== null) {
                        matchedImage = match[0];
                        if (match.payload !== undefined) {
                            payload = match.payload;
                        }
                    }
                    else {
                        matchedImage = null;
                    }
                }
                else {
                    this.updateLastIndex(currPattern, offset);
                    matchedImage = this.match(currPattern, text, offset);
                }
                if (matchedImage !== null) {
                    // even though this pattern matched we must try another longer alternative.
                    // this can be used to prioritize keywords over identifiers
                    longerAltIdx = currConfig.longerAlt;
                    if (longerAltIdx !== undefined) {
                        // TODO: micro optimize, avoid extra prop access
                        // by saving/linking longerAlt on the original config?
                        var longerAltConfig = patternIdxToConfig[longerAltIdx];
                        var longerAltPattern = longerAltConfig.pattern;
                        altPayload = null;
                        // single Char can never be a longer alt so no need to test it.
                        // manually in-lined because > 600 chars won't be in-lined in V8
                        if (longerAltConfig.isCustom === true) {
                            match = longerAltPattern.exec(orgText, offset, matchedTokens, groups);
                            if (match !== null) {
                                matchAltImage = match[0];
                                if (match.payload !== undefined) {
                                    altPayload = match.payload;
                                }
                            }
                            else {
                                matchAltImage = null;
                            }
                        }
                        else {
                            this.updateLastIndex(longerAltPattern, offset);
                            matchAltImage = this.match(longerAltPattern, text, offset);
                        }
                        if (matchAltImage &&
                            matchAltImage.length > matchedImage.length) {
                            matchedImage = matchAltImage;
                            payload = altPayload;
                            currConfig = longerAltConfig;
                        }
                    }
                    break;
                }
            }
            // successful match
            if (matchedImage !== null) {
                imageLength = matchedImage.length;
                group = currConfig.group;
                if (group !== undefined) {
                    tokType = currConfig.tokenTypeIdx;
                    // TODO: "offset + imageLength" and the new column may be computed twice in case of "full" location information inside
                    // createFullToken method
                    newToken = this.createTokenInstance(matchedImage, offset, tokType, currConfig.tokenType, line, column, imageLength);
                    this.handlePayload(newToken, payload);
                    // TODO: optimize NOOP in case there are no special groups?
                    if (group === false) {
                        matchedTokensIndex = this.addToken(matchedTokens, matchedTokensIndex, newToken);
                    }
                    else {
                        groups[group].push(newToken);
                    }
                }
                text = this.chopInput(text, imageLength);
                offset = offset + imageLength;
                // TODO: with newlines the column may be assigned twice
                column = this.computeNewColumn(column, imageLength);
                if (trackLines === true &&
                    currConfig.canLineTerminator === true) {
                    var numOfLTsInMatch = 0;
                    var foundTerminator = void 0;
                    var lastLTEndOffset = void 0;
                    lineTerminatorPattern.lastIndex = 0;
                    do {
                        foundTerminator = lineTerminatorPattern.test(matchedImage);
                        if (foundTerminator === true) {
                            lastLTEndOffset =
                                lineTerminatorPattern.lastIndex - 1;
                            numOfLTsInMatch++;
                        }
                    } while (foundTerminator === true);
                    if (numOfLTsInMatch !== 0) {
                        line = line + numOfLTsInMatch;
                        column = imageLength - lastLTEndOffset;
                        this.updateTokenEndLineColumnLocation(newToken, group, lastLTEndOffset, numOfLTsInMatch, line, column, imageLength);
                    }
                }
                // will be NOOP if no modes present
                this.handleModes(currConfig, pop_mode, push_mode, newToken);
            }
            else {
                // error recovery, drop characters until we identify a valid token's start point
                var errorStartOffset = offset;
                var errorLine = line;
                var errorColumn = column;
                var foundResyncPoint = false;
                while (!foundResyncPoint && offset < orgLength) {
                    // drop chars until we succeed in matching something
                    droppedChar = orgText.charCodeAt(offset);
                    // Identity Func (when sticky flag is enabled)
                    text = this.chopInput(text, 1);
                    offset++;
                    for (j = 0; j < currModePatternsLength; j++) {
                        var currConfig_1 = patternIdxToConfig[j];
                        var currPattern = currConfig_1.pattern;
                        // manually in-lined because > 600 chars won't be in-lined in V8
                        var singleCharCode = currConfig_1.short;
                        if (singleCharCode !== false) {
                            if (orgText.charCodeAt(offset) === singleCharCode) {
                                // single character string
                                foundResyncPoint = true;
                            }
                        }
                        else if (currConfig_1.isCustom === true) {
                            foundResyncPoint =
                                currPattern.exec(orgText, offset, matchedTokens, groups) !== null;
                        }
                        else {
                            this.updateLastIndex(currPattern, offset);
                            foundResyncPoint = currPattern.exec(text) !== null;
                        }
                        if (foundResyncPoint === true) {
                            break;
                        }
                    }
                }
                errLength = offset - errorStartOffset;
                // at this point we either re-synced or reached the end of the input text
                msg = this.config.errorMessageProvider.buildUnexpectedCharactersMessage(orgText, errorStartOffset, errLength, errorLine, errorColumn);
                errors.push({
                    offset: errorStartOffset,
                    line: errorLine,
                    column: errorColumn,
                    length: errLength,
                    message: msg
                });
            }
        }
        // if we do have custom patterns which push directly into the matchedTokens array
        // TODO: custom tokens should not push directly??
        if (!this.hasCustom) {
            // if we guessed too large a size for the tokens array this will shrink it to the right size.
            matchedTokens.length = matchedTokensIndex;
        }
        return {
            tokens: matchedTokens,
            groups: groups,
            errors: errors
        };
    };
|
||||
Lexer.prototype.handleModes = function (config, pop_mode, push_mode, newToken) {
|
||||
if (config.pop === true) {
|
||||
// need to save the PUSH_MODE property as if the mode is popped
|
||||
// patternIdxToPopMode is updated to reflect the new mode after popping the stack
|
||||
var pushMode = config.push;
|
||||
pop_mode(newToken);
|
||||
if (pushMode !== undefined) {
|
||||
push_mode.call(this, pushMode);
|
||||
}
|
||||
}
|
||||
else if (config.push !== undefined) {
|
||||
push_mode.call(this, config.push);
|
||||
}
|
||||
};
|
||||
Lexer.prototype.chopInput = function (text, length) {
|
||||
return text.substring(length);
|
||||
};
|
||||
Lexer.prototype.updateLastIndex = function (regExp, newLastIndex) {
|
||||
regExp.lastIndex = newLastIndex;
|
||||
};
|
||||
// TODO: decrease this under 600 characters? inspect stripping comments option in TSC compiler
|
||||
Lexer.prototype.updateTokenEndLineColumnLocation = function (newToken, group, lastLTIdx, numOfLTsInMatch, line, column, imageLength) {
|
||||
var lastCharIsLT, fixForEndingInLT;
|
||||
if (group !== undefined) {
|
||||
// a none skipped multi line Token, need to update endLine/endColumn
|
||||
lastCharIsLT = lastLTIdx === imageLength - 1;
|
||||
fixForEndingInLT = lastCharIsLT ? -1 : 0;
|
||||
if (!(numOfLTsInMatch === 1 && lastCharIsLT === true)) {
|
||||
// if a token ends in a LT that last LT only affects the line numbering of following Tokens
|
||||
newToken.endLine = line + fixForEndingInLT;
|
||||
// the last LT in a token does not affect the endColumn either as the [columnStart ... columnEnd)
|
||||
// inclusive to exclusive range.
|
||||
newToken.endColumn = column - 1 + -fixForEndingInLT;
|
||||
}
|
||||
// else single LT in the last character of a token, no need to modify the endLine/EndColumn
|
||||
}
|
||||
};
|
||||
Lexer.prototype.computeNewColumn = function (oldColumn, imageLength) {
|
||||
return oldColumn + imageLength;
|
||||
};
|
||||
// Place holder, will be replaced by the correct variant according to the locationTracking option at runtime.
|
||||
/* istanbul ignore next - place holder */
|
||||
Lexer.prototype.createTokenInstance = function () {
|
||||
var args = [];
|
||||
for (var _i = 0; _i < arguments.length; _i++) {
|
||||
args[_i] = arguments[_i];
|
||||
}
|
||||
return null;
|
||||
};
|
||||
Lexer.prototype.createOffsetOnlyToken = function (image, startOffset, tokenTypeIdx, tokenType) {
|
||||
return {
|
||||
image: image,
|
||||
startOffset: startOffset,
|
||||
tokenTypeIdx: tokenTypeIdx,
|
||||
tokenType: tokenType
|
||||
};
|
||||
};
|
||||
Lexer.prototype.createStartOnlyToken = function (image, startOffset, tokenTypeIdx, tokenType, startLine, startColumn) {
|
||||
return {
|
||||
image: image,
|
||||
startOffset: startOffset,
|
||||
startLine: startLine,
|
||||
startColumn: startColumn,
|
||||
tokenTypeIdx: tokenTypeIdx,
|
||||
tokenType: tokenType
|
||||
};
|
||||
};
|
||||
Lexer.prototype.createFullToken = function (image, startOffset, tokenTypeIdx, tokenType, startLine, startColumn, imageLength) {
|
||||
return {
|
||||
image: image,
|
||||
startOffset: startOffset,
|
||||
endOffset: startOffset + imageLength - 1,
|
||||
startLine: startLine,
|
||||
endLine: startLine,
|
||||
startColumn: startColumn,
|
||||
endColumn: startColumn + imageLength - 1,
|
||||
tokenTypeIdx: tokenTypeIdx,
|
||||
tokenType: tokenType
|
||||
};
|
||||
};
|
||||
// Place holder, will be replaced by the correct variant according to the locationTracking option at runtime.
|
||||
/* istanbul ignore next - place holder */
|
||||
Lexer.prototype.addToken = function (tokenVector, index, tokenToAdd) {
|
||||
return 666;
|
||||
};
|
||||
Lexer.prototype.addTokenUsingPush = function (tokenVector, index, tokenToAdd) {
|
||||
tokenVector.push(tokenToAdd);
|
||||
return index;
|
||||
};
|
||||
Lexer.prototype.addTokenUsingMemberAccess = function (tokenVector, index, tokenToAdd) {
|
||||
tokenVector[index] = tokenToAdd;
|
||||
index++;
|
||||
return index;
|
||||
};
|
||||
// Place holder, will be replaced by the correct variant according to the hasCustom flag option at runtime.
|
||||
/* istanbul ignore next - place holder */
|
||||
Lexer.prototype.handlePayload = function (token, payload) { };
|
||||
Lexer.prototype.handlePayloadNoCustom = function (token, payload) { };
|
||||
Lexer.prototype.handlePayloadWithCustom = function (token, payload) {
|
||||
if (payload !== null) {
|
||||
token.payload = payload;
|
||||
}
|
||||
};
|
||||
/* istanbul ignore next - place holder to be replaced with chosen alternative at runtime */
|
||||
Lexer.prototype.match = function (pattern, text, offset) {
|
||||
return null;
|
||||
};
|
||||
Lexer.prototype.matchWithTest = function (pattern, text, offset) {
|
||||
var found = pattern.test(text);
|
||||
if (found === true) {
|
||||
return text.substring(offset, pattern.lastIndex);
|
||||
}
|
||||
return null;
|
||||
};
|
||||
Lexer.prototype.matchWithExec = function (pattern, text) {
|
||||
var regExpArray = pattern.exec(text);
|
||||
return regExpArray !== null ? regExpArray[0] : regExpArray;
|
||||
};
|
||||
// Duplicated from the parser's perf trace trait to allow future extraction
|
||||
// of the lexer to a separate package.
|
||||
Lexer.prototype.TRACE_INIT = function (phaseDesc, phaseImpl) {
|
||||
// No need to optimize this using NOOP pattern because
|
||||
// It is not called in a hot spot...
|
||||
if (this.traceInitPerf === true) {
|
||||
this.traceInitIndent++;
|
||||
var indent = new Array(this.traceInitIndent + 1).join("\t");
|
||||
if (this.traceInitIndent < this.traceInitMaxIdent) {
|
||||
console.log(indent + "--> <" + phaseDesc + ">");
|
||||
}
|
||||
var _a = timer(phaseImpl), time = _a.time, value = _a.value;
|
||||
/* istanbul ignore next - Difficult to reproduce specific performance behavior (>10ms) in tests */
|
||||
var traceMethod = time > 10 ? console.warn : console.log;
|
||||
if (this.traceInitIndent < this.traceInitMaxIdent) {
|
||||
traceMethod(indent + "<-- <" + phaseDesc + "> time: " + time + "ms");
|
||||
}
|
||||
this.traceInitIndent--;
|
||||
return value;
|
||||
}
|
||||
else {
|
||||
return phaseImpl();
|
||||
}
|
||||
};
|
||||
Lexer.SKIPPED = "This marks a skipped Token pattern, this means each token identified by it will" +
|
||||
"be consumed and then thrown into oblivion, this can be used to for example to completely ignore whitespace.";
|
||||
Lexer.NA = /NOT_APPLICABLE/;
|
||||
return Lexer;
|
||||
}());
|
||||
export { Lexer };
|
||||
//# sourceMappingURL=lexer_public.js.map
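For orientation, here is a minimal usage sketch of the Lexer class defined above. It assumes the package's public entry points (createToken, Lexer) as vendored in this commit; the token names and patterns are hypothetical:

import { createToken, Lexer } from "chevrotain";
// WhiteSpace is matched but discarded via the SKIPPED group
var WhiteSpace = createToken({ name: "WhiteSpace", pattern: /\s+/, group: Lexer.SKIPPED });
var Identifier = createToken({ name: "Identifier", pattern: /[a-zA-Z]\w*/ });
// longer_alt makes the lexer also try Identifier at the same offset and
// prefer it when its match is longer (see the longerAlt handling above)
var While = createToken({ name: "While", pattern: /while/, longer_alt: Identifier });
var lexer = new Lexer([WhiteSpace, While, Identifier]);
var result = lexer.tokenize("while whileFoo");
// result.tokens -> [While("while"), Identifier("whileFoo")], result.errors -> []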
260
node_modules/chevrotain/lib_esm/src/scan/reg_exp.js
generated
vendored
Normal file
@@ -0,0 +1,260 @@
var __extends = (this && this.__extends) || (function () {
    var extendStatics = function (d, b) {
        extendStatics = Object.setPrototypeOf ||
            ({ __proto__: [] } instanceof Array && function (d, b) { d.__proto__ = b; }) ||
            function (d, b) { for (var p in b) if (b.hasOwnProperty(p)) d[p] = b[p]; };
        return extendStatics(d, b);
    };
    return function (d, b) {
        extendStatics(d, b);
        function __() { this.constructor = d; }
        d.prototype = b === null ? Object.create(b) : (__.prototype = b.prototype, new __());
    };
})();
import { VERSION, BaseRegExpVisitor } from "regexp-to-ast";
import { forEach, contains, PRINT_ERROR, PRINT_WARNING, find, isArray, every, values } from "../utils/utils";
import { getRegExpAst } from "./reg_exp_parser";
import { charCodeToOptimizedIndex, minOptimizationVal } from "./lexer";
var complementErrorMessage = "Complement Sets are not supported for first char optimization";
export var failedOptimizationPrefixMsg = 'Unable to use "first char" lexer optimizations:\n';
export function getOptimizedStartCodesIndices(regExp, ensureOptimizations) {
    if (ensureOptimizations === void 0) { ensureOptimizations = false; }
    try {
        var ast = getRegExpAst(regExp);
        var firstChars = firstCharOptimizedIndices(ast.value, {}, ast.flags.ignoreCase);
        return firstChars;
    }
    catch (e) {
        /* istanbul ignore next */
        // Testing this relies on the regexp-to-ast library having a bug...
        // TODO: only the else branch needs to be ignored, try to fix with newer prettier / tsc
        if (e.message === complementErrorMessage) {
            if (ensureOptimizations) {
                PRINT_WARNING("" + failedOptimizationPrefixMsg +
                    ("\tUnable to optimize: < " + regExp.toString() + " >\n") +
                    "\tComplement Sets cannot be automatically optimized.\n" +
                    "\tThis will disable the lexer's first char optimizations.\n" +
                    "\tSee: https://sap.github.io/chevrotain/docs/guide/resolving_lexer_errors.html#COMPLEMENT for details.");
            }
        }
        else {
            var msgSuffix = "";
            if (ensureOptimizations) {
                msgSuffix =
                    "\n\tThis will disable the lexer's first char optimizations.\n" +
                        "\tSee: https://sap.github.io/chevrotain/docs/guide/resolving_lexer_errors.html#REGEXP_PARSING for details.";
            }
            PRINT_ERROR(failedOptimizationPrefixMsg + "\n" +
                ("\tFailed parsing: < " + regExp.toString() + " >\n") +
                ("\tUsing the regexp-to-ast library version: " + VERSION + "\n") +
                "\tPlease open an issue at: https://github.com/bd82/regexp-to-ast/issues" +
                msgSuffix);
        }
    }
    return [];
}
export function firstCharOptimizedIndices(ast, result, ignoreCase) {
    switch (ast.type) {
        case "Disjunction":
            for (var i = 0; i < ast.value.length; i++) {
                firstCharOptimizedIndices(ast.value[i], result, ignoreCase);
            }
            break;
        case "Alternative":
            var terms = ast.value;
            for (var i = 0; i < terms.length; i++) {
                var term = terms[i];
                // skip terms that cannot affect the first char results
                switch (term.type) {
                    case "EndAnchor":
                    // A group back reference cannot affect the potential starting char,
                    // because if a back reference is the first production then the group
                    // being referenced had to come BEFORE, so its codes have already been added.
                    case "GroupBackReference":
                    // assertions do not affect potential starting codes
                    case "Lookahead":
                    case "NegativeLookahead":
                    case "StartAnchor":
                    case "WordBoundary":
                    case "NonWordBoundary":
                        continue;
                }
                var atom = term;
                switch (atom.type) {
                    case "Character":
                        addOptimizedIdxToResult(atom.value, result, ignoreCase);
                        break;
                    case "Set":
                        if (atom.complement === true) {
                            throw Error(complementErrorMessage);
                        }
                        forEach(atom.value, function (code) {
                            if (typeof code === "number") {
                                addOptimizedIdxToResult(code, result, ignoreCase);
                            }
                            else {
                                // range
                                var range = code;
                                // cannot optimize when ignoreCase is enabled
                                if (ignoreCase === true) {
                                    for (var rangeCode = range.from; rangeCode <= range.to; rangeCode++) {
                                        addOptimizedIdxToResult(rangeCode, result, ignoreCase);
                                    }
                                }
                                // Optimization (2 orders of magnitude less work for very large ranges)
                                else {
                                    // handle unoptimized values
                                    for (var rangeCode = range.from; rangeCode <= range.to &&
                                        rangeCode < minOptimizationVal; rangeCode++) {
                                        addOptimizedIdxToResult(rangeCode, result, ignoreCase);
                                    }
                                    // Less common charCodes, where we optimize for faster init time by using larger "buckets"
                                    if (range.to >= minOptimizationVal) {
                                        var minUnOptVal = range.from >= minOptimizationVal
                                            ? range.from
                                            : minOptimizationVal;
                                        var maxUnOptVal = range.to;
                                        var minOptIdx = charCodeToOptimizedIndex(minUnOptVal);
                                        var maxOptIdx = charCodeToOptimizedIndex(maxUnOptVal);
                                        for (var currOptIdx = minOptIdx; currOptIdx <= maxOptIdx; currOptIdx++) {
                                            result[currOptIdx] = currOptIdx;
                                        }
                                    }
                                }
                            }
                        });
                        break;
                    case "Group":
                        firstCharOptimizedIndices(atom.value, result, ignoreCase);
                        break;
                    /* istanbul ignore next */
                    default:
                        throw Error("Non Exhaustive Match");
                }
                // reached a mandatory production, no more **start** codes can be found on this alternative
                var isOptionalQuantifier = atom.quantifier !== undefined &&
                    atom.quantifier.atLeast === 0;
                if (
                // A group may be optional due to empty contents /(?:)/
                // or if everything inside it is optional /((a)?)/
                (atom.type === "Group" && isWholeOptional(atom) === false) ||
                    // If this term is not a group it may only be optional if it has an optional quantifier
                    (atom.type !== "Group" && isOptionalQuantifier === false)) {
                    break;
                }
            }
            break;
        /* istanbul ignore next */
        default:
            throw Error("non exhaustive match!");
    }
    // console.log(Object.keys(result).length)
    return values(result);
}
function addOptimizedIdxToResult(code, result, ignoreCase) {
    var optimizedCharIdx = charCodeToOptimizedIndex(code);
    result[optimizedCharIdx] = optimizedCharIdx;
    if (ignoreCase === true) {
        handleIgnoreCase(code, result);
    }
}
function handleIgnoreCase(code, result) {
    var char = String.fromCharCode(code);
    var upperChar = char.toUpperCase();
    /* istanbul ignore else */
    if (upperChar !== char) {
        var optimizedCharIdx = charCodeToOptimizedIndex(upperChar.charCodeAt(0));
        result[optimizedCharIdx] = optimizedCharIdx;
    }
    else {
        var lowerChar = char.toLowerCase();
        if (lowerChar !== char) {
            var optimizedCharIdx = charCodeToOptimizedIndex(lowerChar.charCodeAt(0));
            result[optimizedCharIdx] = optimizedCharIdx;
        }
    }
}
function findCode(setNode, targetCharCodes) {
    return find(setNode.value, function (codeOrRange) {
        if (typeof codeOrRange === "number") {
            return contains(targetCharCodes, codeOrRange);
        }
        else {
            // range
            var range_1 = codeOrRange;
            return (find(targetCharCodes, function (targetCode) {
                return range_1.from <= targetCode && targetCode <= range_1.to;
            }) !== undefined);
        }
    });
}
function isWholeOptional(ast) {
    if (ast.quantifier && ast.quantifier.atLeast === 0) {
        return true;
    }
    if (!ast.value) {
        return false;
    }
    return isArray(ast.value)
        ? every(ast.value, isWholeOptional)
        : isWholeOptional(ast.value);
}
var CharCodeFinder = /** @class */ (function (_super) {
    __extends(CharCodeFinder, _super);
    function CharCodeFinder(targetCharCodes) {
        var _this = _super.call(this) || this;
        _this.targetCharCodes = targetCharCodes;
        _this.found = false;
        return _this;
    }
    CharCodeFinder.prototype.visitChildren = function (node) {
        // No need to keep looking...
        if (this.found === true) {
            return;
        }
        // Handle lookaheads in the switch, as they do not actually consume any characters:
        // finding a charCode in a lookahead context does not mean the regexp can actually contain it in a match.
        switch (node.type) {
            case "Lookahead":
                this.visitLookahead(node);
                return;
            case "NegativeLookahead":
                this.visitNegativeLookahead(node);
                return;
        }
        _super.prototype.visitChildren.call(this, node);
    };
    CharCodeFinder.prototype.visitCharacter = function (node) {
        if (contains(this.targetCharCodes, node.value)) {
            this.found = true;
        }
    };
    CharCodeFinder.prototype.visitSet = function (node) {
        if (node.complement) {
            if (findCode(node, this.targetCharCodes) === undefined) {
                this.found = true;
            }
        }
        else {
            if (findCode(node, this.targetCharCodes) !== undefined) {
                this.found = true;
            }
        }
    };
    return CharCodeFinder;
}(BaseRegExpVisitor));
export function canMatchCharCode(charCodes, pattern) {
    if (pattern instanceof RegExp) {
        var ast = getRegExpAst(pattern);
        var charCodeFinder = new CharCodeFinder(charCodes);
        charCodeFinder.visit(ast);
        return charCodeFinder.found;
    }
    else {
        return (find(pattern, function (char) {
            return contains(charCodes, char.charCodeAt(0));
        }) !== undefined);
    }
}
//# sourceMappingURL=reg_exp.js.map
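The two exports above drive the "first char" optimization: getOptimizedStartCodesIndices computes the set of optimized start-code indices a pattern can begin with, and canMatchCharCode tests whether a pattern can ever match one of a given list of char codes. A small sketch, assuming the vendored module layout (the exact index values depend on charCodeToOptimizedIndex):

import { getOptimizedStartCodesIndices, canMatchCharCode } from "./reg_exp";
// /ab|cd/ can only start with "a" or "c" -> their optimized indices
var startCodes = getOptimizedStartCodesIndices(/ab|cd/);
// a complement set matches anything outside it, so charCode 65 ("A") is possible
var canMatch = canMatchCharCode([65], /[^a-z]/); // -> true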
18
node_modules/chevrotain/lib_esm/src/scan/reg_exp_parser.js
generated
vendored
Normal file
@@ -0,0 +1,18 @@
import { RegExpParser } from "regexp-to-ast";
var regExpAstCache = {};
var regExpParser = new RegExpParser();
export function getRegExpAst(regExp) {
    var regExpStr = regExp.toString();
    if (regExpAstCache.hasOwnProperty(regExpStr)) {
        return regExpAstCache[regExpStr];
    }
    else {
        var regExpAst = regExpParser.pattern(regExpStr);
        regExpAstCache[regExpStr] = regExpAst;
        return regExpAst;
    }
}
export function clearRegExpParserCache() {
    regExpAstCache = {};
}
//# sourceMappingURL=reg_exp_parser.js.map
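getRegExpAst memoizes parse results keyed by the regexp's string form, so repeatedly analyzing the same pattern is cheap; clearRegExpParserCache resets the cache. A small sketch, assuming the vendored module layout:

import { getRegExpAst, clearRegExpParserCache } from "./reg_exp_parser";
var first = getRegExpAst(/a+/);
var second = getRegExpAst(/a+/);
console.log(first === second); // true - the cached AST object is reused
clearRegExpParserCache();
console.log(getRegExpAst(/a+/) === first); // false - re-parsed after clearing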
114
node_modules/chevrotain/lib_esm/src/scan/tokens.js
generated
vendored
Normal file
@@ -0,0 +1,114 @@
import { cloneArr, compact, contains, difference, flatten, forEach, has, isArray, isEmpty, map } from "../utils/utils";
export function tokenStructuredMatcher(tokInstance, tokConstructor) {
    var instanceType = tokInstance.tokenTypeIdx;
    if (instanceType === tokConstructor.tokenTypeIdx) {
        return true;
    }
    else {
        return (tokConstructor.isParent === true &&
            tokConstructor.categoryMatchesMap[instanceType] === true);
    }
}
// Optimized tokenMatcher in case our grammar does not use token categories.
// Being so tiny it is much more likely to be in-lined, and this avoids the function call overhead.
export function tokenStructuredMatcherNoCategories(token, tokType) {
    return token.tokenTypeIdx === tokType.tokenTypeIdx;
}
export var tokenShortNameIdx = 1;
export var tokenIdxToClass = {};
export function augmentTokenTypes(tokenTypes) {
    // collect the parent Token Types as well.
    var tokenTypesAndParents = expandCategories(tokenTypes);
    // add required tokenType and categoryMatches properties
    assignTokenDefaultProps(tokenTypesAndParents);
    // fill up the categoryMatches
    assignCategoriesMapProp(tokenTypesAndParents);
    assignCategoriesTokensProp(tokenTypesAndParents);
    forEach(tokenTypesAndParents, function (tokType) {
        tokType.isParent = tokType.categoryMatches.length > 0;
    });
}
export function expandCategories(tokenTypes) {
    var result = cloneArr(tokenTypes);
    var categories = tokenTypes;
    var searching = true;
    while (searching) {
        categories = compact(flatten(map(categories, function (currTokType) { return currTokType.CATEGORIES; })));
        var newCategories = difference(categories, result);
        result = result.concat(newCategories);
        if (isEmpty(newCategories)) {
            searching = false;
        }
        else {
            categories = newCategories;
        }
    }
    return result;
}
export function assignTokenDefaultProps(tokenTypes) {
    forEach(tokenTypes, function (currTokType) {
        if (!hasShortKeyProperty(currTokType)) {
            tokenIdxToClass[tokenShortNameIdx] = currTokType;
            currTokType.tokenTypeIdx = tokenShortNameIdx++;
        }
        // CATEGORIES? : TokenType | TokenType[]
        if (hasCategoriesProperty(currTokType) &&
            !isArray(currTokType.CATEGORIES)
        // &&
        // !isUndefined(currTokType.CATEGORIES.PATTERN)
        ) {
            currTokType.CATEGORIES = [currTokType.CATEGORIES];
        }
        if (!hasCategoriesProperty(currTokType)) {
            currTokType.CATEGORIES = [];
        }
        if (!hasExtendingTokensTypesProperty(currTokType)) {
            currTokType.categoryMatches = [];
        }
        if (!hasExtendingTokensTypesMapProperty(currTokType)) {
            currTokType.categoryMatchesMap = {};
        }
    });
}
export function assignCategoriesTokensProp(tokenTypes) {
    forEach(tokenTypes, function (currTokType) {
        // avoid duplications
        currTokType.categoryMatches = [];
        forEach(currTokType.categoryMatchesMap, function (val, key) {
            currTokType.categoryMatches.push(tokenIdxToClass[key].tokenTypeIdx);
        });
    });
}
export function assignCategoriesMapProp(tokenTypes) {
    forEach(tokenTypes, function (currTokType) {
        singleAssignCategoriesToksMap([], currTokType);
    });
}
export function singleAssignCategoriesToksMap(path, nextNode) {
    forEach(path, function (pathNode) {
        nextNode.categoryMatchesMap[pathNode.tokenTypeIdx] = true;
    });
    forEach(nextNode.CATEGORIES, function (nextCategory) {
        var newPath = path.concat(nextNode);
        // avoids infinite loops due to cyclic categories.
        if (!contains(newPath, nextCategory)) {
            singleAssignCategoriesToksMap(newPath, nextCategory);
        }
    });
}
export function hasShortKeyProperty(tokType) {
    return has(tokType, "tokenTypeIdx");
}
export function hasCategoriesProperty(tokType) {
    return has(tokType, "CATEGORIES");
}
export function hasExtendingTokensTypesProperty(tokType) {
    return has(tokType, "categoryMatches");
}
export function hasExtendingTokensTypesMapProperty(tokType) {
    return has(tokType, "categoryMatchesMap");
}
export function isTokenType(tokType) {
    return has(tokType, "tokenTypeIdx");
}
//# sourceMappingURL=tokens.js.map
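augmentTokenTypes wires up the category metadata that tokenStructuredMatcher consults: the parent's categoryMatchesMap records the tokenTypeIdx of every token type that belongs to it. A small sketch with hypothetical token types, assuming the vendored module layout:

import { augmentTokenTypes, tokenStructuredMatcher } from "./tokens";
var Literal = { name: "Literal" }; // acts as a category (parent)
var NumberLiteral = { name: "NumberLiteral", CATEGORIES: [Literal] };
augmentTokenTypes([NumberLiteral]); // Literal is collected via expandCategories
// now Literal.categoryMatchesMap[NumberLiteral.tokenTypeIdx] === true and Literal.isParent === true
var numToken = { tokenTypeIdx: NumberLiteral.tokenTypeIdx };
console.log(tokenStructuredMatcher(numToken, Literal)); // true - matched via the category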
2
node_modules/chevrotain/lib_esm/src/scan/tokens_constants.js
generated
vendored
Normal file
@@ -0,0 +1,2 @@
export var EOF_TOKEN_TYPE = 1;
//# sourceMappingURL=tokens_constants.js.map
87
node_modules/chevrotain/lib_esm/src/scan/tokens_public.js
generated
vendored
Normal file
@@ -0,0 +1,87 @@
import { has, isString, isUndefined } from "../utils/utils";
import { Lexer } from "./lexer_public";
import { augmentTokenTypes, tokenStructuredMatcher } from "./tokens";
export function tokenLabel(tokType) {
    if (hasTokenLabel(tokType)) {
        return tokType.LABEL;
    }
    else {
        return tokType.name;
    }
}
export function tokenName(tokType) {
    return tokType.name;
}
export function hasTokenLabel(obj) {
    return isString(obj.LABEL) && obj.LABEL !== "";
}
var PARENT = "parent";
var CATEGORIES = "categories";
var LABEL = "label";
var GROUP = "group";
var PUSH_MODE = "push_mode";
var POP_MODE = "pop_mode";
var LONGER_ALT = "longer_alt";
var LINE_BREAKS = "line_breaks";
var START_CHARS_HINT = "start_chars_hint";
export function createToken(config) {
    return createTokenInternal(config);
}
function createTokenInternal(config) {
    var pattern = config.pattern;
    var tokenType = {};
    tokenType.name = config.name;
    if (!isUndefined(pattern)) {
        tokenType.PATTERN = pattern;
    }
    if (has(config, PARENT)) {
        throw "The parent property is no longer supported.\n" +
            "See: https://github.com/SAP/chevrotain/issues/564#issuecomment-349062346 for details.";
    }
    if (has(config, CATEGORIES)) {
        // casting to ANY as this will be fixed inside `augmentTokenTypes`
        tokenType.CATEGORIES = config[CATEGORIES];
    }
    augmentTokenTypes([tokenType]);
    if (has(config, LABEL)) {
        tokenType.LABEL = config[LABEL];
    }
    if (has(config, GROUP)) {
        tokenType.GROUP = config[GROUP];
    }
    if (has(config, POP_MODE)) {
        tokenType.POP_MODE = config[POP_MODE];
    }
    if (has(config, PUSH_MODE)) {
        tokenType.PUSH_MODE = config[PUSH_MODE];
    }
    if (has(config, LONGER_ALT)) {
        tokenType.LONGER_ALT = config[LONGER_ALT];
    }
    if (has(config, LINE_BREAKS)) {
        tokenType.LINE_BREAKS = config[LINE_BREAKS];
    }
    if (has(config, START_CHARS_HINT)) {
        tokenType.START_CHARS_HINT = config[START_CHARS_HINT];
    }
    return tokenType;
}
export var EOF = createToken({ name: "EOF", pattern: Lexer.NA });
augmentTokenTypes([EOF]);
export function createTokenInstance(tokType, image, startOffset, endOffset, startLine, endLine, startColumn, endColumn) {
    return {
        image: image,
        startOffset: startOffset,
        endOffset: endOffset,
        startLine: startLine,
        endLine: endLine,
        startColumn: startColumn,
        endColumn: endColumn,
        tokenTypeIdx: tokType.tokenTypeIdx,
        tokenType: tokType
    };
}
export function tokenMatcher(token, tokType) {
    return tokenStructuredMatcher(token, tokType);
}
//# sourceMappingURL=tokens_public.js.map
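Putting the public token API together: createToken accepts the option keys listed above (categories, label, group, etc.), and tokenMatcher respects category membership. A small sketch with hypothetical tokens, assuming the vendored module layout:

import { Lexer } from "./lexer_public";
import { createToken, createTokenInstance, tokenMatcher } from "./tokens_public";
var Keyword = createToken({ name: "Keyword", pattern: Lexer.NA }); // abstract category, never lexed directly
var If = createToken({ name: "If", pattern: /if/, categories: [Keyword] });
// build a token instance the way a lexer would (positions are illustrative)
var ifTok = createTokenInstance(If, "if", 0, 1, 1, 1, 1, 2);
console.log(tokenMatcher(ifTok, If));      // true - exact token type
console.log(tokenMatcher(ifTok, Keyword)); // true - via the Keyword category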