NEW : REBASE THE ENTIRE WORKING PROJECT

2022-02-09 19:19:09 +01:00
parent ddad60988e
commit ae93015aa8
1338 changed files with 286867 additions and 0 deletions

node_modules/chevrotain/lib_esm/src/scan/lexer.js generated vendored Normal file

@@ -0,0 +1,885 @@
var __extends = (this && this.__extends) || (function () {
var extendStatics = function (d, b) {
extendStatics = Object.setPrototypeOf ||
({ __proto__: [] } instanceof Array && function (d, b) { d.__proto__ = b; }) ||
function (d, b) { for (var p in b) if (b.hasOwnProperty(p)) d[p] = b[p]; };
return extendStatics(d, b);
};
return function (d, b) {
extendStatics(d, b);
function __() { this.constructor = d; }
d.prototype = b === null ? Object.create(b) : (__.prototype = b.prototype, new __());
};
})();
import { BaseRegExpVisitor } from "regexp-to-ast";
import { Lexer, LexerDefinitionErrorType } from "./lexer_public";
import { compact, contains, defaults, difference, filter, find, first, flatten, forEach, has, indexOf, isArray, isEmpty, isFunction, isRegExp, isString, isUndefined, keys, map, mapValues, packArray, PRINT_ERROR, reduce, reject } from "../utils/utils";
import { canMatchCharCode, failedOptimizationPrefixMsg, getOptimizedStartCodesIndices } from "./reg_exp";
import { getRegExpAst } from "./reg_exp_parser";
var PATTERN = "PATTERN";
export var DEFAULT_MODE = "defaultMode";
export var MODES = "modes";
export var SUPPORT_STICKY = typeof new RegExp("(?:)").sticky === "boolean";
export function disableSticky() {
SUPPORT_STICKY = false;
}
export function enableSticky() {
SUPPORT_STICKY = true;
}
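// Editor's illustrative sketch (not part of chevrotain): with the sticky ("y") flag a
// regExp only matches at its exact lastIndex, so the lexer can keep the whole input
// string and simply advance lastIndex instead of repeatedly chopping the text.
if (SUPPORT_STICKY) {
var stickyDemo = new RegExp("\\d+", "y");
stickyDemo.lastIndex = 3;
stickyDemo.exec("abc42def"); // -> ["42"], matched exactly at offset 3
stickyDemo.lastIndex = 2;
stickyDemo.exec("abc42def"); // -> null, unlike the "g" flag it will not scan ahead
}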
export function analyzeTokenTypes(tokenTypes, options) {
options = defaults(options, {
useSticky: SUPPORT_STICKY,
debug: false,
safeMode: false,
positionTracking: "full",
lineTerminatorCharacters: ["\r", "\n"],
tracer: function (msg, action) { return action(); }
});
var tracer = options.tracer;
tracer("initCharCodeToOptimizedIndexMap", function () {
initCharCodeToOptimizedIndexMap();
});
var onlyRelevantTypes;
tracer("Reject Lexer.NA", function () {
onlyRelevantTypes = reject(tokenTypes, function (currType) {
return currType[PATTERN] === Lexer.NA;
});
});
var hasCustom = false;
var allTransformedPatterns;
tracer("Transform Patterns", function () {
hasCustom = false;
allTransformedPatterns = map(onlyRelevantTypes, function (currType) {
var currPattern = currType[PATTERN];
/* istanbul ignore else */
if (isRegExp(currPattern)) {
var regExpSource = currPattern.source;
if (regExpSource.length === 1 &&
// these are the only regExp meta characters that can appear in a length-one regExp
regExpSource !== "^" &&
regExpSource !== "$" &&
regExpSource !== ".") {
return regExpSource;
}
else if (regExpSource.length === 2 &&
regExpSource[0] === "\\" &&
// not a meta character
!contains([
"d",
"D",
"s",
"S",
"t",
"r",
"n",
"t",
"0",
"c",
"b",
"B",
"f",
"v",
"w",
"W"
], regExpSource[1])) {
// escaped meta Characters: /\+/ /\[/
// or redundant escaping: /\a/
// without the escaping "\"
return regExpSource[1];
}
else {
return options.useSticky
? addStickyFlag(currPattern)
: addStartOfInput(currPattern);
}
}
else if (isFunction(currPattern)) {
hasCustom = true;
// CustomPatternMatcherFunc - custom patterns do not require any transformations, only wrapping in a RegExp-like object
return { exec: currPattern };
}
else if (has(currPattern, "exec")) {
hasCustom = true;
// ICustomPattern
return currPattern;
}
else if (typeof currPattern === "string") {
if (currPattern.length === 1) {
return currPattern;
}
else {
var escapedRegExpString = currPattern.replace(/[\\^$.*+?()[\]{}|]/g, "\\$&");
var wrappedRegExp = new RegExp(escapedRegExpString);
return options.useSticky
? addStickyFlag(wrappedRegExp)
: addStartOfInput(wrappedRegExp);
}
}
else {
throw Error("non exhaustive match");
}
});
});
var patternIdxToType;
var patternIdxToGroup;
var patternIdxToLongerAltIdx;
var patternIdxToPushMode;
var patternIdxToPopMode;
tracer("misc mapping", function () {
patternIdxToType = map(onlyRelevantTypes, function (currType) { return currType.tokenTypeIdx; });
patternIdxToGroup = map(onlyRelevantTypes, function (clazz) {
var groupName = clazz.GROUP;
/* istanbul ignore next */
if (groupName === Lexer.SKIPPED) {
return undefined;
}
else if (isString(groupName)) {
return groupName;
}
else if (isUndefined(groupName)) {
return false;
}
else {
throw Error("non exhaustive match");
}
});
patternIdxToLongerAltIdx = map(onlyRelevantTypes, function (clazz) {
var longerAltType = clazz.LONGER_ALT;
if (longerAltType) {
var longerAltIdx = indexOf(onlyRelevantTypes, longerAltType);
return longerAltIdx;
}
});
patternIdxToPushMode = map(onlyRelevantTypes, function (clazz) { return clazz.PUSH_MODE; });
patternIdxToPopMode = map(onlyRelevantTypes, function (clazz) {
return has(clazz, "POP_MODE");
});
});
var patternIdxToCanLineTerminator;
tracer("Line Terminator Handling", function () {
var lineTerminatorCharCodes = getCharCodes(options.lineTerminatorCharacters);
patternIdxToCanLineTerminator = map(onlyRelevantTypes, function (tokType) { return false; });
if (options.positionTracking !== "onlyOffset") {
patternIdxToCanLineTerminator = map(onlyRelevantTypes, function (tokType) {
if (has(tokType, "LINE_BREAKS")) {
return tokType.LINE_BREAKS;
}
else {
if (checkLineBreaksIssues(tokType, lineTerminatorCharCodes) === false) {
return canMatchCharCode(lineTerminatorCharCodes, tokType.PATTERN);
}
}
});
}
});
var patternIdxToIsCustom;
var patternIdxToShort;
var emptyGroups;
var patternIdxToConfig;
tracer("Misc Mapping #2", function () {
patternIdxToIsCustom = map(onlyRelevantTypes, isCustomPattern);
patternIdxToShort = map(allTransformedPatterns, isShortPattern);
emptyGroups = reduce(onlyRelevantTypes, function (acc, clazz) {
var groupName = clazz.GROUP;
if (isString(groupName) && !(groupName === Lexer.SKIPPED)) {
acc[groupName] = [];
}
return acc;
}, {});
patternIdxToConfig = map(allTransformedPatterns, function (x, idx) {
return {
pattern: allTransformedPatterns[idx],
longerAlt: patternIdxToLongerAltIdx[idx],
canLineTerminator: patternIdxToCanLineTerminator[idx],
isCustom: patternIdxToIsCustom[idx],
short: patternIdxToShort[idx],
group: patternIdxToGroup[idx],
push: patternIdxToPushMode[idx],
pop: patternIdxToPopMode[idx],
tokenTypeIdx: patternIdxToType[idx],
tokenType: onlyRelevantTypes[idx]
};
});
});
var canBeOptimized = true;
var charCodeToPatternIdxToConfig = [];
if (!options.safeMode) {
tracer("First Char Optimization", function () {
charCodeToPatternIdxToConfig = reduce(onlyRelevantTypes, function (result, currTokType, idx) {
if (typeof currTokType.PATTERN === "string") {
var charCode = currTokType.PATTERN.charCodeAt(0);
var optimizedIdx = charCodeToOptimizedIndex(charCode);
addToMapOfArrays(result, optimizedIdx, patternIdxToConfig[idx]);
}
else if (isArray(currTokType.START_CHARS_HINT)) {
var lastOptimizedIdx_1;
forEach(currTokType.START_CHARS_HINT, function (charOrInt) {
var charCode = typeof charOrInt === "string"
? charOrInt.charCodeAt(0)
: charOrInt;
var currOptimizedIdx = charCodeToOptimizedIndex(charCode);
// Avoid adding the config multiple times
if (lastOptimizedIdx_1 !== currOptimizedIdx) {
lastOptimizedIdx_1 = currOptimizedIdx;
addToMapOfArrays(result, currOptimizedIdx, patternIdxToConfig[idx]);
}
});
}
else if (isRegExp(currTokType.PATTERN)) {
if (currTokType.PATTERN.unicode) {
canBeOptimized = false;
if (options.ensureOptimizations) {
PRINT_ERROR("" + failedOptimizationPrefixMsg +
("\tUnable to analyze < " + currTokType.PATTERN.toString() + " > pattern.\n") +
"\tThe regexp unicode flag is not currently supported by the regexp-to-ast library.\n" +
"\tThis will disable the lexer's first char optimizations.\n" +
"\tFor details See: https://sap.github.io/chevrotain/docs/guide/resolving_lexer_errors.html#UNICODE_OPTIMIZE");
}
}
else {
var optimizedCodes = getOptimizedStartCodesIndices(currTokType.PATTERN, options.ensureOptimizations);
/* istanbul ignore if */
// the start codes will only be empty given an empty regExp or a failure of the regexp-to-ast library;
// the first case is covered by a different validation and the second cannot be tested.
if (isEmpty(optimizedCodes)) {
// we cannot understand what codes may start possible matches
// The optimization correctness requires knowing start codes for ALL patterns.
// Not actually sure this is an error, no debug message
canBeOptimized = false;
}
forEach(optimizedCodes, function (code) {
addToMapOfArrays(result, code, patternIdxToConfig[idx]);
});
}
}
else {
if (options.ensureOptimizations) {
PRINT_ERROR("" + failedOptimizationPrefixMsg +
("\tTokenType: <" + currTokType.name + "> is using a custom token pattern without providing <start_chars_hint> parameter.\n") +
"\tThis will disable the lexer's first char optimizations.\n" +
"\tFor details See: https://sap.github.io/chevrotain/docs/guide/resolving_lexer_errors.html#CUSTOM_OPTIMIZE");
}
canBeOptimized = false;
}
return result;
}, []);
});
}
tracer("ArrayPacking", function () {
charCodeToPatternIdxToConfig = packArray(charCodeToPatternIdxToConfig);
});
return {
emptyGroups: emptyGroups,
patternIdxToConfig: patternIdxToConfig,
charCodeToPatternIdxToConfig: charCodeToPatternIdxToConfig,
hasCustom: hasCustom,
canBeOptimized: canBeOptimized
};
}
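// Hedged usage sketch (editor's addition; the token types are hypothetical and would
// normally be produced by createToken/augmentTokenTypes before reaching this function):
// var analysis = analyzeTokenTypes(myTokenTypes, { positionTracking: "full" });
// analysis.charCodeToPatternIdxToConfig[charCodeToOptimizedIndex(97)] then lists the
// configs of every pattern that may start with "a", which is exactly what the hot
// tokenizer loop consults on each iteration.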
export function validatePatterns(tokenTypes, validModesNames) {
var errors = [];
var missingResult = findMissingPatterns(tokenTypes);
errors = errors.concat(missingResult.errors);
var invalidResult = findInvalidPatterns(missingResult.valid);
var validTokenTypes = invalidResult.valid;
errors = errors.concat(invalidResult.errors);
errors = errors.concat(validateRegExpPattern(validTokenTypes));
errors = errors.concat(findInvalidGroupType(validTokenTypes));
errors = errors.concat(findModesThatDoNotExist(validTokenTypes, validModesNames));
errors = errors.concat(findUnreachablePatterns(validTokenTypes));
return errors;
}
function validateRegExpPattern(tokenTypes) {
var errors = [];
var withRegExpPatterns = filter(tokenTypes, function (currTokType) {
return isRegExp(currTokType[PATTERN]);
});
errors = errors.concat(findEndOfInputAnchor(withRegExpPatterns));
errors = errors.concat(findStartOfInputAnchor(withRegExpPatterns));
errors = errors.concat(findUnsupportedFlags(withRegExpPatterns));
errors = errors.concat(findDuplicatePatterns(withRegExpPatterns));
errors = errors.concat(findEmptyMatchRegExps(withRegExpPatterns));
return errors;
}
export function findMissingPatterns(tokenTypes) {
var tokenTypesWithMissingPattern = filter(tokenTypes, function (currType) {
return !has(currType, PATTERN);
});
var errors = map(tokenTypesWithMissingPattern, function (currType) {
return {
message: "Token Type: ->" +
currType.name +
"<- missing static 'PATTERN' property",
type: LexerDefinitionErrorType.MISSING_PATTERN,
tokenTypes: [currType]
};
});
var valid = difference(tokenTypes, tokenTypesWithMissingPattern);
return { errors: errors, valid: valid };
}
export function findInvalidPatterns(tokenTypes) {
var tokenTypesWithInvalidPattern = filter(tokenTypes, function (currType) {
var pattern = currType[PATTERN];
return (!isRegExp(pattern) &&
!isFunction(pattern) &&
!has(pattern, "exec") &&
!isString(pattern));
});
var errors = map(tokenTypesWithInvalidPattern, function (currType) {
return {
message: "Token Type: ->" +
currType.name +
"<- static 'PATTERN' can only be a RegExp, a" +
" Function matching the {CustomPatternMatcherFunc} type or an Object matching the {ICustomPattern} interface.",
type: LexerDefinitionErrorType.INVALID_PATTERN,
tokenTypes: [currType]
};
});
var valid = difference(tokenTypes, tokenTypesWithInvalidPattern);
return { errors: errors, valid: valid };
}
var end_of_input = /[^\\][\$]/;
export function findEndOfInputAnchor(tokenTypes) {
var EndAnchorFinder = /** @class */ (function (_super) {
__extends(EndAnchorFinder, _super);
function EndAnchorFinder() {
var _this = _super !== null && _super.apply(this, arguments) || this;
_this.found = false;
return _this;
}
EndAnchorFinder.prototype.visitEndAnchor = function (node) {
this.found = true;
};
return EndAnchorFinder;
}(BaseRegExpVisitor));
var invalidRegex = filter(tokenTypes, function (currType) {
var pattern = currType[PATTERN];
try {
var regexpAst = getRegExpAst(pattern);
var endAnchorVisitor = new EndAnchorFinder();
endAnchorVisitor.visit(regexpAst);
return endAnchorVisitor.found;
}
catch (e) {
// old behavior in case of runtime exceptions with regexp-to-ast.
/* istanbul ignore next - cannot ensure an error in regexp-to-ast*/
return end_of_input.test(pattern.source);
}
});
var errors = map(invalidRegex, function (currType) {
return {
message: "Unexpected RegExp Anchor Error:\n" +
"\tToken Type: ->" +
currType.name +
"<- static 'PATTERN' cannot contain end of input anchor '$'\n" +
"\tSee sap.github.io/chevrotain/docs/guide/resolving_lexer_errors.html#ANCHORS" +
"\tfor details.",
type: LexerDefinitionErrorType.EOI_ANCHOR_FOUND,
tokenTypes: [currType]
};
});
return errors;
}
export function findEmptyMatchRegExps(tokenTypes) {
var matchesEmptyString = filter(tokenTypes, function (currType) {
var pattern = currType[PATTERN];
return pattern.test("");
});
var errors = map(matchesEmptyString, function (currType) {
return {
message: "Token Type: ->" +
currType.name +
"<- static 'PATTERN' must not match an empty string",
type: LexerDefinitionErrorType.EMPTY_MATCH_PATTERN,
tokenTypes: [currType]
};
});
return errors;
}
var start_of_input = /[^\\[][\^]|^\^/;
export function findStartOfInputAnchor(tokenTypes) {
var StartAnchorFinder = /** @class */ (function (_super) {
__extends(StartAnchorFinder, _super);
function StartAnchorFinder() {
var _this = _super !== null && _super.apply(this, arguments) || this;
_this.found = false;
return _this;
}
StartAnchorFinder.prototype.visitStartAnchor = function (node) {
this.found = true;
};
return StartAnchorFinder;
}(BaseRegExpVisitor));
var invalidRegex = filter(tokenTypes, function (currType) {
var pattern = currType[PATTERN];
try {
var regexpAst = getRegExpAst(pattern);
var startAnchorVisitor = new StartAnchorFinder();
startAnchorVisitor.visit(regexpAst);
return startAnchorVisitor.found;
}
catch (e) {
// old behavior in case of runtime exceptions with regexp-to-ast.
/* istanbul ignore next - cannot ensure an error in regexp-to-ast*/
return start_of_input.test(pattern.source);
}
});
var errors = map(invalidRegex, function (currType) {
return {
message: "Unexpected RegExp Anchor Error:\n" +
"\tToken Type: ->" +
currType.name +
"<- static 'PATTERN' cannot contain start of input anchor '^'\n" +
"\tSee https://sap.github.io/chevrotain/docs/guide/resolving_lexer_errors.html#ANCHORS" +
"\tfor details.",
type: LexerDefinitionErrorType.SOI_ANCHOR_FOUND,
tokenTypes: [currType]
};
});
return errors;
}
export function findUnsupportedFlags(tokenTypes) {
var invalidFlags = filter(tokenTypes, function (currType) {
var pattern = currType[PATTERN];
return (pattern instanceof RegExp && (pattern.multiline || pattern.global));
});
var errors = map(invalidFlags, function (currType) {
return {
message: "Token Type: ->" +
currType.name +
"<- static 'PATTERN' may NOT contain global('g') or multiline('m')",
type: LexerDefinitionErrorType.UNSUPPORTED_FLAGS_FOUND,
tokenTypes: [currType]
};
});
return errors;
}
// This can only test for identical duplicate RegExps, not semantically equivalent ones.
export function findDuplicatePatterns(tokenTypes) {
var found = [];
var identicalPatterns = map(tokenTypes, function (outerType) {
return reduce(tokenTypes, function (result, innerType) {
if (outerType.PATTERN.source === innerType.PATTERN.source &&
!contains(found, innerType) &&
innerType.PATTERN !== Lexer.NA) {
// this avoids duplicates in the result, each Token Type may only appear in one "set".
// in essence we are creating equivalence classes under the equality relation.
found.push(innerType);
result.push(innerType);
return result;
}
return result;
}, []);
});
identicalPatterns = compact(identicalPatterns);
var duplicatePatterns = filter(identicalPatterns, function (currIdenticalSet) {
return currIdenticalSet.length > 1;
});
var errors = map(duplicatePatterns, function (setOfIdentical) {
var tokenTypeNames = map(setOfIdentical, function (currType) {
return currType.name;
});
var dupPatternSrc = first(setOfIdentical).PATTERN;
return {
message: "The same RegExp pattern ->" + dupPatternSrc + "<-" +
("has been used in all of the following Token Types: " + tokenTypeNames.join(", ") + " <-"),
type: LexerDefinitionErrorType.DUPLICATE_PATTERNS_FOUND,
tokenTypes: setOfIdentical
};
});
return errors;
}
export function findInvalidGroupType(tokenTypes) {
var invalidTypes = filter(tokenTypes, function (clazz) {
if (!has(clazz, "GROUP")) {
return false;
}
var group = clazz.GROUP;
return group !== Lexer.SKIPPED && group !== Lexer.NA && !isString(group);
});
var errors = map(invalidTypes, function (currType) {
return {
message: "Token Type: ->" +
currType.name +
"<- static 'GROUP' can only be Lexer.SKIPPED/Lexer.NA/A String",
type: LexerDefinitionErrorType.INVALID_GROUP_TYPE_FOUND,
tokenTypes: [currType]
};
});
return errors;
}
export function findModesThatDoNotExist(tokenTypes, validModes) {
var invalidModes = filter(tokenTypes, function (clazz) {
return (clazz.PUSH_MODE !== undefined &&
!contains(validModes, clazz.PUSH_MODE));
});
var errors = map(invalidModes, function (tokType) {
var msg = "Token Type: ->" + tokType.name + "<- static 'PUSH_MODE' value cannot refer to a Lexer Mode ->" + tokType.PUSH_MODE + "<-" +
"which does not exist";
return {
message: msg,
type: LexerDefinitionErrorType.PUSH_MODE_DOES_NOT_EXIST,
tokenTypes: [tokType]
};
});
return errors;
}
export function findUnreachablePatterns(tokenTypes) {
var errors = [];
var canBeTested = reduce(tokenTypes, function (result, tokType, idx) {
var pattern = tokType.PATTERN;
if (pattern === Lexer.NA) {
return result;
}
// a more comprehensive validation for all forms of regExps would require
// deeper regExp analysis capabilities
if (isString(pattern)) {
result.push({ str: pattern, idx: idx, tokenType: tokType });
}
else if (isRegExp(pattern) && noMetaChar(pattern)) {
result.push({ str: pattern.source, idx: idx, tokenType: tokType });
}
return result;
}, []);
forEach(tokenTypes, function (tokType, testIdx) {
forEach(canBeTested, function (_a) {
var str = _a.str, idx = _a.idx, tokenType = _a.tokenType;
if (testIdx < idx && testTokenType(str, tokType.PATTERN)) {
var msg = "Token: ->" + tokenType.name + "<- can never be matched.\n" +
("Because it appears AFTER the Token Type ->" + tokType.name + "<-") +
"in the lexer's definition.\n" +
"See https://sap.github.io/chevrotain/docs/guide/resolving_lexer_errors.html#UNREACHABLE";
errors.push({
message: msg,
type: LexerDefinitionErrorType.UNREACHABLE_PATTERN,
tokenTypes: [tokType, tokenType]
});
}
});
});
return errors;
}
function testTokenType(str, pattern) {
/* istanbul ignore else */
if (isRegExp(pattern)) {
var regExpArray = pattern.exec(str);
return regExpArray !== null && regExpArray.index === 0;
}
else if (isFunction(pattern)) {
// maintain the API of custom patterns
return pattern(str, 0, [], {});
}
else if (has(pattern, "exec")) {
// maintain the API of custom patterns
return pattern.exec(str, 0, [], {});
}
else if (typeof pattern === "string") {
return pattern === str;
}
else {
throw Error("non exhaustive match");
}
}
function noMetaChar(regExp) {
//https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/RegExp
var metaChars = [
".",
"\\",
"[",
"]",
"|",
"^",
"$",
"(",
")",
"?",
"*",
"+",
"{"
];
return (find(metaChars, function (char) { return regExp.source.indexOf(char) !== -1; }) ===
undefined);
}
export function addStartOfInput(pattern) {
var flags = pattern.ignoreCase ? "i" : "";
// always wrapping in a non-capturing group preceded by '^' to make sure matching can only work on start of input.
// duplicate/redundant start of input markers have no meaning (/^^^^A/ === /^A/)
return new RegExp("^(?:" + pattern.source + ")", flags);
}
export function addStickyFlag(pattern) {
var flags = pattern.ignoreCase ? "iy" : "y";
// no wrapping is needed here, the sticky flag ('y') itself anchors each match attempt at the regExp's lastIndex.
return new RegExp("" + pattern.source, flags);
}
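// Editor's illustration: both helpers keep the original source and only change how or
// where it may match:
// addStartOfInput(/while/i) -> /^(?:while)/i (anchored, requires chopping the input)
// addStickyFlag(/while/i)   -> /while/iy (matches exactly at lastIndex, input stays intact)
var anchoredDemo = addStartOfInput(/while/i);
anchoredDemo.test("  While"); // -> false, any prefix before the keyword prevents a match
anchoredDemo.test("WHILE (x)"); // -> true, the ignoreCase flag was preserved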
export function performRuntimeChecks(lexerDefinition, trackLines, lineTerminatorCharacters) {
var errors = [];
// some run time checks to help the end users.
if (!has(lexerDefinition, DEFAULT_MODE)) {
errors.push({
message: "A MultiMode Lexer cannot be initialized without a <" +
DEFAULT_MODE +
"> property in its definition\n",
type: LexerDefinitionErrorType.MULTI_MODE_LEXER_WITHOUT_DEFAULT_MODE
});
}
if (!has(lexerDefinition, MODES)) {
errors.push({
message: "A MultiMode Lexer cannot be initialized without a <" +
MODES +
"> property in its definition\n",
type: LexerDefinitionErrorType.MULTI_MODE_LEXER_WITHOUT_MODES_PROPERTY
});
}
if (has(lexerDefinition, MODES) &&
has(lexerDefinition, DEFAULT_MODE) &&
!has(lexerDefinition.modes, lexerDefinition.defaultMode)) {
errors.push({
message: "A MultiMode Lexer cannot be initialized with a " + DEFAULT_MODE + ": <" + lexerDefinition.defaultMode + ">" +
"which does not exist\n",
type: LexerDefinitionErrorType.MULTI_MODE_LEXER_DEFAULT_MODE_VALUE_DOES_NOT_EXIST
});
}
if (has(lexerDefinition, MODES)) {
forEach(lexerDefinition.modes, function (currModeValue, currModeName) {
forEach(currModeValue, function (currTokType, currIdx) {
if (isUndefined(currTokType)) {
errors.push({
message: "A Lexer cannot be initialized using an undefined Token Type. Mode:" +
("<" + currModeName + "> at index: <" + currIdx + ">\n"),
type: LexerDefinitionErrorType.LEXER_DEFINITION_CANNOT_CONTAIN_UNDEFINED
});
}
});
});
}
return errors;
}
export function performWarningRuntimeChecks(lexerDefinition, trackLines, lineTerminatorCharacters) {
var warnings = [];
var hasAnyLineBreak = false;
var allTokenTypes = compact(flatten(mapValues(lexerDefinition.modes, function (tokTypes) { return tokTypes; })));
var concreteTokenTypes = reject(allTokenTypes, function (currType) { return currType[PATTERN] === Lexer.NA; });
var terminatorCharCodes = getCharCodes(lineTerminatorCharacters);
if (trackLines) {
forEach(concreteTokenTypes, function (tokType) {
var currIssue = checkLineBreaksIssues(tokType, terminatorCharCodes);
if (currIssue !== false) {
var message = buildLineBreakIssueMessage(tokType, currIssue);
var warningDescriptor = {
message: message,
type: currIssue.issue,
tokenType: tokType
};
warnings.push(warningDescriptor);
}
else {
// we don't want to attempt to scan if the user explicitly specified the line_breaks option.
if (has(tokType, "LINE_BREAKS")) {
if (tokType.LINE_BREAKS === true) {
hasAnyLineBreak = true;
}
}
else {
if (canMatchCharCode(terminatorCharCodes, tokType.PATTERN)) {
hasAnyLineBreak = true;
}
}
}
});
}
if (trackLines && !hasAnyLineBreak) {
warnings.push({
message: "Warning: No LINE_BREAKS Found.\n" +
"\tThis Lexer has been defined to track line and column information,\n" +
"\tBut none of the Token Types can be identified as matching a line terminator.\n" +
"\tSee https://sap.github.io/chevrotain/docs/guide/resolving_lexer_errors.html#LINE_BREAKS \n" +
"\tfor details.",
type: LexerDefinitionErrorType.NO_LINE_BREAKS_FLAGS
});
}
return warnings;
}
export function cloneEmptyGroups(emptyGroups) {
var clonedResult = {};
var groupKeys = keys(emptyGroups);
forEach(groupKeys, function (currKey) {
var currGroupValue = emptyGroups[currKey];
/* istanbul ignore else */
if (isArray(currGroupValue)) {
clonedResult[currKey] = [];
}
else {
throw Error("non exhaustive match");
}
});
return clonedResult;
}
// TODO: refactor to avoid duplication
export function isCustomPattern(tokenType) {
var pattern = tokenType.PATTERN;
/* istanbul ignore else */
if (isRegExp(pattern)) {
return false;
}
else if (isFunction(pattern)) {
// CustomPatternMatcherFunc - custom patterns do not require any transformations, only wrapping in a RegExp-like object
return true;
}
else if (has(pattern, "exec")) {
// ICustomPattern
return true;
}
else if (isString(pattern)) {
return false;
}
else {
throw Error("non exhaustive match");
}
}
export function isShortPattern(pattern) {
if (isString(pattern) && pattern.length === 1) {
return pattern.charCodeAt(0);
}
else {
return false;
}
}
/**
* Faster than using a RegExp for default newline detection during lexing.
*/
export var LineTerminatorOptimizedTester = {
// implements /\n|\r\n?/g.test
test: function (text) {
var len = text.length;
for (var i = this.lastIndex; i < len; i++) {
var c = text.charCodeAt(i);
if (c === 10) {
this.lastIndex = i + 1;
return true;
}
else if (c === 13) {
if (text.charCodeAt(i + 1) === 10) {
this.lastIndex = i + 2;
}
else {
this.lastIndex = i + 1;
}
return true;
}
}
return false;
},
lastIndex: 0
};
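// Editor's sketch of the contract: behaves like /\n|\r\n?/g.test, including the stateful
// lastIndex handling that the tokenizer loop relies on.
LineTerminatorOptimizedTester.lastIndex = 0;
LineTerminatorOptimizedTester.test("a\r\nb"); // -> true, lastIndex is now 3 (past "\r\n")
LineTerminatorOptimizedTester.test("a\r\nb"); // -> false, no further terminator after offset 3
LineTerminatorOptimizedTester.lastIndex = 0;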
function checkLineBreaksIssues(tokType, lineTerminatorCharCodes) {
if (has(tokType, "LINE_BREAKS")) {
// if the user explicitly declared the line_breaks option we will respect their choice
// and assume it is correct.
return false;
}
else {
/* istanbul ignore else */
if (isRegExp(tokType.PATTERN)) {
try {
canMatchCharCode(lineTerminatorCharCodes, tokType.PATTERN);
}
catch (e) {
/* istanbul ignore next - to test this we would have to mock <canMatchCharCode> to throw an error */
return {
issue: LexerDefinitionErrorType.IDENTIFY_TERMINATOR,
errMsg: e.message
};
}
return false;
}
else if (isString(tokType.PATTERN)) {
// string literal patterns can always be analyzed to detect line terminator usage
return false;
}
else if (isCustomPattern(tokType)) {
// custom token types
return { issue: LexerDefinitionErrorType.CUSTOM_LINE_BREAK };
}
else {
throw Error("non exhaustive match");
}
}
}
export function buildLineBreakIssueMessage(tokType, details) {
/* istanbul ignore else */
if (details.issue === LexerDefinitionErrorType.IDENTIFY_TERMINATOR) {
return ("Warning: unable to identify line terminator usage in pattern.\n" +
("\tThe problem is in the <" + tokType.name + "> Token Type\n") +
("\t Root cause: " + details.errMsg + ".\n") +
"\tFor details See: https://sap.github.io/chevrotain/docs/guide/resolving_lexer_errors.html#IDENTIFY_TERMINATOR");
}
else if (details.issue === LexerDefinitionErrorType.CUSTOM_LINE_BREAK) {
return ("Warning: A Custom Token Pattern should specify the <line_breaks> option.\n" +
("\tThe problem is in the <" + tokType.name + "> Token Type\n") +
"\tFor details See: https://sap.github.io/chevrotain/docs/guide/resolving_lexer_errors.html#CUSTOM_LINE_BREAK");
}
else {
throw Error("non exhaustive match");
}
}
function getCharCodes(charsOrCodes) {
var charCodes = map(charsOrCodes, function (numOrString) {
if (isString(numOrString) && numOrString.length > 0) {
return numOrString.charCodeAt(0);
}
else {
return numOrString;
}
});
return charCodes;
}
function addToMapOfArrays(map, key, value) {
if (map[key] === undefined) {
map[key] = [value];
}
else {
map[key].push(value);
}
}
export var minOptimizationVal = 256;
/**
* We are mapping charCodes above ASCII (255) into buckets, each covering 255 codes.
* This is because ASCII chars are the most common start chars, so each one of those
* gets its own possible token configs vector.
*
* Tokens starting with charCodes "above" ASCII are uncommon, so we can "afford"
* to place these into shared buckets of possible token configs. What we gain from
* this is avoiding the creation of an optimization 'charCodeToPatternIdxToConfig'
* which would contain 10,000+ arrays of small size (e.g. the unicode Identifiers scenario).
* Our 'charCodeToPatternIdxToConfig' max length will now be roughly:
* 256 + (2^16 / 255) ≈ 513 (instead of 65536)
*
* note the hack for fast division integer part extraction
* See: https://stackoverflow.com/a/4228528
*/
export function charCodeToOptimizedIndex(charCode) {
return charCode < minOptimizationVal
? charCode
: charCodeToOptimizedIdxMap[charCode];
}
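// Worked example (editor's addition, valid once initCharCodeToOptimizedIndexMap has run):
// charCodeToOptimizedIndex(97)     -> 97 ("a": below 256, maps to itself)
// charCodeToOptimizedIndex(0x4E2D) -> 255 + ~~(0x4E2D / 255) === 333 (bucketed CJK char)
// so thousands of rare start chars collapse into at most ~257 shared buckets.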
/**
* This is a compromise between cold start / hot running performance
* Creating this array takes ~3ms on a modern machine,
* But if we perform the computation at runtime as needed the CSS Lexer benchmark
* performance degrades by ~10%
*
* TODO: Perhaps it should be lazy initialized only if a charCode > 255 is used.
*/
var charCodeToOptimizedIdxMap = [];
function initCharCodeToOptimizedIndexMap() {
if (isEmpty(charCodeToOptimizedIdxMap)) {
charCodeToOptimizedIdxMap = new Array(65536);
for (var i = 0; i < 65536; i++) {
/* tslint:disable */
charCodeToOptimizedIdxMap[i] = i > 255 ? 255 + ~~(i / 255) : i;
/* tslint:enable */
}
}
}
//# sourceMappingURL=lexer.js.map

node_modules/chevrotain/lib_esm/src/scan/lexer_errors_public.js generated vendored Normal file

@@ -0,0 +1,9 @@
export var defaultLexerErrorProvider = {
buildUnableToPopLexerModeMessage: function (token) {
return "Unable to pop Lexer Mode after encountering Token ->" + token.image + "<- The Mode Stack is empty";
},
buildUnexpectedCharactersMessage: function (fullText, startOffset, length, line, column) {
return ("unexpected character: ->" + fullText.charAt(startOffset) + "<- at offset: " + startOffset + "," + (" skipped " + length + " characters."));
}
};
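// A minimal custom provider sketch (editor's addition, hypothetical name): any object
// implementing these same two methods can be passed via the lexer config's
// <errorMessageProvider> option to replace the texts above.
var shoutingLexerErrorProvider = {
buildUnableToPopLexerModeMessage: function (token) {
return defaultLexerErrorProvider.buildUnableToPopLexerModeMessage(token).toUpperCase();
},
buildUnexpectedCharactersMessage: function (fullText, startOffset, length, line, column) {
return defaultLexerErrorProvider.buildUnexpectedCharactersMessage(fullText, startOffset, length, line, column).toUpperCase();
}
};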
//# sourceMappingURL=lexer_errors_public.js.map

node_modules/chevrotain/lib_esm/src/scan/lexer_public.js generated vendored Normal file

@@ -0,0 +1,672 @@
import { analyzeTokenTypes, charCodeToOptimizedIndex, cloneEmptyGroups, DEFAULT_MODE, LineTerminatorOptimizedTester, performRuntimeChecks, performWarningRuntimeChecks, SUPPORT_STICKY, validatePatterns } from "./lexer";
import { cloneArr, cloneObj, forEach, IDENTITY, isArray, isEmpty, isUndefined, keys, last, map, merge, NOOP, PRINT_WARNING, reduce, reject, timer, toFastProperties } from "../utils/utils";
import { augmentTokenTypes } from "./tokens";
import { defaultLexerErrorProvider } from "../scan/lexer_errors_public";
import { clearRegExpParserCache } from "./reg_exp_parser";
export var LexerDefinitionErrorType;
(function (LexerDefinitionErrorType) {
LexerDefinitionErrorType[LexerDefinitionErrorType["MISSING_PATTERN"] = 0] = "MISSING_PATTERN";
LexerDefinitionErrorType[LexerDefinitionErrorType["INVALID_PATTERN"] = 1] = "INVALID_PATTERN";
LexerDefinitionErrorType[LexerDefinitionErrorType["EOI_ANCHOR_FOUND"] = 2] = "EOI_ANCHOR_FOUND";
LexerDefinitionErrorType[LexerDefinitionErrorType["UNSUPPORTED_FLAGS_FOUND"] = 3] = "UNSUPPORTED_FLAGS_FOUND";
LexerDefinitionErrorType[LexerDefinitionErrorType["DUPLICATE_PATTERNS_FOUND"] = 4] = "DUPLICATE_PATTERNS_FOUND";
LexerDefinitionErrorType[LexerDefinitionErrorType["INVALID_GROUP_TYPE_FOUND"] = 5] = "INVALID_GROUP_TYPE_FOUND";
LexerDefinitionErrorType[LexerDefinitionErrorType["PUSH_MODE_DOES_NOT_EXIST"] = 6] = "PUSH_MODE_DOES_NOT_EXIST";
LexerDefinitionErrorType[LexerDefinitionErrorType["MULTI_MODE_LEXER_WITHOUT_DEFAULT_MODE"] = 7] = "MULTI_MODE_LEXER_WITHOUT_DEFAULT_MODE";
LexerDefinitionErrorType[LexerDefinitionErrorType["MULTI_MODE_LEXER_WITHOUT_MODES_PROPERTY"] = 8] = "MULTI_MODE_LEXER_WITHOUT_MODES_PROPERTY";
LexerDefinitionErrorType[LexerDefinitionErrorType["MULTI_MODE_LEXER_DEFAULT_MODE_VALUE_DOES_NOT_EXIST"] = 9] = "MULTI_MODE_LEXER_DEFAULT_MODE_VALUE_DOES_NOT_EXIST";
LexerDefinitionErrorType[LexerDefinitionErrorType["LEXER_DEFINITION_CANNOT_CONTAIN_UNDEFINED"] = 10] = "LEXER_DEFINITION_CANNOT_CONTAIN_UNDEFINED";
LexerDefinitionErrorType[LexerDefinitionErrorType["SOI_ANCHOR_FOUND"] = 11] = "SOI_ANCHOR_FOUND";
LexerDefinitionErrorType[LexerDefinitionErrorType["EMPTY_MATCH_PATTERN"] = 12] = "EMPTY_MATCH_PATTERN";
LexerDefinitionErrorType[LexerDefinitionErrorType["NO_LINE_BREAKS_FLAGS"] = 13] = "NO_LINE_BREAKS_FLAGS";
LexerDefinitionErrorType[LexerDefinitionErrorType["UNREACHABLE_PATTERN"] = 14] = "UNREACHABLE_PATTERN";
LexerDefinitionErrorType[LexerDefinitionErrorType["IDENTIFY_TERMINATOR"] = 15] = "IDENTIFY_TERMINATOR";
LexerDefinitionErrorType[LexerDefinitionErrorType["CUSTOM_LINE_BREAK"] = 16] = "CUSTOM_LINE_BREAK";
})(LexerDefinitionErrorType || (LexerDefinitionErrorType = {}));
var DEFAULT_LEXER_CONFIG = {
deferDefinitionErrorsHandling: false,
positionTracking: "full",
lineTerminatorsPattern: /\n|\r\n?/g,
lineTerminatorCharacters: ["\n", "\r"],
ensureOptimizations: false,
safeMode: false,
errorMessageProvider: defaultLexerErrorProvider,
traceInitPerf: false,
skipValidations: false
};
Object.freeze(DEFAULT_LEXER_CONFIG);
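// Hedged usage sketch (editor's addition; createToken is chevrotain's public token
// factory, and the token names here are hypothetical):
// var WhiteSpace = createToken({ name: "WhiteSpace", pattern: /\s+/, group: Lexer.SKIPPED });
// var Identifier = createToken({ name: "Identifier", pattern: /[a-zA-Z]\w*/ });
// var myLexer = new Lexer([WhiteSpace, Identifier], { positionTracking: "onlyStart" });
// myLexer.tokenize("hello world").tokens -> two Identifier tokens, the whitespace is skipped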
var Lexer = /** @class */ (function () {
function Lexer(lexerDefinition, config) {
var _this = this;
if (config === void 0) { config = DEFAULT_LEXER_CONFIG; }
this.lexerDefinition = lexerDefinition;
this.lexerDefinitionErrors = [];
this.lexerDefinitionWarning = [];
this.patternIdxToConfig = {};
this.charCodeToPatternIdxToConfig = {};
this.modes = [];
this.emptyGroups = {};
this.config = undefined;
this.trackStartLines = true;
this.trackEndLines = true;
this.hasCustom = false;
this.canModeBeOptimized = {};
if (typeof config === "boolean") {
throw Error("The second argument to the Lexer constructor is now an ILexerConfig Object.\n" +
"a boolean 2nd argument is no longer supported");
}
// todo: defaults func?
this.config = merge(DEFAULT_LEXER_CONFIG, config);
var traceInitVal = this.config.traceInitPerf;
if (traceInitVal === true) {
this.traceInitMaxIdent = Infinity;
this.traceInitPerf = true;
}
else if (typeof traceInitVal === "number") {
this.traceInitMaxIdent = traceInitVal;
this.traceInitPerf = true;
}
this.traceInitIndent = -1;
this.TRACE_INIT("Lexer Constructor", function () {
var actualDefinition;
var hasOnlySingleMode = true;
_this.TRACE_INIT("Lexer Config handling", function () {
if (_this.config.lineTerminatorsPattern ===
DEFAULT_LEXER_CONFIG.lineTerminatorsPattern) {
// optimized built-in implementation for the default definition of lineTerminators
_this.config.lineTerminatorsPattern = LineTerminatorOptimizedTester;
}
else {
if (_this.config.lineTerminatorCharacters ===
DEFAULT_LEXER_CONFIG.lineTerminatorCharacters) {
throw Error("Error: Missing <lineTerminatorCharacters> property on the Lexer config.\n" +
"\tFor details See: https://sap.github.io/chevrotain/docs/guide/resolving_lexer_errors.html#MISSING_LINE_TERM_CHARS");
}
}
if (config.safeMode && config.ensureOptimizations) {
throw Error('"safeMode" and "ensureOptimizations" flags are mutually exclusive.');
}
_this.trackStartLines = /full|onlyStart/i.test(_this.config.positionTracking);
_this.trackEndLines = /full/i.test(_this.config.positionTracking);
// Convert SingleModeLexerDefinition into a IMultiModeLexerDefinition.
if (isArray(lexerDefinition)) {
actualDefinition = { modes: {} };
actualDefinition.modes[DEFAULT_MODE] = cloneArr(lexerDefinition);
actualDefinition[DEFAULT_MODE] = DEFAULT_MODE;
}
else {
// no conversion needed, the input should already be an IMultiModeLexerDefinition
hasOnlySingleMode = false;
actualDefinition = cloneObj((lexerDefinition));
}
});
if (_this.config.skipValidations === false) {
_this.TRACE_INIT("performRuntimeChecks", function () {
_this.lexerDefinitionErrors = _this.lexerDefinitionErrors.concat(performRuntimeChecks(actualDefinition, _this.trackStartLines, _this.config.lineTerminatorCharacters));
});
_this.TRACE_INIT("performWarningRuntimeChecks", function () {
_this.lexerDefinitionWarning = _this.lexerDefinitionWarning.concat(performWarningRuntimeChecks(actualDefinition, _this.trackStartLines, _this.config.lineTerminatorCharacters));
});
}
// for extra robustness to avoid throwing a non-informative error message
actualDefinition.modes = actualDefinition.modes
? actualDefinition.modes
: {};
// an error of undefined TokenTypes will be detected in "performRuntimeChecks" above.
// this transformation is to increase robustness in the case of partially invalid lexer definition.
forEach(actualDefinition.modes, function (currModeValue, currModeName) {
actualDefinition.modes[currModeName] = reject(currModeValue, function (currTokType) { return isUndefined(currTokType); });
});
var allModeNames = keys(actualDefinition.modes);
forEach(actualDefinition.modes, function (currModDef, currModName) {
_this.TRACE_INIT("Mode: <" + currModName + "> processing", function () {
_this.modes.push(currModName);
if (_this.config.skipValidations === false) {
_this.TRACE_INIT("validatePatterns", function () {
_this.lexerDefinitionErrors = _this.lexerDefinitionErrors.concat(validatePatterns(currModDef, allModeNames));
});
}
// If definition errors were encountered, the analysis phase may fail unexpectedly.
// Considering a lexer with definition errors may never be used, there is no point
// in performing the analysis anyhow...
if (isEmpty(_this.lexerDefinitionErrors)) {
augmentTokenTypes(currModDef);
var currAnalyzeResult_1;
_this.TRACE_INIT("analyzeTokenTypes", function () {
currAnalyzeResult_1 = analyzeTokenTypes(currModDef, {
lineTerminatorCharacters: _this.config
.lineTerminatorCharacters,
positionTracking: config.positionTracking,
ensureOptimizations: config.ensureOptimizations,
safeMode: config.safeMode,
tracer: _this.TRACE_INIT.bind(_this)
});
});
_this.patternIdxToConfig[currModName] =
currAnalyzeResult_1.patternIdxToConfig;
_this.charCodeToPatternIdxToConfig[currModName] =
currAnalyzeResult_1.charCodeToPatternIdxToConfig;
_this.emptyGroups = merge(_this.emptyGroups, currAnalyzeResult_1.emptyGroups);
_this.hasCustom =
currAnalyzeResult_1.hasCustom || _this.hasCustom;
_this.canModeBeOptimized[currModName] =
currAnalyzeResult_1.canBeOptimized;
}
});
});
_this.defaultMode = actualDefinition.defaultMode;
if (!isEmpty(_this.lexerDefinitionErrors) &&
!_this.config.deferDefinitionErrorsHandling) {
var allErrMessages = map(_this.lexerDefinitionErrors, function (error) {
return error.message;
});
var allErrMessagesString = allErrMessages.join("-----------------------\n");
throw new Error("Errors detected in definition of Lexer:\n" +
allErrMessagesString);
}
// Only print warnings if there are no errors; this avoids polluting the log with warnings that are side effects of those errors.
forEach(_this.lexerDefinitionWarning, function (warningDescriptor) {
PRINT_WARNING(warningDescriptor.message);
});
_this.TRACE_INIT("Choosing sub-methods implementations", function () {
// Choose the relevant internal implementations for this specific parser.
// These implementations should be in-lined by the JavaScript engine
// to provide optimal performance in each scenario.
if (SUPPORT_STICKY) {
_this.chopInput = IDENTITY;
_this.match = _this.matchWithTest;
}
else {
_this.updateLastIndex = NOOP;
_this.match = _this.matchWithExec;
}
if (hasOnlySingleMode) {
_this.handleModes = NOOP;
}
if (_this.trackStartLines === false) {
_this.computeNewColumn = IDENTITY;
}
if (_this.trackEndLines === false) {
_this.updateTokenEndLineColumnLocation = NOOP;
}
if (/full/i.test(_this.config.positionTracking)) {
_this.createTokenInstance = _this.createFullToken;
}
else if (/onlyStart/i.test(_this.config.positionTracking)) {
_this.createTokenInstance = _this.createStartOnlyToken;
}
else if (/onlyOffset/i.test(_this.config.positionTracking)) {
_this.createTokenInstance = _this.createOffsetOnlyToken;
}
else {
throw Error("Invalid <positionTracking> config option: \"" + _this.config.positionTracking + "\"");
}
if (_this.hasCustom) {
_this.addToken = _this.addTokenUsingPush;
_this.handlePayload = _this.handlePayloadWithCustom;
}
else {
_this.addToken = _this.addTokenUsingMemberAccess;
_this.handlePayload = _this.handlePayloadNoCustom;
}
});
_this.TRACE_INIT("Failed Optimization Warnings", function () {
var unOptimizedModes = reduce(_this.canModeBeOptimized, function (cannotBeOptimized, canBeOptimized, modeName) {
if (canBeOptimized === false) {
cannotBeOptimized.push(modeName);
}
return cannotBeOptimized;
}, []);
if (config.ensureOptimizations && !isEmpty(unOptimizedModes)) {
throw Error("Lexer Modes: < " + unOptimizedModes.join(", ") + " > cannot be optimized.\n" +
'\t Disable the "ensureOptimizations" lexer config flag to silently ignore this and run the lexer in an un-optimized mode.\n' +
"\t Or inspect the console log for details on how to resolve these issues.");
}
});
_this.TRACE_INIT("clearRegExpParserCache", function () {
clearRegExpParserCache();
});
_this.TRACE_INIT("toFastProperties", function () {
toFastProperties(_this);
});
});
}
Lexer.prototype.tokenize = function (text, initialMode) {
if (initialMode === void 0) { initialMode = this.defaultMode; }
if (!isEmpty(this.lexerDefinitionErrors)) {
var allErrMessages = map(this.lexerDefinitionErrors, function (error) {
return error.message;
});
var allErrMessagesString = allErrMessages.join("-----------------------\n");
throw new Error("Unable to Tokenize because Errors detected in definition of Lexer:\n" +
allErrMessagesString);
}
var lexResult = this.tokenizeInternal(text, initialMode);
return lexResult;
};
// There is quite a bit of duplication between this and "tokenizeInternalLazy"
// This is intentional due to performance considerations.
Lexer.prototype.tokenizeInternal = function (text, initialMode) {
var _this = this;
var i, j, matchAltImage, longerAltIdx, matchedImage, payload, altPayload, imageLength, group, tokType, newToken, errLength, droppedChar, msg, match;
var orgText = text;
var orgLength = orgText.length;
var offset = 0;
var matchedTokensIndex = 0;
// initializing the tokensArray to the "guessed" size.
// guessing too little will still reduce the number of array re-sizes on pushes.
// guessing too large (tested by guessing x4 too large) may cost a bit more memory
// but would still have a faster runtime by avoiding (all but one) array resizing.
var guessedNumberOfTokens = this.hasCustom
? 0 // guessing a size would break custom token pattern APIs, as the matchedTokens array would contain undefined elements.
: Math.floor(text.length / 10);
var matchedTokens = new Array(guessedNumberOfTokens);
var errors = [];
var line = this.trackStartLines ? 1 : undefined;
var column = this.trackStartLines ? 1 : undefined;
var groups = cloneEmptyGroups(this.emptyGroups);
var trackLines = this.trackStartLines;
var lineTerminatorPattern = this.config.lineTerminatorsPattern;
var currModePatternsLength = 0;
var patternIdxToConfig = [];
var currCharCodeToPatternIdxToConfig = [];
var modeStack = [];
var emptyArray = [];
Object.freeze(emptyArray);
var getPossiblePatterns = undefined;
function getPossiblePatternsSlow() {
return patternIdxToConfig;
}
function getPossiblePatternsOptimized(charCode) {
var optimizedCharIdx = charCodeToOptimizedIndex(charCode);
var possiblePatterns = currCharCodeToPatternIdxToConfig[optimizedCharIdx];
if (possiblePatterns === undefined) {
return emptyArray;
}
else {
return possiblePatterns;
}
}
var pop_mode = function (popToken) {
// TODO: perhaps avoid this error in the edge case where there is no more input?
if (modeStack.length === 1 &&
// if we have both a POP_MODE and a PUSH_MODE this is in-fact a "transition"
// So no error should occur.
popToken.tokenType.PUSH_MODE === undefined) {
// if we try to pop the last mode the lexer will no longer have ANY mode.
// thus the pop is ignored, an error will be created and the lexer will continue scanning in the previous mode.
var msg_1 = _this.config.errorMessageProvider.buildUnableToPopLexerModeMessage(popToken);
errors.push({
offset: popToken.startOffset,
line: popToken.startLine !== undefined
? popToken.startLine
: undefined,
column: popToken.startColumn !== undefined
? popToken.startColumn
: undefined,
length: popToken.image.length,
message: msg_1
});
}
else {
modeStack.pop();
var newMode = last(modeStack);
patternIdxToConfig = _this.patternIdxToConfig[newMode];
currCharCodeToPatternIdxToConfig = _this
.charCodeToPatternIdxToConfig[newMode];
currModePatternsLength = patternIdxToConfig.length;
var modeCanBeOptimized = _this.canModeBeOptimized[newMode] &&
_this.config.safeMode === false;
if (currCharCodeToPatternIdxToConfig && modeCanBeOptimized) {
getPossiblePatterns = getPossiblePatternsOptimized;
}
else {
getPossiblePatterns = getPossiblePatternsSlow;
}
}
};
function push_mode(newMode) {
modeStack.push(newMode);
currCharCodeToPatternIdxToConfig = this
.charCodeToPatternIdxToConfig[newMode];
patternIdxToConfig = this.patternIdxToConfig[newMode];
currModePatternsLength = patternIdxToConfig.length;
var modeCanBeOptimized = this.canModeBeOptimized[newMode] &&
this.config.safeMode === false;
if (currCharCodeToPatternIdxToConfig && modeCanBeOptimized) {
getPossiblePatterns = getPossiblePatternsOptimized;
}
else {
getPossiblePatterns = getPossiblePatternsSlow;
}
}
// this pattern seems to avoid a V8 de-optimization, although that de-optimization does not
// seem to matter performance wise.
push_mode.call(this, initialMode);
var currConfig;
while (offset < orgLength) {
matchedImage = null;
var nextCharCode = orgText.charCodeAt(offset);
var chosenPatternIdxToConfig = getPossiblePatterns(nextCharCode);
var chosenPatternsLength = chosenPatternIdxToConfig.length;
for (i = 0; i < chosenPatternsLength; i++) {
currConfig = chosenPatternIdxToConfig[i];
var currPattern = currConfig.pattern;
payload = null;
// manually in-lined because > 600 chars won't be in-lined in V8
var singleCharCode = currConfig.short;
if (singleCharCode !== false) {
if (nextCharCode === singleCharCode) {
// single character string
matchedImage = currPattern;
}
}
else if (currConfig.isCustom === true) {
match = currPattern.exec(orgText, offset, matchedTokens, groups);
if (match !== null) {
matchedImage = match[0];
if (match.payload !== undefined) {
payload = match.payload;
}
}
else {
matchedImage = null;
}
}
else {
this.updateLastIndex(currPattern, offset);
matchedImage = this.match(currPattern, text, offset);
}
if (matchedImage !== null) {
// even though this pattern matched, we must still try another, longer alternative.
// this can be used to prioritize keywords over identifiers
longerAltIdx = currConfig.longerAlt;
if (longerAltIdx !== undefined) {
// TODO: micro optimize, avoid extra prop access
// by saving/linking longerAlt on the original config?
var longerAltConfig = patternIdxToConfig[longerAltIdx];
var longerAltPattern = longerAltConfig.pattern;
altPayload = null;
// single Char can never be a longer alt so no need to test it.
// manually in-lined because > 600 chars won't be in-lined in V8
if (longerAltConfig.isCustom === true) {
match = longerAltPattern.exec(orgText, offset, matchedTokens, groups);
if (match !== null) {
matchAltImage = match[0];
if (match.payload !== undefined) {
altPayload = match.payload;
}
}
else {
matchAltImage = null;
}
}
else {
this.updateLastIndex(longerAltPattern, offset);
matchAltImage = this.match(longerAltPattern, text, offset);
}
if (matchAltImage &&
matchAltImage.length > matchedImage.length) {
matchedImage = matchAltImage;
payload = altPayload;
currConfig = longerAltConfig;
}
}
break;
}
}
// successful match
if (matchedImage !== null) {
imageLength = matchedImage.length;
group = currConfig.group;
if (group !== undefined) {
tokType = currConfig.tokenTypeIdx;
// TODO: "offset + imageLength" and the new column may be computed twice in case of "full" location information inside
// createFullToken method
newToken = this.createTokenInstance(matchedImage, offset, tokType, currConfig.tokenType, line, column, imageLength);
this.handlePayload(newToken, payload);
// TODO: optimize NOOP in case there are no special groups?
if (group === false) {
matchedTokensIndex = this.addToken(matchedTokens, matchedTokensIndex, newToken);
}
else {
groups[group].push(newToken);
}
}
text = this.chopInput(text, imageLength);
offset = offset + imageLength;
// TODO: with newlines the column may be assigned twice
column = this.computeNewColumn(column, imageLength);
if (trackLines === true &&
currConfig.canLineTerminator === true) {
var numOfLTsInMatch = 0;
var foundTerminator = void 0;
var lastLTEndOffset = void 0;
lineTerminatorPattern.lastIndex = 0;
do {
foundTerminator = lineTerminatorPattern.test(matchedImage);
if (foundTerminator === true) {
lastLTEndOffset =
lineTerminatorPattern.lastIndex - 1;
numOfLTsInMatch++;
}
} while (foundTerminator === true);
if (numOfLTsInMatch !== 0) {
line = line + numOfLTsInMatch;
column = imageLength - lastLTEndOffset;
this.updateTokenEndLineColumnLocation(newToken, group, lastLTEndOffset, numOfLTsInMatch, line, column, imageLength);
}
}
// will be NOOP if no modes present
this.handleModes(currConfig, pop_mode, push_mode, newToken);
}
else {
// error recovery, drop characters until we identify a valid token's start point
var errorStartOffset = offset;
var errorLine = line;
var errorColumn = column;
var foundResyncPoint = false;
while (!foundResyncPoint && offset < orgLength) {
// drop chars until we succeed in matching something
droppedChar = orgText.charCodeAt(offset);
// Identity Func (when sticky flag is enabled)
text = this.chopInput(text, 1);
offset++;
for (j = 0; j < currModePatternsLength; j++) {
var currConfig_1 = patternIdxToConfig[j];
var currPattern = currConfig_1.pattern;
// manually in-lined because > 600 chars won't be in-lined in V8
var singleCharCode = currConfig_1.short;
if (singleCharCode !== false) {
if (orgText.charCodeAt(offset) === singleCharCode) {
// single character string
foundResyncPoint = true;
}
}
else if (currConfig_1.isCustom === true) {
foundResyncPoint =
currPattern.exec(orgText, offset, matchedTokens, groups) !== null;
}
else {
this.updateLastIndex(currPattern, offset);
foundResyncPoint = currPattern.exec(text) !== null;
}
if (foundResyncPoint === true) {
break;
}
}
}
errLength = offset - errorStartOffset;
// at this point we either re-synced or reached the end of the input text
msg = this.config.errorMessageProvider.buildUnexpectedCharactersMessage(orgText, errorStartOffset, errLength, errorLine, errorColumn);
errors.push({
offset: errorStartOffset,
line: errorLine,
column: errorColumn,
length: errLength,
message: msg
});
}
}
// if we do have custom patterns, tokens were pushed directly into the matchedTokens array, so its length is already correct.
// TODO: custom tokens should not push directly??
if (!this.hasCustom) {
// if we guessed a too large size for the tokens array this will shrink it to the right size.
matchedTokens.length = matchedTokensIndex;
}
return {
tokens: matchedTokens,
groups: groups,
errors: errors
};
};
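// Shape of the returned lex result (editor's note): "tokens" holds the main token
// vector, "groups" maps each custom group name to the tokens routed into it, and each
// error descriptor carries { offset, line, column, length, message }.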
Lexer.prototype.handleModes = function (config, pop_mode, push_mode, newToken) {
if (config.pop === true) {
// need to save the PUSH_MODE property before popping, as once the mode is popped
// the pattern config lookups already reflect the new mode on top of the stack
var pushMode = config.push;
pop_mode(newToken);
if (pushMode !== undefined) {
push_mode.call(this, pushMode);
}
}
else if (config.push !== undefined) {
push_mode.call(this, config.push);
}
};
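// Editor's note: a token type carrying both POP_MODE and PUSH_MODE therefore acts as a
// mode transition, e.g. (hypothetical) createToken({ name: "EnterString", pattern: /"/,
// pop_mode: true, push_mode: "string_mode" }) replaces the current mode atomically
// instead of unwinding the stack twice.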
Lexer.prototype.chopInput = function (text, length) {
return text.substring(length);
};
Lexer.prototype.updateLastIndex = function (regExp, newLastIndex) {
regExp.lastIndex = newLastIndex;
};
// TODO: decrease this to under 600 characters? inspect the comment-stripping option in the TSC compiler
Lexer.prototype.updateTokenEndLineColumnLocation = function (newToken, group, lastLTIdx, numOfLTsInMatch, line, column, imageLength) {
var lastCharIsLT, fixForEndingInLT;
if (group !== undefined) {
// a non-skipped multi-line Token, need to update endLine/endColumn
lastCharIsLT = lastLTIdx === imageLength - 1;
fixForEndingInLT = lastCharIsLT ? -1 : 0;
if (!(numOfLTsInMatch === 1 && lastCharIsLT === true)) {
// if a token ends in a LT that last LT only affects the line numbering of following Tokens
newToken.endLine = line + fixForEndingInLT;
// the last LT in a token does not affect the endColumn either, as the [columnStart ... columnEnd)
// range is inclusive to exclusive.
newToken.endColumn = column - 1 + -fixForEndingInLT;
}
// else single LT in the last character of a token, no need to modify the endLine/EndColumn
}
};
Lexer.prototype.computeNewColumn = function (oldColumn, imageLength) {
return oldColumn + imageLength;
};
// Place holder, will be replaced by the correct variant according to the locationTracking option at runtime.
/* istanbul ignore next - place holder */
Lexer.prototype.createTokenInstance = function () {
var args = [];
for (var _i = 0; _i < arguments.length; _i++) {
args[_i] = arguments[_i];
}
return null;
};
Lexer.prototype.createOffsetOnlyToken = function (image, startOffset, tokenTypeIdx, tokenType) {
return {
image: image,
startOffset: startOffset,
tokenTypeIdx: tokenTypeIdx,
tokenType: tokenType
};
};
Lexer.prototype.createStartOnlyToken = function (image, startOffset, tokenTypeIdx, tokenType, startLine, startColumn) {
return {
image: image,
startOffset: startOffset,
startLine: startLine,
startColumn: startColumn,
tokenTypeIdx: tokenTypeIdx,
tokenType: tokenType
};
};
Lexer.prototype.createFullToken = function (image, startOffset, tokenTypeIdx, tokenType, startLine, startColumn, imageLength) {
return {
image: image,
startOffset: startOffset,
endOffset: startOffset + imageLength - 1,
startLine: startLine,
endLine: startLine,
startColumn: startColumn,
endColumn: startColumn + imageLength - 1,
tokenTypeIdx: tokenTypeIdx,
tokenType: tokenType
};
};
// Place holder, will be replaced by the correct variant according to the locationTracking option at runtime.
/* istanbul ignore next - place holder */
Lexer.prototype.addToken = function (tokenVector, index, tokenToAdd) {
return 666;
};
Lexer.prototype.addTokenUsingPush = function (tokenVector, index, tokenToAdd) {
tokenVector.push(tokenToAdd);
return index;
};
Lexer.prototype.addTokenUsingMemberAccess = function (tokenVector, index, tokenToAdd) {
tokenVector[index] = tokenToAdd;
index++;
return index;
};
// Place holder, will be replaced by the correct variant according to the hasCustom flag option at runtime.
/* istanbul ignore next - place holder */
Lexer.prototype.handlePayload = function (token, payload) { };
Lexer.prototype.handlePayloadNoCustom = function (token, payload) { };
Lexer.prototype.handlePayloadWithCustom = function (token, payload) {
if (payload !== null) {
token.payload = payload;
}
};
/* istanbul ignore next - place holder to be replaced with chosen alternative at runtime */
Lexer.prototype.match = function (pattern, text, offset) {
return null;
};
Lexer.prototype.matchWithTest = function (pattern, text, offset) {
var found = pattern.test(text);
if (found === true) {
return text.substring(offset, pattern.lastIndex);
}
return null;
};
Lexer.prototype.matchWithExec = function (pattern, text) {
var regExpArray = pattern.exec(text);
return regExpArray !== null ? regExpArray[0] : regExpArray;
};
// Duplicated from the parser's perf trace trait to allow future extraction
// of the lexer to a separate package.
Lexer.prototype.TRACE_INIT = function (phaseDesc, phaseImpl) {
// No need to optimize this using NOOP pattern because
// It is not called in a hot spot...
if (this.traceInitPerf === true) {
this.traceInitIndent++;
var indent = new Array(this.traceInitIndent + 1).join("\t");
if (this.traceInitIndent < this.traceInitMaxIdent) {
console.log(indent + "--> <" + phaseDesc + ">");
}
var _a = timer(phaseImpl), time = _a.time, value = _a.value;
/* istanbul ignore next - Difficult to reproduce specific performance behavior (>10ms) in tests */
var traceMethod = time > 10 ? console.warn : console.log;
if (this.traceInitIndent < this.traceInitMaxIdent) {
traceMethod(indent + "<-- <" + phaseDesc + "> time: " + time + "ms");
}
this.traceInitIndent--;
return value;
}
else {
return phaseImpl();
}
};
Lexer.SKIPPED = "This marks a skipped Token pattern, this means each token identified by it will" +
"be consumed and then thrown into oblivion, this can be used to for example to completely ignore whitespace.";
Lexer.NA = /NOT_APPLICABLE/;
return Lexer;
}());
export { Lexer };
//# sourceMappingURL=lexer_public.js.map

node_modules/chevrotain/lib_esm/src/scan/reg_exp.js generated vendored Normal file

@@ -0,0 +1,260 @@
var __extends = (this && this.__extends) || (function () {
var extendStatics = function (d, b) {
extendStatics = Object.setPrototypeOf ||
({ __proto__: [] } instanceof Array && function (d, b) { d.__proto__ = b; }) ||
function (d, b) { for (var p in b) if (b.hasOwnProperty(p)) d[p] = b[p]; };
return extendStatics(d, b);
};
return function (d, b) {
extendStatics(d, b);
function __() { this.constructor = d; }
d.prototype = b === null ? Object.create(b) : (__.prototype = b.prototype, new __());
};
})();
import { VERSION, BaseRegExpVisitor } from "regexp-to-ast";
import { forEach, contains, PRINT_ERROR, PRINT_WARNING, find, isArray, every, values } from "../utils/utils";
import { getRegExpAst } from "./reg_exp_parser";
import { charCodeToOptimizedIndex, minOptimizationVal } from "./lexer";
var complementErrorMessage = "Complement Sets are not supported for first char optimization";
export var failedOptimizationPrefixMsg = 'Unable to use "first char" lexer optimizations:\n';
export function getOptimizedStartCodesIndices(regExp, ensureOptimizations) {
if (ensureOptimizations === void 0) { ensureOptimizations = false; }
try {
var ast = getRegExpAst(regExp);
var firstChars = firstCharOptimizedIndices(ast.value, {}, ast.flags.ignoreCase);
return firstChars;
}
catch (e) {
/* istanbul ignore next */
// Testing this relies on the regexp-to-ast library having a bug...
// TODO: only the else branch needs to be ignored, try to fix with newer prettier / tsc
if (e.message === complementErrorMessage) {
if (ensureOptimizations) {
PRINT_WARNING("" + failedOptimizationPrefixMsg +
("\tUnable to optimize: < " + regExp.toString() + " >\n") +
"\tComplement Sets cannot be automatically optimized.\n" +
"\tThis will disable the lexer's first char optimizations.\n" +
"\tSee: https://sap.github.io/chevrotain/docs/guide/resolving_lexer_errors.html#COMPLEMENT for details.");
}
}
else {
var msgSuffix = "";
if (ensureOptimizations) {
msgSuffix =
"\n\tThis will disable the lexer's first char optimizations.\n" +
"\tSee: https://sap.github.io/chevrotain/docs/guide/resolving_lexer_errors.html#REGEXP_PARSING for details.";
}
PRINT_ERROR(failedOptimizationPrefixMsg + "\n" +
("\tFailed parsing: < " + regExp.toString() + " >\n") +
("\tUsing the regexp-to-ast library version: " + VERSION + "\n") +
"\tPlease open an issue at: https://github.com/bd82/regexp-to-ast/issues" +
msgSuffix);
}
}
return [];
}
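// Illustrative results, assuming the usual charCodeToOptimizedIndex mapping in
// which codes below minOptimizationVal map to themselves:
//
//   getOptimizedStartCodesIndices(/[ab]|c/)  // -> [97, 98, 99]
//   getOptimizedStartCodesIndices(/a/i)      // -> [65, 97] (ignoreCase adds "A")
//   getOptimizedStartCodesIndices(/[^a]/)    // -> [] (complement sets abort the optimization)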
export function firstCharOptimizedIndices(ast, result, ignoreCase) {
switch (ast.type) {
case "Disjunction":
for (var i = 0; i < ast.value.length; i++) {
firstCharOptimizedIndices(ast.value[i], result, ignoreCase);
}
break;
case "Alternative":
var terms = ast.value;
for (var i = 0; i < terms.length; i++) {
var term = terms[i];
                // skip terms that cannot affect the first char results
switch (term.type) {
case "EndAnchor":
                    // A group back reference cannot affect the potential starting chars:
                    // if a back reference is the first production, then the referenced group
                    // must have appeared BEFORE it, so its codes have already been added.
case "GroupBackReference":
// assertions do not affect potential starting codes
case "Lookahead":
case "NegativeLookahead":
case "StartAnchor":
case "WordBoundary":
case "NonWordBoundary":
continue;
}
var atom = term;
switch (atom.type) {
case "Character":
addOptimizedIdxToResult(atom.value, result, ignoreCase);
break;
case "Set":
if (atom.complement === true) {
throw Error(complementErrorMessage);
}
forEach(atom.value, function (code) {
if (typeof code === "number") {
addOptimizedIdxToResult(code, result, ignoreCase);
}
else {
// range
var range = code;
                            // cannot use the bucketed range optimization below when ignoreCase is enabled,
                            // as each code in the range must be case-folded individually
if (ignoreCase === true) {
for (var rangeCode = range.from; rangeCode <= range.to; rangeCode++) {
addOptimizedIdxToResult(rangeCode, result, ignoreCase);
}
}
// Optimization (2 orders of magnitude less work for very large ranges)
else {
                                // handle the values below minOptimizationVal individually
for (var rangeCode = range.from; rangeCode <= range.to &&
rangeCode < minOptimizationVal; rangeCode++) {
addOptimizedIdxToResult(rangeCode, result, ignoreCase);
}
// Less common charCode where we optimize for faster init time, by using larger "buckets"
if (range.to >= minOptimizationVal) {
var minUnOptVal = range.from >= minOptimizationVal
? range.from
: minOptimizationVal;
var maxUnOptVal = range.to;
var minOptIdx = charCodeToOptimizedIndex(minUnOptVal);
var maxOptIdx = charCodeToOptimizedIndex(maxUnOptVal);
for (var currOptIdx = minOptIdx; currOptIdx <= maxOptIdx; currOptIdx++) {
result[currOptIdx] = currOptIdx;
}
}
}
}
});
break;
case "Group":
firstCharOptimizedIndices(atom.value, result, ignoreCase);
break;
/* istanbul ignore next */
default:
throw Error("Non Exhaustive Match");
}
// reached a mandatory production, no more **start** codes can be found on this alternative
var isOptionalQuantifier = atom.quantifier !== undefined &&
atom.quantifier.atLeast === 0;
if (
// A group may be optional due to empty contents /(?:)/
// or if everything inside it is optional /((a)?)/
(atom.type === "Group" &&
isWholeOptional(atom) === false) ||
// If this term is not a group it may only be optional if it has an optional quantifier
(atom.type !== "Group" && isOptionalQuantifier === false)) {
break;
}
}
break;
/* istanbul ignore next */
default:
throw Error("non exhaustive match!");
}
// console.log(Object.keys(result).length)
return values(result);
}
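// Worked example of the Alternative scan above: in /a?bc/ the "a" is optional
// (quantifier.atLeast === 0), so scanning continues past it, records "b", and
// then breaks on the mandatory "b" before ever reaching "c":
//
//   firstCharOptimizedIndices(getRegExpAst(/a?bc/).value, {}, false)
//   // -> [97, 98], assuming the identity mapping for codes below minOptimizationVal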
function addOptimizedIdxToResult(code, result, ignoreCase) {
var optimizedCharIdx = charCodeToOptimizedIndex(code);
result[optimizedCharIdx] = optimizedCharIdx;
if (ignoreCase === true) {
handleIgnoreCase(code, result);
}
}
function handleIgnoreCase(code, result) {
var char = String.fromCharCode(code);
var upperChar = char.toUpperCase();
/* istanbul ignore else */
if (upperChar !== char) {
var optimizedCharIdx = charCodeToOptimizedIndex(upperChar.charCodeAt(0));
result[optimizedCharIdx] = optimizedCharIdx;
}
else {
var lowerChar = char.toLowerCase();
if (lowerChar !== char) {
var optimizedCharIdx = charCodeToOptimizedIndex(lowerChar.charCodeAt(0));
result[optimizedCharIdx] = optimizedCharIdx;
}
}
}
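// Example: for code 97 ("a") with ignoreCase enabled, addOptimizedIdxToResult
// records 97 and this helper adds 65 ("A"); for a caseless code such as 49 ("1"),
// neither branch fires and only 49 itself ends up in the result.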
function findCode(setNode, targetCharCodes) {
return find(setNode.value, function (codeOrRange) {
if (typeof codeOrRange === "number") {
return contains(targetCharCodes, codeOrRange);
}
else {
// range
var range_1 = codeOrRange;
return (find(targetCharCodes, function (targetCode) {
return range_1.from <= targetCode && targetCode <= range_1.to;
}) !== undefined);
}
});
}
function isWholeOptional(ast) {
if (ast.quantifier && ast.quantifier.atLeast === 0) {
return true;
}
if (!ast.value) {
return false;
}
return isArray(ast.value)
? every(ast.value, isWholeOptional)
: isWholeOptional(ast.value);
}
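// Examples of the recursion above (AST shapes as produced by regexp-to-ast):
//   /((a)?)/ -> whole-optional: the inner group carries an atLeast-0 quantifier
//   /(ab?)/  -> not whole-optional: the mandatory "a" Character returns false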
var CharCodeFinder = /** @class */ (function (_super) {
__extends(CharCodeFinder, _super);
function CharCodeFinder(targetCharCodes) {
var _this = _super.call(this) || this;
_this.targetCharCodes = targetCharCodes;
_this.found = false;
return _this;
}
CharCodeFinder.prototype.visitChildren = function (node) {
// No need to keep looking...
if (this.found === true) {
return;
}
        // Lookaheads are special-cased below because they do not actually consume any characters,
        // so finding a charCode inside a lookahead does not mean the regexp can actually match it.
switch (node.type) {
case "Lookahead":
this.visitLookahead(node);
return;
case "NegativeLookahead":
this.visitNegativeLookahead(node);
return;
}
_super.prototype.visitChildren.call(this, node);
};
CharCodeFinder.prototype.visitCharacter = function (node) {
if (contains(this.targetCharCodes, node.value)) {
this.found = true;
}
};
CharCodeFinder.prototype.visitSet = function (node) {
if (node.complement) {
if (findCode(node, this.targetCharCodes) === undefined) {
this.found = true;
}
}
else {
if (findCode(node, this.targetCharCodes) !== undefined) {
this.found = true;
}
}
};
return CharCodeFinder;
}(BaseRegExpVisitor));
export function canMatchCharCode(charCodes, pattern) {
if (pattern instanceof RegExp) {
var ast = getRegExpAst(pattern);
var charCodeFinder = new CharCodeFinder(charCodes);
charCodeFinder.visit(ast);
return charCodeFinder.found;
}
else {
return (find(pattern, function (char) {
return contains(charCodes, char.charCodeAt(0));
}) !== undefined);
}
}
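// Usage sketch: the lexer can use this to check, for example, whether a token
// pattern is able to match line terminators:
//
//   canMatchCharCode([10, 13], /\s+/)   // -> true  (\s includes \n and \r)
//   canMatchCharCode([10, 13], "foo")   // -> false (string patterns are checked char by char)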
//# sourceMappingURL=reg_exp.js.map

18
node_modules/chevrotain/lib_esm/src/scan/reg_exp_parser.js generated vendored Normal file
View File

@@ -0,0 +1,18 @@
import { RegExpParser } from "regexp-to-ast";
var regExpAstCache = {};
var regExpParser = new RegExpParser();
export function getRegExpAst(regExp) {
var regExpStr = regExp.toString();
if (regExpAstCache.hasOwnProperty(regExpStr)) {
return regExpAstCache[regExpStr];
}
else {
var regExpAst = regExpParser.pattern(regExpStr);
regExpAstCache[regExpStr] = regExpAst;
return regExpAst;
}
}
export function clearRegExpParserCache() {
regExpAstCache = {};
}
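// Note: the cache key is the regexp's full string form, flags included, so /a/
// and /a/g are parsed and cached separately (keys "/a/" and "/a/g").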
//# sourceMappingURL=reg_exp_parser.js.map

114
node_modules/chevrotain/lib_esm/src/scan/tokens.js generated vendored Normal file
View File

@@ -0,0 +1,114 @@
import { cloneArr, compact, contains, difference, flatten, forEach, has, isArray, isEmpty, map } from "../utils/utils";
export function tokenStructuredMatcher(tokInstance, tokConstructor) {
var instanceType = tokInstance.tokenTypeIdx;
if (instanceType === tokConstructor.tokenTypeIdx) {
return true;
}
else {
return (tokConstructor.isParent === true &&
tokConstructor.categoryMatchesMap[instanceType] === true);
}
}
// Optimized tokenMatcher in case our grammar does not use token categories.
// Being so tiny it is much more likely to be in-lined, and this avoids the function call overhead.
export function tokenStructuredMatcherNoCategories(token, tokType) {
return token.tokenTypeIdx === tokType.tokenTypeIdx;
}
export var tokenShortNameIdx = 1;
export var tokenIdxToClass = {};
export function augmentTokenTypes(tokenTypes) {
// collect the parent Token Types as well.
var tokenTypesAndParents = expandCategories(tokenTypes);
// add required tokenType and categoryMatches properties
assignTokenDefaultProps(tokenTypesAndParents);
// fill up the categoryMatches
assignCategoriesMapProp(tokenTypesAndParents);
assignCategoriesTokensProp(tokenTypesAndParents);
forEach(tokenTypesAndParents, function (tokType) {
tokType.isParent = tokType.categoryMatches.length > 0;
});
}
export function expandCategories(tokenTypes) {
var result = cloneArr(tokenTypes);
var categories = tokenTypes;
var searching = true;
while (searching) {
categories = compact(flatten(map(categories, function (currTokType) { return currTokType.CATEGORIES; })));
var newCategories = difference(categories, result);
result = result.concat(newCategories);
if (isEmpty(newCategories)) {
searching = false;
}
else {
categories = newCategories;
}
}
return result;
}
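// Example of the fixed-point expansion above, with hypothetical token types where
// Integer.CATEGORIES = [Number] and Number.CATEGORIES = [Literal]:
//
//   expandCategories([Integer])  // -> [Integer, Number, Literal]
//
// The loop stops as soon as a pass discovers no categories not already in `result`.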
export function assignTokenDefaultProps(tokenTypes) {
forEach(tokenTypes, function (currTokType) {
if (!hasShortKeyProperty(currTokType)) {
tokenIdxToClass[tokenShortNameIdx] = currTokType;
currTokType.tokenTypeIdx = tokenShortNameIdx++;
}
// CATEGORIES? : TokenType | TokenType[]
if (hasCategoriesProperty(currTokType) &&
!isArray(currTokType.CATEGORIES)
// &&
// !isUndefined(currTokType.CATEGORIES.PATTERN)
) {
currTokType.CATEGORIES = [currTokType.CATEGORIES];
}
if (!hasCategoriesProperty(currTokType)) {
currTokType.CATEGORIES = [];
}
if (!hasExtendingTokensTypesProperty(currTokType)) {
currTokType.categoryMatches = [];
}
if (!hasExtendingTokensTypesMapProperty(currTokType)) {
currTokType.categoryMatchesMap = {};
}
});
}
export function assignCategoriesTokensProp(tokenTypes) {
forEach(tokenTypes, function (currTokType) {
// avoid duplications
currTokType.categoryMatches = [];
forEach(currTokType.categoryMatchesMap, function (val, key) {
currTokType.categoryMatches.push(tokenIdxToClass[key].tokenTypeIdx);
});
});
}
export function assignCategoriesMapProp(tokenTypes) {
forEach(tokenTypes, function (currTokType) {
singleAssignCategoriesToksMap([], currTokType);
});
}
export function singleAssignCategoriesToksMap(path, nextNode) {
forEach(path, function (pathNode) {
nextNode.categoryMatchesMap[pathNode.tokenTypeIdx] = true;
});
forEach(nextNode.CATEGORIES, function (nextCategory) {
var newPath = path.concat(nextNode);
// avoids infinite loops due to cyclic categories.
if (!contains(newPath, nextCategory)) {
singleAssignCategoriesToksMap(newPath, nextCategory);
}
});
}
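// Continuing the hypothetical Integer -> Number -> Literal chain: walking from
// Integer marks Number.categoryMatchesMap[Integer.tokenTypeIdx] = true, and
// Literal.categoryMatchesMap gets entries for both Integer and Number, so
// tokenStructuredMatcher(anIntegerToken, Literal) returns true. The
// `contains(newPath, nextCategory)` guard makes cyclic CATEGORIES chains terminate.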
export function hasShortKeyProperty(tokType) {
return has(tokType, "tokenTypeIdx");
}
export function hasCategoriesProperty(tokType) {
return has(tokType, "CATEGORIES");
}
export function hasExtendingTokensTypesProperty(tokType) {
return has(tokType, "categoryMatches");
}
export function hasExtendingTokensTypesMapProperty(tokType) {
return has(tokType, "categoryMatchesMap");
}
export function isTokenType(tokType) {
return has(tokType, "tokenTypeIdx");
}
//# sourceMappingURL=tokens.js.map

2
node_modules/chevrotain/lib_esm/src/scan/tokens_constants.js generated vendored Normal file
View File

@@ -0,0 +1,2 @@
export var EOF_TOKEN_TYPE = 1;
//# sourceMappingURL=tokens_constants.js.map

87
node_modules/chevrotain/lib_esm/src/scan/tokens_public.js generated vendored Normal file
View File

@@ -0,0 +1,87 @@
import { has, isString, isUndefined } from "../utils/utils";
import { Lexer } from "./lexer_public";
import { augmentTokenTypes, tokenStructuredMatcher } from "./tokens";
export function tokenLabel(tokType) {
if (hasTokenLabel(tokType)) {
return tokType.LABEL;
}
else {
return tokType.name;
}
}
export function tokenName(tokType) {
return tokType.name;
}
export function hasTokenLabel(obj) {
return isString(obj.LABEL) && obj.LABEL !== "";
}
var PARENT = "parent";
var CATEGORIES = "categories";
var LABEL = "label";
var GROUP = "group";
var PUSH_MODE = "push_mode";
var POP_MODE = "pop_mode";
var LONGER_ALT = "longer_alt";
var LINE_BREAKS = "line_breaks";
var START_CHARS_HINT = "start_chars_hint";
export function createToken(config) {
return createTokenInternal(config);
}
function createTokenInternal(config) {
var pattern = config.pattern;
var tokenType = {};
tokenType.name = config.name;
if (!isUndefined(pattern)) {
tokenType.PATTERN = pattern;
}
if (has(config, PARENT)) {
throw "The parent property is no longer supported.\n" +
"See: https://github.com/SAP/chevrotain/issues/564#issuecomment-349062346 for details.";
}
if (has(config, CATEGORIES)) {
        // casting to ANY as this will be fixed inside `augmentTokenTypes`
tokenType.CATEGORIES = config[CATEGORIES];
}
augmentTokenTypes([tokenType]);
if (has(config, LABEL)) {
tokenType.LABEL = config[LABEL];
}
if (has(config, GROUP)) {
tokenType.GROUP = config[GROUP];
}
if (has(config, POP_MODE)) {
tokenType.POP_MODE = config[POP_MODE];
}
if (has(config, PUSH_MODE)) {
tokenType.PUSH_MODE = config[PUSH_MODE];
}
if (has(config, LONGER_ALT)) {
tokenType.LONGER_ALT = config[LONGER_ALT];
}
if (has(config, LINE_BREAKS)) {
tokenType.LINE_BREAKS = config[LINE_BREAKS];
}
if (has(config, START_CHARS_HINT)) {
tokenType.START_CHARS_HINT = config[START_CHARS_HINT];
}
return tokenType;
}
export var EOF = createToken({ name: "EOF", pattern: Lexer.NA });
augmentTokenTypes([EOF]);
export function createTokenInstance(tokType, image, startOffset, endOffset, startLine, endLine, startColumn, endColumn) {
return {
image: image,
startOffset: startOffset,
endOffset: endOffset,
startLine: startLine,
endLine: endLine,
startColumn: startColumn,
endColumn: endColumn,
tokenTypeIdx: tokType.tokenTypeIdx,
tokenType: tokType
};
}
export function tokenMatcher(token, tokType) {
return tokenStructuredMatcher(token, tokType);
}
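// Combined usage sketch (token names are hypothetical):
//
//   var Keyword = createToken({ name: "Keyword", pattern: Lexer.NA });
//   var While = createToken({ name: "While", pattern: /while/, categories: Keyword });
//   var tok = createTokenInstance(While, "while", 0, 4, 1, 1, 1, 5);
//   tokenMatcher(tok, While);   // -> true (exact tokenTypeIdx match)
//   tokenMatcher(tok, Keyword); // -> true (via the CATEGORIES hierarchy)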
//# sourceMappingURL=tokens_public.js.map