NEW : REBASE THE ENTIRE WORKING PROJECT
This commit is contained in:
63
node_modules/regexp-to-ast/CHANGELOG.md
generated
vendored
Normal file
63
node_modules/regexp-to-ast/CHANGELOG.md
generated
vendored
Normal file
@@ -0,0 +1,63 @@
|
||||
## 0.4.0 (3-16-2019)
|
||||
|
||||
- [Huge (x75) performance improvement](https://github.com/bd82/regexp-to-ast/pull/18).
|
||||
- Thanks to [@morwen](https://github.com/morwen) :thumbsup:
|
||||
|
||||
## 0.3.5 (7-12-2018)
|
||||
|
||||
- A Set AST can now contain ranges of char codes as well as single char codes.
|
||||
```typescript
|
||||
export interface Set extends IRegExpAST {
|
||||
type: "Set"
|
||||
complement: boolean
|
||||
value: (number | Range)[]
|
||||
quantifier?: Quantifier
|
||||
}
|
||||
```
|
||||
|
||||
## 0.3.4 (6-16-2018)
|
||||
|
||||
- Types: Set now declares a complement property.
|
||||
- Types: BaseAstVisitor now declares a visitChildren method.
|
||||
|
||||
## 0.3.3 (2018-6-9)
|
||||
|
||||
- Types: All AST node types extend a base interface.
|
||||
|
||||
## 0.3.2 (2018-6-9)
|
||||
|
||||
- Fixed: Visitor APIs were lacking the node argument.
|
||||
|
||||
## 0.3.1 (2018-6-9)
|
||||
|
||||
- Added "typings" property in package.json for TypeScript consumers.
|
||||
- Fixed: Version number in regexpToAst.VERSION property.
|
||||
|
||||
## 0.3.0 (2018-6-9)
|
||||
|
||||
- An AST Visitor class is provided to easily traverse the AST output (See main README.md)
|
||||
|
||||
## 0.2.4 (2018-6-6)
|
||||
|
||||
- Fixed: Quantifier identifying using backtracking instead of lookahead.
|
||||
|
||||
## 0.2.3 (2018-6-3)
|
||||
|
||||
- Fixed: Quantifier from range can be zero.
|
||||
|
||||
## 0.2.2 (2018-4-10)
|
||||
|
||||
- VERSION constant exported.
|
||||
|
||||
## 0.2.1 (2018-4-10)
|
||||
|
||||
- Fixed class atoms to allow syntax characters (?, +, \*, ...).
|
||||
- Fixed regular atoms to allow closing curly and square brackets.
|
||||
|
||||
## 0.2.0 (2018-4-7)
|
||||
|
||||
- Updated npm metadata.
|
||||
|
||||
## 0.1.0 (2018-4-7)
|
||||
|
||||
- Initial Release.
|
21
node_modules/regexp-to-ast/LICENSE
generated
vendored
Normal file
21
node_modules/regexp-to-ast/LICENSE
generated
vendored
Normal file
@@ -0,0 +1,21 @@
|
||||
MIT License
|
||||
|
||||
Copyright (c) 2018 Shahar Soel
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in all
|
||||
copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
SOFTWARE.
|
101
node_modules/regexp-to-ast/README.md
generated
vendored
Normal file
101
node_modules/regexp-to-ast/README.md
generated
vendored
Normal file
@@ -0,0 +1,101 @@
|
||||
[](https://badge.fury.io/js/regexp-to-ast)
|
||||
[](https://circleci.com/gh/bd82/regexp-to-ast)
|
||||
[](https://coveralls.io/github/bd82/regexp-to-ast?branch=master) [](https://greenkeeper.io/)
|
||||
|
||||
# regexp-to-ast
|
||||
|
||||
Reads a JavaScript Regular Expression **literal**(text) and outputs an Abstract Syntax Tree.
|
||||
|
||||
## Installation
|
||||
|
||||
- npm
|
||||
```
|
||||
npm install regexp-to-ast
|
||||
```
|
||||
- Browser
|
||||
```
|
||||
<script src="https://unpkg.com/regexp-to-ast/lib/parser.js"></script>
|
||||
```
|
||||
|
||||
## API
|
||||
|
||||
The [API](https://github.com/bd82/regexp-to-ast/blob/master/api.d.ts) is defined as a TypeScript definition file.
|
||||
|
||||
## Usage
|
||||
|
||||
- Parsing to an AST:
|
||||
|
||||
```javascript
|
||||
const RegExpParser = require("regexp-to-ast").RegExpParser
|
||||
const regexpParser = new RegExpParser()
|
||||
|
||||
// from a regexp text
|
||||
const astOutput = regexpParser.pattern("/a|b|c/g")
|
||||
|
||||
// text from regexp instance.
|
||||
const input2 = /a|b/.toString()
|
||||
// The same parser instance can be reused
|
||||
const anotherAstOutput = regexpParser.pattern(input2)
|
||||
```
|
||||
|
||||
- Visiting the AST:
|
||||
|
||||
```javascript
|
||||
// parse to an AST as before.
|
||||
const { RegExpParser, BaseRegExpVisitor } = require("regexp-to-ast")
|
||||
const regexpParser = new RegExpParser()
|
||||
const regExpAst = regexpParser.pattern("/a|b|c/g")
|
||||
|
||||
// Override the visitor methods to add your logic.
|
||||
class MyRegExpVisitor extends BaseRegExpVisitor {
|
||||
visitPattern(node) {}
|
||||
|
||||
visitFlags(node) {}
|
||||
|
||||
visitDisjunction(node) {}
|
||||
|
||||
visitAlternative(node) {}
|
||||
|
||||
// Assertion
|
||||
visitStartAnchor(node) {}
|
||||
|
||||
visitEndAnchor(node) {}
|
||||
|
||||
visitWordBoundary(node) {}
|
||||
|
||||
visitNonWordBoundary(node) {}
|
||||
|
||||
visitLookahead(node) {}
|
||||
|
||||
visitNegativeLookahead(node) {}
|
||||
|
||||
// atoms
|
||||
visitCharacter(node) {}
|
||||
|
||||
visitSet(node) {}
|
||||
|
||||
visitGroup(node) {}
|
||||
|
||||
visitGroupBackReference(node) {}
|
||||
|
||||
visitQuantifier(node) {}
|
||||
}
|
||||
|
||||
const myVisitor = new MyRegExpVisitor()
|
||||
myVisitor.visit(regExpAst)
|
||||
// extract visit results from the visitor state.
|
||||
```
|
||||
|
||||
## Compatibility
|
||||
|
||||
This library is written in ES**5** style and is compatible with all major browsers and **modern** node.js versions.
|
||||
|
||||
## TODO / Limitations
|
||||
|
||||
- Use polyFill for [string.prototype.at](https://github.com/mathiasbynens/String.prototype.at)
|
||||
to support unicode characters outside BMP.
|
||||
- Descriptive error messages.
|
||||
- Position information in error messages.
|
||||
- Support unicode flag escapes.
|
||||
- Ensure edge cases described in ["The madness of parsing real world JavaScript regexps"](https://hackernoon.com/the-madness-of-parsing-real-world-javascript-regexps-d9ee336df983) are supported.
|
||||
- Support deprecated octal escapes
|
123
node_modules/regexp-to-ast/api.d.ts
generated
vendored
Normal file
123
node_modules/regexp-to-ast/api.d.ts
generated
vendored
Normal file
@@ -0,0 +1,123 @@
|
||||
export as namespace regexpToAst
|
||||
|
||||
/** The library version string, e.g. "0.4.0". */
export const VERSION: string
|
||||
|
||||
/**
 * Parser for the text of a JavaScript regular expression literal.
 * A single instance can be reused for several inputs.
 */
export class RegExpParser {
    /**
     * Parses a complete literal such as "/a|b/g" (slashes and flags included)
     * and returns the root of its AST. Throws an Error on invalid input.
     */
    pattern: (input: string) => RegExpPattern
}
|
||||
|
||||
/** Base shape shared by every AST node: a discriminating `type` tag. */
export interface IRegExpAST {
    type: string
}
|
||||
|
||||
/** Root node: the parsed flags plus the top-level disjunction. */
export interface RegExpPattern extends IRegExpAST {
    type: "Pattern"
    flags: RegExpFlags
    value: Disjunction
}
|
||||
|
||||
/**
 * The flags that followed the closing "/" of the literal.
 * Each property is true when its flag letter was present.
 */
export interface RegExpFlags extends IRegExpAST {
    type: "Flags"
    /** "g" */
    global: boolean
    /** "i" */
    ignoreCase: boolean
    /** "m" */
    multiLine: boolean
    /** "u" */
    unicode: boolean
    /** "y" */
    sticky: boolean
}
|
||||
|
||||
/** One or more alternatives that were separated by "|". */
export interface Disjunction extends IRegExpAST {
    type: "Disjunction"
    value: Alternative[]
}
|
||||
|
||||
/** A (possibly empty) sequence of terms forming one branch of a disjunction. */
export interface Alternative extends IRegExpAST {
    type: "Alternative"
    value: Term[]
}
|
||||
|
||||
/** Anything that may appear inside an alternative. */
export type Term = Atom | Assertion
|
||||
|
||||
/**
 * A zero-width assertion: "^", "$", "\b", "\B", "(?=...)" or "(?!...)".
 */
export interface Assertion extends IRegExpAST {
    type:
        | "StartAnchor"
        | "EndAnchor"
        | "WordBoundary"
        | "NonWordBoundary"
        | "Lookahead"
        | "NegativeLookahead"

    /** Only set for "Lookahead" / "NegativeLookahead": the asserted sub-pattern. */
    value?: Disjunction
}
|
||||
|
||||
/** A term that can carry a quantifier. */
export type Atom = Character | Set | Group | GroupBackReference
|
||||
|
||||
/** A single character, stored as its char code. */
export interface Character extends IRegExpAST {
    type: "Character"
    /** Char code of the character (as produced by String.prototype.charCodeAt). */
    value: number
    quantifier?: Quantifier
}
|
||||
|
||||
/** A range of char codes inside a Set, e.g. "a-z"; `to` is never below `from`. */
export type Range = { from: number; to: number }
|
||||
|
||||
/**
 * A set of characters: a character class "[...]", a class escape such as "\d",
 * or the "." wildcard.
 */
export interface Set extends IRegExpAST {
    type: "Set"
    /** True when the set is negated, e.g. "[^...]" or "\D". */
    complement: boolean
    /** Single char codes and/or char code ranges. */
    value: (number | Range)[]
    quantifier?: Quantifier
}
|
||||
|
||||
/** A parenthesized group: capturing "(...)" or non-capturing "(?:...)". */
export interface Group extends IRegExpAST {
    type: "Group"
    value: Disjunction
    capturing: boolean
    /** Capture group number; only set when `capturing` is true. */
    idx?: number
    quantifier?: Quantifier
}
|
||||
|
||||
/** A numeric back reference such as "\1". */
export interface GroupBackReference extends IRegExpAST {
    type: "GroupBackReference"
    /** The referenced group number (the decimal digits after the backslash). */
    value: number
    quantifier?: Quantifier
}
|
||||
|
||||
/** A repetition attached to an atom: "*", "+", "?", "{n}", "{n,}" or "{n,m}". */
export interface Quantifier extends IRegExpAST {
    type: "Quantifier"
    atLeast: number
    /** Infinity when the repetition has no upper bound. */
    atMost: number
    /** False when the quantifier is followed by "?" (lazy matching). */
    greedy: boolean
}
|
||||
|
||||
/**
 * Base class for AST visitors. Subclass it and override the specific
 * `visitXXX` methods of interest; the base implementations do nothing.
 */
export class BaseRegExpVisitor {
    /**
     * The entry point visitor method.
     * This will dispatch to the specific visitor method and then
     * visit the children of the node.
     */
    visit(node: IRegExpAST): void

    /**
     * The entry point for visiting the children of a node.
     * Override this to filter the types of children visited
     * or to support new types of children in extended ASTs.
     */
    visitChildren(node: IRegExpAST): void

    /**
     * The specific visitor methods.
     * Override some of these to create custom visitors.
     */
    visitPattern(node: RegExpPattern): void
    visitFlags(node: RegExpFlags): void
    visitDisjunction(node: Disjunction): void
    visitAlternative(node: Alternative): void
    visitStartAnchor(node: Assertion): void
    visitEndAnchor(node: Assertion): void
    visitWordBoundary(node: Assertion): void
    visitNonWordBoundary(node: Assertion): void
    visitLookahead(node: Assertion): void
    visitNegativeLookahead(node: Assertion): void
    visitCharacter(node: Character): void
    visitSet(node: Set): void
    visitGroup(node: Group): void
    visitGroupBackReference(node: GroupBackReference): void
    visitQuantifier(node: Quantifier): void
}
|
972
node_modules/regexp-to-ast/lib/regexp-to-ast.js
generated
vendored
Normal file
972
node_modules/regexp-to-ast/lib/regexp-to-ast.js
generated
vendored
Normal file
@@ -0,0 +1,972 @@
|
||||
;(function(root, factory) {
|
||||
// istanbul ignore next
|
||||
if (typeof define === "function" && define.amd) {
|
||||
// istanbul ignore next
|
||||
define([], factory)
|
||||
} else if (typeof module === "object" && module.exports) {
|
||||
module.exports = factory()
|
||||
} else {
|
||||
// istanbul ignore next
|
||||
root.regexpToAst = factory()
|
||||
}
|
||||
})(
|
||||
typeof self !== "undefined"
|
||||
? // istanbul ignore next
|
||||
self
|
||||
: this,
|
||||
function() {
|
||||
// references
|
||||
// https://hackernoon.com/the-madness-of-parsing-real-world-javascript-regexps-d9ee336df983
|
||||
// https://www.ecma-international.org/ecma-262/8.0/index.html#prod-Pattern
|
||||
/**
 * Parser constructor. It holds no state of its own; the cursor state
 * (idx, input, groupIdx) is initialised on every call to `pattern`.
 */
function RegExpParser() {}
|
||||
|
||||
/**
 * Takes a snapshot of the parser's cursor state so that speculative
 * parsing can later be undone with `restoreState`.
 */
RegExpParser.prototype.saveState = function() {
    var snapshot = {}
    snapshot.idx = this.idx
    snapshot.input = this.input
    snapshot.groupIdx = this.groupIdx
    return snapshot
}
|
||||
|
||||
/**
 * Rewinds the parser to a snapshot previously produced by `saveState`.
 */
RegExpParser.prototype.restoreState = function(newState) {
    var fields = ["idx", "input", "groupIdx"]
    for (var f = 0; f < fields.length; f++) {
        this[fields[f]] = newState[fields[f]]
    }
}
|
||||
|
||||
/**
 * Entry point: parses a full regular expression literal ("/body/flags")
 * and returns a "Pattern" node.
 *
 * Throws when the delimiters are missing, a flag is repeated, or any
 * input remains after the flags.
 */
RegExpParser.prototype.pattern = function(input) {
    // flag letter -> property name on the "Flags" node
    var flagNames = {
        g: "global",
        i: "ignoreCase",
        m: "multiLine",
        u: "unicode",
        y: "sticky"
    }

    // reset the parser state for this input
    this.idx = 0
    this.input = input
    this.groupIdx = 0

    this.consumeChar("/")
    var body = this.disjunction()
    this.consumeChar("/")

    var flags = {
        type: "Flags",
        global: false,
        ignoreCase: false,
        multiLine: false,
        unicode: false,
        sticky: false
    }

    // isRegExpFlag() guarantees the popped char is a key of flagNames
    while (this.isRegExpFlag()) {
        addFlag(flags, flagNames[this.popChar()])
    }

    var isFullyConsumed = this.idx === this.input.length
    if (!isFullyConsumed) {
        throw Error("Redundant input: " + this.input.substring(this.idx))
    }

    return { type: "Pattern", flags: flags, value: body }
}
|
||||
|
||||
/**
 * Parses one or more alternatives separated by "|" and returns a
 * "Disjunction" node.
 */
RegExpParser.prototype.disjunction = function() {
    var alternatives = [this.alternative()]

    for (;;) {
        if (this.peekChar() !== "|") {
            break
        }
        this.consumeChar("|")
        alternatives.push(this.alternative())
    }

    return { type: "Disjunction", value: alternatives }
}
|
||||
|
||||
/**
 * Parses consecutive terms for as long as the next input starts one and
 * returns them wrapped in an "Alternative" node (possibly empty).
 */
RegExpParser.prototype.alternative = function() {
    var result = { type: "Alternative", value: [] }

    for (; this.isTerm(); ) {
        result.value.push(this.term())
    }

    return result
}
|
||||
|
||||
/**
 * Parses a single term. Assertions are tried first because "(" and "\\"
 * can start either an assertion or an atom.
 */
RegExpParser.prototype.term = function() {
    return this.isAssertion() ? this.assertion() : this.atom()
}
|
||||
|
||||
/**
 * Parses an assertion: "^", "$", "\b", "\B", "(?=...)" or "(?!...)".
 * Callers are expected to have checked `isAssertion()` first.
 */
RegExpParser.prototype.assertion = function() {
    var first = this.popChar()

    if (first === "^") {
        return { type: "StartAnchor" }
    }

    if (first === "$") {
        return { type: "EndAnchor" }
    }

    // '\b' or '\B'
    if (first === "\\") {
        var escaped = this.popChar()
        if (escaped === "b") {
            return { type: "WordBoundary" }
        }
        if (escaped === "B") {
            return { type: "NonWordBoundary" }
        }
        // istanbul ignore next
        throw Error("Invalid Assertion Escape")
    }

    // '(?=' or '(?!'
    if (first === "(") {
        this.consumeChar("?")

        var marker = this.popChar()
        var type
        if (marker === "=") {
            type = "Lookahead"
        } else if (marker === "!") {
            type = "NegativeLookahead"
        }
        ASSERT_EXISTS(type)

        var disjunction = this.disjunction()
        this.consumeChar(")")

        return { type: type, value: disjunction }
    }

    // istanbul ignore next
    ASSERT_NEVER_REACH_HERE()
}
|
||||
|
||||
/**
 * Parses a quantifier: "*", "+", "?", "{n}", "{n,}" or "{n,m}", optionally
 * followed by "?" for lazy matching. Returns a "Quantifier" node.
 *
 * When `isBacktracking` is true (speculative call from `isQuantifier`) an
 * unrecognised quantifier yields `undefined` instead of an assertion error,
 * because throwing during backtracking is slow. Errors raised by the
 * integer / char consumption helpers still propagate.
 */
RegExpParser.prototype.quantifier = function(isBacktracking) {
    var range
    var opener = this.popChar()

    if (opener === "*") {
        range = { atLeast: 0, atMost: Infinity }
    } else if (opener === "+") {
        range = { atLeast: 1, atMost: Infinity }
    } else if (opener === "?") {
        range = { atLeast: 0, atMost: 1 }
    } else if (opener === "{") {
        var lowerBound = this.integerIncludingZero()
        var separator = this.popChar()

        if (separator === "}") {
            // "{n}" - exact count
            range = { atLeast: lowerBound, atMost: lowerBound }
        } else if (separator === ",") {
            // "{n,m}" when digits follow the comma, otherwise "{n,}"
            var upperBound = this.isDigit()
                ? this.integerIncludingZero()
                : Infinity
            range = { atLeast: lowerBound, atMost: upperBound }
            this.consumeChar("}")
        }
    }

    if (range === undefined) {
        // avoid the cost of throwing while backtracking
        if (isBacktracking === true) {
            return undefined
        }
        ASSERT_EXISTS(range)
    }

    var isLazy = this.peekChar(0) === "?"
    if (isLazy) {
        this.consumeChar("?")
    }
    range.greedy = !isLazy

    range.type = "Quantifier"
    return range
}
|
||||
|
||||
/**
 * Parses an atom (".", an escape, a character class, a group or a plain
 * pattern character) and attaches a trailing quantifier when one follows.
 */
RegExpParser.prototype.atom = function() {
    var next = this.peekChar()
    var node

    if (next === ".") {
        node = this.dotAll()
    } else if (next === "\\") {
        node = this.atomEscape()
    } else if (next === "[") {
        node = this.characterClass()
    } else if (next === "(") {
        node = this.group()
    } else if (this.isPatternCharacter()) {
        node = this.patternCharacter()
    }

    ASSERT_EXISTS(node)

    if (this.isQuantifier()) {
        node.quantifier = this.quantifier()
    }

    return node
}
|
||||
|
||||
/**
 * Parses the "." wildcard, represented as the complement of the set of
 * line terminator characters.
 */
RegExpParser.prototype.dotAll = function() {
    this.consumeChar(".")

    var lineTerminators = ["\n", "\r", "\u2028", "\u2029"]
    var codes = []
    for (var t = 0; t < lineTerminators.length; t++) {
        codes.push(cc(lineTerminators[t]))
    }

    return { type: "Set", complement: true, value: codes }
}
|
||||
|
||||
/**
 * Parses a backslash escape that appears outside a character class and
 * dispatches on the character following the backslash.
 */
RegExpParser.prototype.atomEscape = function() {
    this.consumeChar("\\")

    var next = this.peekChar()

    // `next` is a single character, or undefined at the end of the input
    function nextIsOneOf(chars) {
        return next !== undefined && chars.indexOf(next) !== -1
    }

    if (nextIsOneOf("123456789")) {
        return this.decimalEscapeAtom()
    }
    if (nextIsOneOf("dDsSwW")) {
        return this.characterClassEscape()
    }
    if (nextIsOneOf("fnrtv")) {
        return this.controlEscapeAtom()
    }
    if (next === "c") {
        return this.controlLetterEscapeAtom()
    }
    if (next === "0") {
        return this.nulCharacterAtom()
    }
    if (next === "x") {
        return this.hexEscapeSequenceAtom()
    }
    if (next === "u") {
        return this.regExpUnicodeEscapeSequenceAtom()
    }
    return this.identityEscapeAtom()
}
|
||||
|
||||
/**
 * Parses the digits of a back reference (the backslash has already been
 * consumed) and returns a "GroupBackReference" node.
 */
RegExpParser.prototype.decimalEscapeAtom = function() {
    var referencedGroup = this.positiveInteger()
    var backReference = { type: "GroupBackReference" }
    backReference.value = referencedGroup
    return backReference
}
|
||||
|
||||
/**
 * Parses the letter of a class escape (\d \D \s \S \w \W) and returns the
 * matching "Set" node. The upper-case letters yield the complemented set.
 * The returned `value` is the shared char code table, not a copy.
 */
RegExpParser.prototype.characterClassEscape = function() {
    var letter = this.popChar()
    var set

    if (letter === "d" || letter === "D") {
        set = digitsCharCodes
    } else if (letter === "s" || letter === "S") {
        set = whitespaceCodes
    } else if (letter === "w" || letter === "W") {
        set = wordCharCodes
    }

    ASSERT_EXISTS(set)

    var complement = letter === "D" || letter === "S" || letter === "W"
    return { type: "Set", value: set, complement: complement }
}
|
||||
|
||||
/**
 * Parses the letter of a control escape (\f \n \r \t \v) and returns the
 * "Character" node of the control character it denotes.
 */
RegExpParser.prototype.controlEscapeAtom = function() {
    // the letters and the characters they stand for, position by position
    var letters = "fnrtv"
    var controlChars = ["\f", "\n", "\r", "\t", "\v"]

    var position = letters.indexOf(this.popChar())
    var escapeCode
    if (position !== -1) {
        escapeCode = cc(controlChars[position])
    }
    ASSERT_EXISTS(escapeCode)

    return { type: "Character", value: escapeCode }
}
|
||||
|
||||
/**
 * Parses a control letter escape ("\cX", the backslash already consumed).
 * The resulting char code is the upper-cased letter's code minus 64,
 * e.g. "\cA" -> 1 and "\cz" -> 26.
 *
 * Throws when the character after "c" is not an ASCII letter.
 */
RegExpParser.prototype.controlLetterEscapeAtom = function() {
    this.consumeChar("c")
    var letter = this.popChar()
    if (/[a-zA-Z]/.test(letter) === false) {
        // the original message was truncated to "Invalid "
        throw Error("Invalid control letter escape: '\\c" + letter + "'")
    }

    var letterCode = letter.toUpperCase().charCodeAt(0) - 64
    return { type: "Character", value: letterCode }
}
|
||||
|
||||
/**
 * Parses "\0" (the backslash already consumed) as the NUL character.
 */
RegExpParser.prototype.nulCharacterAtom = function() {
    // TODO implement '[lookahead ∉ DecimalDigit]'
    // TODO: for the deprecated octal escape sequence
    this.consumeChar("0")
    var nulCode = cc("\0")
    return { type: "Character", value: nulCode }
}
|
||||
|
||||
/**
 * Parses "\xHH" (the backslash already consumed): exactly two hex digits.
 */
RegExpParser.prototype.hexEscapeSequenceAtom = function() {
    this.consumeChar("x")
    var character = this.parseHexDigits(2)
    return character
}
|
||||
|
||||
/**
 * Parses "\uHHHH" (the backslash already consumed): exactly four hex digits.
 */
RegExpParser.prototype.regExpUnicodeEscapeSequenceAtom = function() {
    this.consumeChar("u")
    var character = this.parseHexDigits(4)
    return character
}
|
||||
|
||||
/**
 * Parses an identity escape: the escaped character stands for itself.
 */
RegExpParser.prototype.identityEscapeAtom = function() {
    // TODO: implement "SourceCharacter but not UnicodeIDContinue"
    // // http://unicode.org/reports/tr31/#Specific_Character_Adjustments
    var charCode = cc(this.popChar())
    return { type: "Character", value: charCode }
}
|
||||
|
||||
/**
 * Parses a literal character inside a character class. Line terminators,
 * "\\" and "]" are rejected here; callers handle them before calling.
 */
RegExpParser.prototype.classPatternCharacterAtom = function() {
    var upcoming = this.peekChar()

    // istanbul ignore next
    if (
        upcoming === "\n" ||
        upcoming === "\r" ||
        upcoming === "\u2028" ||
        upcoming === "\u2029" ||
        upcoming === "\\" ||
        upcoming === "]"
    ) {
        throw Error("TBD")
    }

    return { type: "Character", value: cc(this.popChar()) }
}
|
||||
|
||||
/**
 * Parses a character class "[...]" / "[^...]" into a "Set" node whose
 * value mixes single char codes and { from, to } ranges.
 *
 * Throws when a range's upper bound is below its lower bound.
 */
RegExpParser.prototype.characterClass = function() {
    var members = []

    this.consumeChar("[")
    var isNegated = this.peekChar(0) === "^"
    if (isNegated) {
        this.consumeChar("^")
    }

    while (this.isClassAtom()) {
        var lower = this.classAtom()

        // not the start of a range: add the atom's value(s) as they are
        if (lower.type !== "Character" || !this.isRangeDash()) {
            insertToSet(lower.value, members)
            continue
        }

        this.consumeChar("-")
        var upper = this.classAtom()

        // a range can only be used when both sides are single characters
        if (upper.type === "Character") {
            if (upper.value < lower.value) {
                throw Error("Range out of order in character class")
            }
            members.push({ from: lower.value, to: upper.value })
        } else {
            // the dash is a literal dash
            insertToSet(lower.value, members)
            members.push(cc("-"))
            insertToSet(upper.value, members)
        }
    }

    this.consumeChar("]")

    return { type: "Set", complement: isNegated, value: members }
}
|
||||
|
||||
/**
 * Parses one member of a character class: either an escape or a literal
 * character.
 */
RegExpParser.prototype.classAtom = function() {
    var next = this.peekChar()

    if (next === "\\") {
        return this.classEscape()
    }

    // istanbul ignore next
    if (
        next === "]" ||
        next === "\n" ||
        next === "\r" ||
        next === "\u2028" ||
        next === "\u2029"
    ) {
        throw Error("TBD")
    }

    return this.classPatternCharacterAtom()
}
|
||||
|
||||
/**
 * Parses a backslash escape inside a character class and dispatches on
 * the character following the backslash.
 */
RegExpParser.prototype.classEscape = function() {
    this.consumeChar("\\")

    var next = this.peekChar()

    // `next` is a single character, or undefined at the end of the input
    function nextIsOneOf(chars) {
        return next !== undefined && chars.indexOf(next) !== -1
    }

    // Inside a class "\b" matches a backspace.
    // (Not to be confused with the \b word boundary outside a class)
    if (next === "b") {
        this.consumeChar("b")
        return { type: "Character", value: cc("\u0008") }
    }
    if (nextIsOneOf("dDsSwW")) {
        return this.characterClassEscape()
    }
    if (nextIsOneOf("fnrtv")) {
        return this.controlEscapeAtom()
    }
    if (next === "c") {
        return this.controlLetterEscapeAtom()
    }
    if (next === "0") {
        return this.nulCharacterAtom()
    }
    if (next === "x") {
        return this.hexEscapeSequenceAtom()
    }
    if (next === "u") {
        return this.regExpUnicodeEscapeSequenceAtom()
    }
    return this.identityEscapeAtom()
}
|
||||
|
||||
/**
 * Parses a parenthesized group: capturing "(...)" or non-capturing
 * "(?:...)". Returns a "Group" node; capturing groups also carry `idx`,
 * their 1-based number in order of the opening parenthesis.
 */
RegExpParser.prototype.group = function() {
    var capturing = true
    var ownIdx

    this.consumeChar("(")
    switch (this.peekChar(0)) {
        case "?":
            this.consumeChar("?")
            this.consumeChar(":")
            capturing = false
            break
        default:
            this.groupIdx++
            // Remember this group's number now: parsing the body below may
            // open nested capturing groups, which advance this.groupIdx.
            // Reading it afterwards gave the outer group of "/(a(b))/" the
            // number of the inner one.
            ownIdx = this.groupIdx
            break
    }
    var value = this.disjunction()
    this.consumeChar(")")

    var groupAst = {
        type: "Group",
        capturing: capturing,
        value: value
    }

    if (capturing) {
        groupAst.idx = ownIdx
    }

    return groupAst
}
|
||||
|
||||
/**
 * Consumes a run of decimal digits whose first digit is 1-9 and returns
 * its numeric value.
 */
RegExpParser.prototype.positiveInteger = function() {
    var digits = this.popChar()

    // istanbul ignore next - can't ever get here due to previous lookahead checks
    // still implementing this error checking in case this ever changes.
    if (!decimalPatternNoZero.test(digits)) {
        throw Error("Expecting a positive integer")
    }

    for (; decimalPattern.test(this.peekChar(0)); ) {
        digits += this.popChar()
    }

    return parseInt(digits, 10)
}
|
||||
|
||||
/**
 * Consumes a run of decimal digits (a leading zero is allowed) and
 * returns its numeric value. Throws when the next char is not a digit.
 */
RegExpParser.prototype.integerIncludingZero = function() {
    var digits = this.popChar()
    if (!decimalPattern.test(digits)) {
        throw Error("Expecting an integer")
    }

    for (; decimalPattern.test(this.peekChar(0)); ) {
        digits += this.popChar()
    }

    return parseInt(digits, 10)
}
|
||||
|
||||
/**
 * Consumes one literal pattern character and returns its "Character" node.
 * Line terminators and syntax characters are rejected; callers check
 * `isPatternCharacter()` first so the rejection is not normally reached.
 */
RegExpParser.prototype.patternCharacter = function() {
    // every character in this string is refused as a literal
    var forbidden = "\n\r\u2028\u2029^$\\.*+?()[|"

    var nextChar = this.popChar()

    // istanbul ignore next
    if (forbidden.indexOf(nextChar) !== -1) {
        throw Error("TBD")
    }

    return { type: "Character", value: cc(nextChar) }
}
|
||||
/**
 * Lookahead: is the next character one of the flag letters g, i, m, u, y?
 */
RegExpParser.prototype.isRegExpFlag = function() {
    var next = this.peekChar(0)
    return (
        next === "g" ||
        next === "i" ||
        next === "m" ||
        next === "u" ||
        next === "y"
    )
}
|
||||
|
||||
/**
 * Lookahead inside a character class: is the next character a "-" that is
 * itself followed by another class atom?
 */
RegExpParser.prototype.isRangeDash = function() {
    if (this.peekChar() !== "-") {
        return false
    }
    return this.isClassAtom(1)
}
|
||||
|
||||
/**
 * Lookahead: is the next character a decimal digit?
 */
RegExpParser.prototype.isDigit = function() {
    var next = this.peekChar(0)
    return decimalPattern.test(next)
}
|
||||
|
||||
/**
 * Lookahead inside a character class: can the character `howMuch`
 * positions ahead (default 0) start a class atom? Only "]" and the line
 * terminators cannot.
 */
RegExpParser.prototype.isClassAtom = function(howMuch) {
    var offset = howMuch === undefined ? 0 : howMuch
    var candidate = this.peekChar(offset)

    return (
        candidate !== "]" &&
        candidate !== "\n" &&
        candidate !== "\r" &&
        candidate !== "\u2028" &&
        candidate !== "\u2029"
    )
}
|
||||
|
||||
/**
 * Lookahead: does the next input start a term (an atom or an assertion)?
 */
RegExpParser.prototype.isTerm = function() {
    if (this.isAtom()) {
        return true
    }
    return this.isAssertion()
}
|
||||
|
||||
/**
 * Lookahead: does the next input start an atom? That is a plain pattern
 * character, ".", an escape, a character class or a group.
 */
RegExpParser.prototype.isAtom = function() {
    if (this.isPatternCharacter()) {
        return true
    }

    var next = this.peekChar(0)
    // TODO: isAtom must be called before isAssertion - disambiguate
    return (
        next === "." ||
        next === "\\" || // atomEscape
        next === "[" || // characterClass
        next === "(" // group
    )
}
|
||||
|
||||
/**
 * Lookahead: does the next input start an assertion
 * ("^", "$", "\b", "\B", "(?=" or "(?!")?
 */
RegExpParser.prototype.isAssertion = function() {
    var first = this.peekChar(0)

    if (first === "^" || first === "$") {
        return true
    }

    // '\b' or '\B'
    if (first === "\\") {
        var escaped = this.peekChar(1)
        return escaped === "b" || escaped === "B"
    }

    // '(?=' or '(?!'
    if (first === "(") {
        if (this.peekChar(1) !== "?") {
            return false
        }
        var marker = this.peekChar(2)
        return marker === "=" || marker === "!"
    }

    return false
}
|
||||
|
||||
/**
 * Lookahead by backtracking: speculatively parses a quantifier and then
 * rewinds the parser, whatever the outcome. Any error raised by the
 * attempt is treated as "no quantifier here".
 */
RegExpParser.prototype.isQuantifier = function() {
    var checkpoint = this.saveState()
    var found
    try {
        found = this.quantifier(true) !== undefined
    } catch (e) {
        found = false
    } finally {
        this.restoreState(checkpoint)
    }
    return found
}
|
||||
|
||||
/**
 * Lookahead: can the next character be taken as a literal pattern
 * character? Syntax characters, "/" and line terminators cannot.
 * Returns true at the end of the input as well (peekChar yields
 * undefined there, which is not in the excluded list).
 */
RegExpParser.prototype.isPatternCharacter = function() {
    var excluded = "^$\\.*+?()[|/\n\r\u2028\u2029"
    var next = this.peekChar()

    if (next === undefined) {
        return true
    }
    return excluded.indexOf(next) === -1
}
|
||||
|
||||
/**
 * Consumes exactly `howMany` hexadecimal digits and returns a "Character"
 * node whose value is the number they spell. Throws on a non-hex char.
 */
RegExpParser.prototype.parseHexDigits = function(howMany) {
    var collected = []

    while (collected.length < howMany) {
        var hexChar = this.popChar()
        if (!hexDigitPattern.test(hexChar)) {
            throw Error("Expecting a HexDecimal digits")
        }
        collected.push(hexChar)
    }

    return { type: "Character", value: parseInt(collected.join(""), 16) }
}
|
||||
|
||||
/**
 * Returns the character `howMuch` positions ahead of the cursor
 * (default 0) without consuming it; undefined past the end of the input.
 */
RegExpParser.prototype.peekChar = function(howMuch) {
    var offset = howMuch === undefined ? 0 : howMuch
    return this.input[this.idx + offset]
}
|
||||
|
||||
/**
 * Consumes the next character and returns it.
 * Throws (via consumeChar) at the end of the input.
 */
RegExpParser.prototype.popChar = function() {
    var current = this.peekChar(0)
    // no expected char: this only advances and checks for end of input
    this.consumeChar()
    return current
}
|
||||
|
||||
/**
 * Advances the cursor by one character.
 *
 * When `char` is given, the current character must equal it, otherwise an
 * Error describing the mismatch is thrown. Also throws when the cursor is
 * already at the end of the input.
 */
RegExpParser.prototype.consumeChar = function(char) {
    var actual = this.input[this.idx]

    if (char !== undefined && actual !== char) {
        var message =
            "Expected: '" +
            char +
            "' but found: '" +
            actual +
            "' at offset: " +
            this.idx
        throw Error(message)
    }

    if (this.idx >= this.input.length) {
        throw Error("Unexpected end of input")
    }
    this.idx++
}
|
||||
|
||||
// consts and utilities
|
||||
// Single-character classifiers. They are anchored so that only a string
// made of exactly one matching character passes; unanchored, a value such
// as undefined (stringified to "undefined", which contains d/e/f) would
// wrongly count as a hex digit.
var hexDigitPattern = /^[0-9a-fA-F]$/
var decimalPattern = /^[0-9]$/
var decimalPatternNoZero = /^[1-9]$/
|
||||
|
||||
/**
 * Returns the char code of the first character of `char`.
 */
function cc(char) {
    var firstCharCode = char.charCodeAt(0)
    return firstCharCode
}
|
||||
|
||||
/**
 * Appends `item` to `set`. An item with a `length` (an array of char
 * codes) is appended element by element; anything else is appended as is.
 */
function insertToSet(item, set) {
    if (item.length === undefined) {
        set.push(item)
        return
    }

    item.forEach(function(code) {
        set.push(code)
    })
}
|
||||
|
||||
/**
 * Marks the flag `flagKey` as present on `flagObj`.
 *
 * Throws an Error when the flag was already set, i.e. the same flag letter
 * appears twice in the literal. The original threw a bare string, which
 * carries no stack trace and is not an instance of Error.
 */
function addFlag(flagObj, flagKey) {
    if (flagObj[flagKey] === true) {
        throw Error("duplicate flag " + flagKey)
    }

    flagObj[flagKey] = true
}
|
||||
|
||||
/**
 * Internal sanity check: throws when `obj` is exactly undefined.
 * Any other value, including null, 0 and "", passes.
 */
function ASSERT_EXISTS(obj) {
    // istanbul ignore next
    if (obj !== undefined) {
        return
    }
    throw Error("Internal Error - Should never get here!")
}
|
||||
|
||||
// istanbul ignore next
|
||||
/**
 * Marks a code path that must be unreachable: always throws.
 */
function ASSERT_NEVER_REACH_HERE() {
    var unreachable = Error("Internal Error - Should never get here!")
    throw unreachable
}
|
||||
|
||||
// Char code tables backing the \d, \w and \s class escapes (and their
// complements). The arrays are shared: every "Set" node produced for one
// of these escapes references the same array instance.
var i

// \d: "0" to "9"
var digitsCharCodes = []
for (i = cc("0"); i <= cc("9"); i++) {
    digitsCharCodes.push(i)
}

// \w: "_", digits, then lower and upper case ASCII letters
var wordCharCodes = [cc("_")].concat(digitsCharCodes)
for (i = cc("a"); i <= cc("z"); i++) {
    wordCharCodes.push(i)
}

for (i = cc("A"); i <= cc("Z"); i++) {
    wordCharCodes.push(i)
}

// \s
// https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/RegExp#character-classes
// (the original list contained cc("\t") twice; the duplicate is dropped)
var whitespaceCodes = [
    cc(" "),
    cc("\f"),
    cc("\n"),
    cc("\r"),
    cc("\t"),
    cc("\v"),
    cc("\u00a0"),
    cc("\u1680"),
    cc("\u2000"),
    cc("\u2001"),
    cc("\u2002"),
    cc("\u2003"),
    cc("\u2004"),
    cc("\u2005"),
    cc("\u2006"),
    cc("\u2007"),
    cc("\u2008"),
    cc("\u2009"),
    cc("\u200a"),
    cc("\u2028"),
    cc("\u2029"),
    cc("\u202f"),
    cc("\u205f"),
    cc("\u3000"),
    cc("\ufeff")
]
|
||||
|
||||
/**
 * Visitor constructor. Instances hold no state; the traversal logic and
 * the overridable no-op hooks live on the prototype.
 */
function BaseRegExpVisitor() {}
|
||||
|
||||
/**
 * Visits the children of `node`: every own property that is itself a node
 * (has a `type`) is visited, and every element of an own array property is
 * visited. Other property values are ignored.
 *
 * Compared to the original this
 *  - skips null / undefined property values instead of throwing a
 *    TypeError on `child.type` (possible in hand-built or extended ASTs),
 *  - reads a property only after confirming it is an own property,
 *  - does not depend on `node` inheriting hasOwnProperty.
 */
BaseRegExpVisitor.prototype.visitChildren = function(node) {
    for (var key in node) {
        /* istanbul ignore else */
        if (Object.prototype.hasOwnProperty.call(node, key)) {
            var child = node[key]
            if (child === undefined || child === null) {
                continue
            }

            if (child.type !== undefined) {
                this.visit(child)
            } else if (Array.isArray(child)) {
                child.forEach(function(subChild) {
                    this.visit(subChild)
                }, this)
            }
        }
    }
}
|
||||
|
||||
/**
 * Entry point of the traversal: calls the `visit<Type>` hook matching
 * `node.type` (nothing is called for an unknown type) and then visits the
 * node's children.
 */
BaseRegExpVisitor.prototype.visit = function(node) {
    // the node types that have a dedicated visit<Type> hook
    var dispatchableTypes = [
        "Pattern",
        "Flags",
        "Disjunction",
        "Alternative",
        "StartAnchor",
        "EndAnchor",
        "WordBoundary",
        "NonWordBoundary",
        "Lookahead",
        "NegativeLookahead",
        "Character",
        "Set",
        "Group",
        "GroupBackReference",
        "Quantifier"
    ]

    if (dispatchableTypes.indexOf(node.type) !== -1) {
        this["visit" + node.type](node)
    }

    this.visitChildren(node)
}
|
||||
|
||||
// Default hooks: one no-op `visit<Type>(node)` per node type. Subclasses
// override the ones they care about. Each hook gets its own function.
var noopVisitorHooks = [
    "visitPattern",
    "visitFlags",
    "visitDisjunction",
    "visitAlternative",
    // Assertion
    "visitStartAnchor",
    "visitEndAnchor",
    "visitWordBoundary",
    "visitNonWordBoundary",
    "visitLookahead",
    "visitNegativeLookahead",
    // atoms
    "visitCharacter",
    "visitSet",
    "visitGroup",
    "visitGroupBackReference",
    "visitQuantifier"
]

noopVisitorHooks.forEach(function(hookName) {
    BaseRegExpVisitor.prototype[hookName] = function(node) {}
})
|
||||
|
||||
return {
|
||||
RegExpParser: RegExpParser,
|
||||
BaseRegExpVisitor: BaseRegExpVisitor,
|
||||
VERSION: "0.4.0"
|
||||
}
|
||||
}
|
||||
)
|
Reference in New Issue
Block a user