import { every, flatten, forEach, has, isEmpty, map, reduce } from "lodash-es"; import { possiblePathsFrom } from "./interpreter.js"; import { RestWalker } from "./rest.js"; import { tokenStructuredMatcher, tokenStructuredMatcherNoCategories, } from "../../scan/tokens.js"; import { Alternation, Alternative as AlternativeGAST, GAstVisitor, Option, Repetition, RepetitionMandatory, RepetitionMandatoryWithSeparator, RepetitionWithSeparator, } from "@chevrotain/gast"; export var PROD_TYPE; (function (PROD_TYPE) { PROD_TYPE[PROD_TYPE["OPTION"] = 0] = "OPTION"; PROD_TYPE[PROD_TYPE["REPETITION"] = 1] = "REPETITION"; PROD_TYPE[PROD_TYPE["REPETITION_MANDATORY"] = 2] = "REPETITION_MANDATORY"; PROD_TYPE[PROD_TYPE["REPETITION_MANDATORY_WITH_SEPARATOR"] = 3] = "REPETITION_MANDATORY_WITH_SEPARATOR"; PROD_TYPE[PROD_TYPE["REPETITION_WITH_SEPARATOR"] = 4] = "REPETITION_WITH_SEPARATOR"; PROD_TYPE[PROD_TYPE["ALTERNATION"] = 5] = "ALTERNATION"; })(PROD_TYPE || (PROD_TYPE = {})); export function getProdType(prod) { /* istanbul ignore else */ if (prod instanceof Option || prod === "Option") { return PROD_TYPE.OPTION; } else if (prod instanceof Repetition || prod === "Repetition") { return PROD_TYPE.REPETITION; } else if (prod instanceof RepetitionMandatory || prod === "RepetitionMandatory") { return PROD_TYPE.REPETITION_MANDATORY; } else if (prod instanceof RepetitionMandatoryWithSeparator || prod === "RepetitionMandatoryWithSeparator") { return PROD_TYPE.REPETITION_MANDATORY_WITH_SEPARATOR; } else if (prod instanceof RepetitionWithSeparator || prod === "RepetitionWithSeparator") { return PROD_TYPE.REPETITION_WITH_SEPARATOR; } else if (prod instanceof Alternation || prod === "Alternation") { return PROD_TYPE.ALTERNATION; } else { throw Error("non exhaustive match"); } } export function getLookaheadPaths(options) { const { occurrence, rule, prodType, maxLookahead } = options; const type = getProdType(prodType); if (type === PROD_TYPE.ALTERNATION) { return getLookaheadPathsForOr(occurrence, rule, maxLookahead); } else { return getLookaheadPathsForOptionalProd(occurrence, rule, type, maxLookahead); } } export function buildLookaheadFuncForOr(occurrence, ruleGrammar, maxLookahead, hasPredicates, dynamicTokensEnabled, laFuncBuilder) { const lookAheadPaths = getLookaheadPathsForOr(occurrence, ruleGrammar, maxLookahead); const tokenMatcher = areTokenCategoriesNotUsed(lookAheadPaths) ? tokenStructuredMatcherNoCategories : tokenStructuredMatcher; return laFuncBuilder(lookAheadPaths, hasPredicates, tokenMatcher, dynamicTokensEnabled); } /** * When dealing with an Optional production (OPTION/MANY/2nd iteration of AT_LEAST_ONE/...) we need to compare * the lookahead "inside" the production and the lookahead immediately "after" it in the same top level rule (context free). * * Example: given a production: * ABC(DE)?DF * * The optional '(DE)?' should only be entered if we see 'DE'. a single Token 'D' is not sufficient to distinguish between the two * alternatives. * * @returns A Lookahead function which will return true IFF the parser should parse the Optional production. */ export function buildLookaheadFuncForOptionalProd(occurrence, ruleGrammar, k, dynamicTokensEnabled, prodType, lookaheadBuilder) { const lookAheadPaths = getLookaheadPathsForOptionalProd(occurrence, ruleGrammar, prodType, k); const tokenMatcher = areTokenCategoriesNotUsed(lookAheadPaths) ? tokenStructuredMatcherNoCategories : tokenStructuredMatcher; return lookaheadBuilder(lookAheadPaths[0], tokenMatcher, dynamicTokensEnabled); } export function buildAlternativesLookAheadFunc(alts, hasPredicates, tokenMatcher, dynamicTokensEnabled) { const numOfAlts = alts.length; const areAllOneTokenLookahead = every(alts, (currAlt) => { return every(currAlt, (currPath) => { return currPath.length === 1; }); }); // This version takes into account the predicates as well. if (hasPredicates) { /** * @returns {number} - The chosen alternative index */ return function (orAlts) { // unfortunately the predicates must be extracted every single time // as they cannot be cached due to references to parameters(vars) which are no longer valid. // note that in the common case of no predicates, no cpu time will be wasted on this (see else block) const predicates = map(orAlts, (currAlt) => currAlt.GATE); for (let t = 0; t < numOfAlts; t++) { const currAlt = alts[t]; const currNumOfPaths = currAlt.length; const currPredicate = predicates[t]; if (currPredicate !== undefined && currPredicate.call(this) === false) { // if the predicate does not match there is no point in checking the paths continue; } nextPath: for (let j = 0; j < currNumOfPaths; j++) { const currPath = currAlt[j]; const currPathLength = currPath.length; for (let i = 0; i < currPathLength; i++) { const nextToken = this.LA(i + 1); if (tokenMatcher(nextToken, currPath[i]) === false) { // mismatch in current path // try the next pth continue nextPath; } } // found a full path that matches. // this will also work for an empty ALT as the loop will be skipped return t; } // none of the paths for the current alternative matched // try the next alternative } // none of the alternatives could be matched return undefined; }; } else if (areAllOneTokenLookahead && !dynamicTokensEnabled) { // optimized (common) case of all the lookaheads paths requiring only // a single token lookahead. These Optimizations cannot work if dynamically defined Tokens are used. const singleTokenAlts = map(alts, (currAlt) => { return flatten(currAlt); }); const choiceToAlt = reduce(singleTokenAlts, (result, currAlt, idx) => { forEach(currAlt, (currTokType) => { if (!has(result, currTokType.tokenTypeIdx)) { result[currTokType.tokenTypeIdx] = idx; } forEach(currTokType.categoryMatches, (currExtendingType) => { if (!has(result, currExtendingType)) { result[currExtendingType] = idx; } }); }); return result; }, {}); /** * @returns {number} - The chosen alternative index */ return function () { const nextToken = this.LA(1); return choiceToAlt[nextToken.tokenTypeIdx]; }; } else { // optimized lookahead without needing to check the predicates at all. // this causes code duplication which is intentional to improve performance. /** * @returns {number} - The chosen alternative index */ return function () { for (let t = 0; t < numOfAlts; t++) { const currAlt = alts[t]; const currNumOfPaths = currAlt.length; nextPath: for (let j = 0; j < currNumOfPaths; j++) { const currPath = currAlt[j]; const currPathLength = currPath.length; for (let i = 0; i < currPathLength; i++) { const nextToken = this.LA(i + 1); if (tokenMatcher(nextToken, currPath[i]) === false) { // mismatch in current path // try the next pth continue nextPath; } } // found a full path that matches. // this will also work for an empty ALT as the loop will be skipped return t; } // none of the paths for the current alternative matched // try the next alternative } // none of the alternatives could be matched return undefined; }; } } export function buildSingleAlternativeLookaheadFunction(alt, tokenMatcher, dynamicTokensEnabled) { const areAllOneTokenLookahead = every(alt, (currPath) => { return currPath.length === 1; }); const numOfPaths = alt.length; // optimized (common) case of all the lookaheads paths requiring only // a single token lookahead. if (areAllOneTokenLookahead && !dynamicTokensEnabled) { const singleTokensTypes = flatten(alt); if (singleTokensTypes.length === 1 && isEmpty(singleTokensTypes[0].categoryMatches)) { const expectedTokenType = singleTokensTypes[0]; const expectedTokenUniqueKey = expectedTokenType.tokenTypeIdx; return function () { return this.LA(1).tokenTypeIdx === expectedTokenUniqueKey; }; } else { const choiceToAlt = reduce(singleTokensTypes, (result, currTokType, idx) => { result[currTokType.tokenTypeIdx] = true; forEach(currTokType.categoryMatches, (currExtendingType) => { result[currExtendingType] = true; }); return result; }, []); return function () { const nextToken = this.LA(1); return choiceToAlt[nextToken.tokenTypeIdx] === true; }; } } else { return function () { nextPath: for (let j = 0; j < numOfPaths; j++) { const currPath = alt[j]; const currPathLength = currPath.length; for (let i = 0; i < currPathLength; i++) { const nextToken = this.LA(i + 1); if (tokenMatcher(nextToken, currPath[i]) === false) { // mismatch in current path // try the next pth continue nextPath; } } // found a full path that matches. return true; } // none of the paths matched return false; }; } } class RestDefinitionFinderWalker extends RestWalker { constructor(topProd, targetOccurrence, targetProdType) { super(); this.topProd = topProd; this.targetOccurrence = targetOccurrence; this.targetProdType = targetProdType; } startWalking() { this.walk(this.topProd); return this.restDef; } checkIsTarget(node, expectedProdType, currRest, prevRest) { if (node.idx === this.targetOccurrence && this.targetProdType === expectedProdType) { this.restDef = currRest.concat(prevRest); return true; } // performance optimization, do not iterate over the entire Grammar ast after we have found the target return false; } walkOption(optionProd, currRest, prevRest) { if (!this.checkIsTarget(optionProd, PROD_TYPE.OPTION, currRest, prevRest)) { super.walkOption(optionProd, currRest, prevRest); } } walkAtLeastOne(atLeastOneProd, currRest, prevRest) { if (!this.checkIsTarget(atLeastOneProd, PROD_TYPE.REPETITION_MANDATORY, currRest, prevRest)) { super.walkOption(atLeastOneProd, currRest, prevRest); } } walkAtLeastOneSep(atLeastOneSepProd, currRest, prevRest) { if (!this.checkIsTarget(atLeastOneSepProd, PROD_TYPE.REPETITION_MANDATORY_WITH_SEPARATOR, currRest, prevRest)) { super.walkOption(atLeastOneSepProd, currRest, prevRest); } } walkMany(manyProd, currRest, prevRest) { if (!this.checkIsTarget(manyProd, PROD_TYPE.REPETITION, currRest, prevRest)) { super.walkOption(manyProd, currRest, prevRest); } } walkManySep(manySepProd, currRest, prevRest) { if (!this.checkIsTarget(manySepProd, PROD_TYPE.REPETITION_WITH_SEPARATOR, currRest, prevRest)) { super.walkOption(manySepProd, currRest, prevRest); } } } /** * Returns the definition of a target production in a top level level rule. */ class InsideDefinitionFinderVisitor extends GAstVisitor { constructor(targetOccurrence, targetProdType, targetRef) { super(); this.targetOccurrence = targetOccurrence; this.targetProdType = targetProdType; this.targetRef = targetRef; this.result = []; } checkIsTarget(node, expectedProdName) { if (node.idx === this.targetOccurrence && this.targetProdType === expectedProdName && (this.targetRef === undefined || node === this.targetRef)) { this.result = node.definition; } } visitOption(node) { this.checkIsTarget(node, PROD_TYPE.OPTION); } visitRepetition(node) { this.checkIsTarget(node, PROD_TYPE.REPETITION); } visitRepetitionMandatory(node) { this.checkIsTarget(node, PROD_TYPE.REPETITION_MANDATORY); } visitRepetitionMandatoryWithSeparator(node) { this.checkIsTarget(node, PROD_TYPE.REPETITION_MANDATORY_WITH_SEPARATOR); } visitRepetitionWithSeparator(node) { this.checkIsTarget(node, PROD_TYPE.REPETITION_WITH_SEPARATOR); } visitAlternation(node) { this.checkIsTarget(node, PROD_TYPE.ALTERNATION); } } function initializeArrayOfArrays(size) { const result = new Array(size); for (let i = 0; i < size; i++) { result[i] = []; } return result; } /** * A sort of hash function between a Path in the grammar and a string. * Note that this returns multiple "hashes" to support the scenario of token categories. * - A single path with categories may match multiple **actual** paths. */ function pathToHashKeys(path) { let keys = [""]; for (let i = 0; i < path.length; i++) { const tokType = path[i]; const longerKeys = []; for (let j = 0; j < keys.length; j++) { const currShorterKey = keys[j]; longerKeys.push(currShorterKey + "_" + tokType.tokenTypeIdx); for (let t = 0; t < tokType.categoryMatches.length; t++) { const categoriesKeySuffix = "_" + tokType.categoryMatches[t]; longerKeys.push(currShorterKey + categoriesKeySuffix); } } keys = longerKeys; } return keys; } /** * Imperative style due to being called from a hot spot */ function isUniquePrefixHash(altKnownPathsKeys, searchPathKeys, idx) { for (let currAltIdx = 0; currAltIdx < altKnownPathsKeys.length; currAltIdx++) { // We only want to test vs the other alternatives if (currAltIdx === idx) { continue; } const otherAltKnownPathsKeys = altKnownPathsKeys[currAltIdx]; for (let searchIdx = 0; searchIdx < searchPathKeys.length; searchIdx++) { const searchKey = searchPathKeys[searchIdx]; if (otherAltKnownPathsKeys[searchKey] === true) { return false; } } } // None of the SearchPathKeys were found in any of the other alternatives return true; } export function lookAheadSequenceFromAlternatives(altsDefs, k) { const partialAlts = map(altsDefs, (currAlt) => possiblePathsFrom([currAlt], 1)); const finalResult = initializeArrayOfArrays(partialAlts.length); const altsHashes = map(partialAlts, (currAltPaths) => { const dict = {}; forEach(currAltPaths, (item) => { const keys = pathToHashKeys(item.partialPath); forEach(keys, (currKey) => { dict[currKey] = true; }); }); return dict; }); let newData = partialAlts; // maxLookahead loop for (let pathLength = 1; pathLength <= k; pathLength++) { const currDataset = newData; newData = initializeArrayOfArrays(currDataset.length); // alternatives loop for (let altIdx = 0; altIdx < currDataset.length; altIdx++) { const currAltPathsAndSuffixes = currDataset[altIdx]; // paths in current alternative loop for (let currPathIdx = 0; currPathIdx < currAltPathsAndSuffixes.length; currPathIdx++) { const currPathPrefix = currAltPathsAndSuffixes[currPathIdx].partialPath; const suffixDef = currAltPathsAndSuffixes[currPathIdx].suffixDef; const prefixKeys = pathToHashKeys(currPathPrefix); const isUnique = isUniquePrefixHash(altsHashes, prefixKeys, altIdx); // End of the line for this path. if (isUnique || isEmpty(suffixDef) || currPathPrefix.length === k) { const currAltResult = finalResult[altIdx]; // TODO: Can we implement a containsPath using Maps/Dictionaries? if (containsPath(currAltResult, currPathPrefix) === false) { currAltResult.push(currPathPrefix); // Update all new keys for the current path. for (let j = 0; j < prefixKeys.length; j++) { const currKey = prefixKeys[j]; altsHashes[altIdx][currKey] = true; } } } // Expand longer paths else { const newPartialPathsAndSuffixes = possiblePathsFrom(suffixDef, pathLength + 1, currPathPrefix); newData[altIdx] = newData[altIdx].concat(newPartialPathsAndSuffixes); // Update keys for new known paths forEach(newPartialPathsAndSuffixes, (item) => { const prefixKeys = pathToHashKeys(item.partialPath); forEach(prefixKeys, (key) => { altsHashes[altIdx][key] = true; }); }); } } } } return finalResult; } export function getLookaheadPathsForOr(occurrence, ruleGrammar, k, orProd) { const visitor = new InsideDefinitionFinderVisitor(occurrence, PROD_TYPE.ALTERNATION, orProd); ruleGrammar.accept(visitor); return lookAheadSequenceFromAlternatives(visitor.result, k); } export function getLookaheadPathsForOptionalProd(occurrence, ruleGrammar, prodType, k) { const insideDefVisitor = new InsideDefinitionFinderVisitor(occurrence, prodType); ruleGrammar.accept(insideDefVisitor); const insideDef = insideDefVisitor.result; const afterDefWalker = new RestDefinitionFinderWalker(ruleGrammar, occurrence, prodType); const afterDef = afterDefWalker.startWalking(); const insideFlat = new AlternativeGAST({ definition: insideDef }); const afterFlat = new AlternativeGAST({ definition: afterDef }); return lookAheadSequenceFromAlternatives([insideFlat, afterFlat], k); } export function containsPath(alternative, searchPath) { compareOtherPath: for (let i = 0; i < alternative.length; i++) { const otherPath = alternative[i]; if (otherPath.length !== searchPath.length) { continue; } for (let j = 0; j < otherPath.length; j++) { const searchTok = searchPath[j]; const otherTok = otherPath[j]; const matchingTokens = searchTok === otherTok || otherTok.categoryMatchesMap[searchTok.tokenTypeIdx] !== undefined; if (matchingTokens === false) { continue compareOtherPath; } } return true; } return false; } export function isStrictPrefixOfPath(prefix, other) { return (prefix.length < other.length && every(prefix, (tokType, idx) => { const otherTokType = other[idx]; return (tokType === otherTokType || otherTokType.categoryMatchesMap[tokType.tokenTypeIdx]); })); } export function areTokenCategoriesNotUsed(lookAheadPaths) { return every(lookAheadPaths, (singleAltPaths) => every(singleAltPaths, (singlePath) => every(singlePath, (token) => isEmpty(token.categoryMatches)))); } //# sourceMappingURL=lookahead.js.map