File: //proc/1991220/root/usr/share/nodejs/puka/src/internal/parse.js
import { ShellStringText } from '../ShellStringText';
import { ShellStringUnquoted } from '../ShellStringUnquoted';
import { shellStringSemicolon } from '../shellStringSemicolon';
import { execFrom, sticky } from './regex-utils';
import { memoize } from './utils';
export const PLACEHOLDER = {};
export const parse = memoize(templateSpans => {
// These are the token types our DSL can recognize. Their values won't escape
// this function.
const TOKEN_TEXT = 0;
const TOKEN_QUOTE = 1;
const TOKEN_SEMI = 2;
const TOKEN_UNQUOTED = 3;
const TOKEN_SPACE = 4;
const TOKEN_REDIRECT = 5;
const result = [];
let placeholderCount = 0;
let prefix = null;
let onlyPrefixOnce = false;
let contents = [];
let quote = 0;
const lastSpan = templateSpans.length - 1;
for (let spanIndex = 0; spanIndex <= lastSpan; spanIndex++) {
const templateSpan = templateSpans[spanIndex];
const posEnd = templateSpan.length;
let tokenStart = 0;
if (spanIndex) {
placeholderCount++;
contents.push(PLACEHOLDER);
}
// For each span, we first do a recognizing pass in which we use regular
// expressions to identify the positions of tokens in the text, and then
// a second pass that actually splits the text into the minimum number of
// substrings necessary.
const recognized = []; // [type1, index1, type2, index2...]
let firstWordBreak = -1;
let lastWordBreak = -1;
{
let pos = 0, match;
while (pos < posEnd) {
if (quote) {
if (
match = execFrom(quote === CHAR_SQUO ? reQuotation1 : reQuotation2,
templateSpan, pos)
) {
recognized.push(TOKEN_TEXT, pos);
pos += match[0].length;
}
if (pos < posEnd) {
recognized.push(TOKEN_QUOTE, pos++);
quote = 0;
}
} else {
if (match = execFrom(reRedirectOrSpace, templateSpan, pos)) {
firstWordBreak < 0 && (firstWordBreak = pos);
lastWordBreak = pos;
recognized.push(match[1] ? TOKEN_REDIRECT : TOKEN_SPACE, pos);
pos += match[0].length;
}
if (match = execFrom(reText, templateSpan, pos)) {
const setBreaks = match[1] != null;
setBreaks && firstWordBreak < 0 && (firstWordBreak = pos);
recognized.push(setBreaks ? TOKEN_UNQUOTED : TOKEN_TEXT, pos);
pos += match[0].length;
setBreaks && (lastWordBreak = pos);
}
const char = templateSpan.charCodeAt(pos);
if (char === CHAR_SEMI) {
firstWordBreak < 0 && (firstWordBreak = pos);
recognized.push(TOKEN_SEMI, pos++);
lastWordBreak = pos;
} else if (char === CHAR_SQUO || char === CHAR_DQUO) {
recognized.push(TOKEN_QUOTE, pos++);
quote = char;
}
}
}
}
// Word breaks are only important if they separate words with placeholders,
// so we can ignore the first/last break if this is the first/last span.
spanIndex === 0 && (firstWordBreak = -1);
spanIndex === lastSpan && (lastWordBreak = posEnd);
// Here begins the second pass mentioned above. This loop runs one more
// iteration than there are tokens in recognized, because it handles tokens
// on a one-iteration delay; hence the i <= iEnd instead of i < iEnd.
const iEnd = recognized.length;
for (let i = 0, type = -1; i <= iEnd; i += 2) {
let typeNext = -1, pos;
if (i === iEnd) {
pos = posEnd;
} else {
typeNext = recognized[i];
pos = recognized[i + 1];
// If the next token is space or redirect, but there's another word
// break in this span, then we can handle that token the same way we
// would handle unquoted text because it isn't being attached to a
// placeholder.
typeNext >= TOKEN_SPACE
&& pos !== lastWordBreak
&& (typeNext = TOKEN_UNQUOTED);
}
const breakHere = pos === firstWordBreak || pos === lastWordBreak;
if (pos && (breakHere || typeNext !== type)) {
let value = type === TOKEN_QUOTE ? null
: type === TOKEN_SEMI ? shellStringSemicolon
: templateSpan.substring(tokenStart, pos);
if (type >= TOKEN_SEMI) {
// This branch handles semicolons, unquoted text, spaces, and
// redirects. shellStringSemicolon is already a formatSymbol object;
// the rest need to be wrapped.
type === TOKEN_SEMI || (value = new ShellStringUnquoted(value));
// We don't need to check placeholderCount here like we do below;
// that's only relevant during the first word break of the span, and
// because this iteration of the loop is processing the token that
// was checked for breaks in the previous iteration, it will have
// already been handled. For the same reason, prefix is guaranteed to
// be null.
if (contents.length) {
result.push(new ShellStringText(contents, null));
contents = [];
}
// Only spaces and redirects become prefixes, but not if they've been
// rewritten to unquoted above.
if (type >= TOKEN_SPACE) {
prefix = value;
onlyPrefixOnce = type === TOKEN_SPACE;
} else {
result.push(value);
}
} else {
contents.push(value);
}
tokenStart = pos;
}
if (breakHere) {
if (placeholderCount) {
result.push({ contents, placeholderCount, prefix, onlyPrefixOnce });
} else {
// There's no prefix to handle in this branch; a prefix prior to this
// span would mean placeholderCount > 0, and a prefix in this span
// can't be created because spaces and redirects get rewritten to
// unquoted before the last word break.
contents.length && result.push(new ShellStringText(contents, null));
}
placeholderCount = 0; prefix = null; onlyPrefixOnce = false;
contents = [];
}
type = typeNext;
}
}
if (quote) {
throw new SyntaxError(
`String is missing a ${String.fromCharCode(quote)} character`);
}
return result;
});
const CHAR_SEMI = ';'.charCodeAt();
const CHAR_SQUO = "'".charCodeAt();
const CHAR_DQUO = '"'.charCodeAt();
const reQuotation1 = sticky("[^']+");
const reQuotation2 = sticky('[^"]+');
const reText = sticky('[^\\s"#$&\'();<>\\\\`|]+|([#$&()\\\\`|]+)');
const reRedirectOrSpace = sticky('(\\s*\\d*[<>]+\\s*)|\\s+');