Sunday, November 14, 2021

Split keep function. Lookbehind, match, loop

Common code:
function escapeRegExp(text) {
    return text.replace(/[-[\]{}()*+?.,\\^$|#\s]/g, "\\$&");
}
Lookbehind:
String.prototype.splitKeep = function (tokens) {
    const escaped = escapeRegExp(tokens);
    return this.split(new RegExp(`(?=[${escaped}])|(?<=[${escaped}])`, "g"));
};
Match. Safari does not support lookbehind, use match approach:
String.prototype.splitKeep = function (tokens) {
    const tokensEscaped = tokens
        .split("")
        .map((s) => escapeRegExp(s))
        .join("|");

    const wordMatch = `[^${escapeRegExp(tokens)}]+`;
    return this.match(new RegExp(tokensEscaped + "|" + wordMatch, "g"));
};
Loop-based approach:
String.prototype.splitKeep = function (tokens) {
    let toPush = "";
    const splitList = [];
    for (const c of this) {
        if (tokens.includes(c)) {
            if (toPush.length > 0) {
                splitList.push(toPush);
            }
            splitList.push(c);
            toPush = "";
            continue;
        }

        toPush += c;
    }

    if (toPush.length > 0) {
        splitList.push(toPush);
    }

    return splitList;
};
Benchmark code:
console.time("test");
for (let i = 0; i < 10000; ++i) {
    "pin1yin1".splitKeep("12345 ");
}
console.timeEnd("test");
Results:
% node splitkeep-lookbehind.js
test: 19.35ms
% node splitkeep-lookbehind.js
test: 18.951ms
% node splitkeep-match.js       
test: 54.635ms
% node splitkeep-match.js
test: 51.998ms
% node splitkeep-loop.js 
test: 14.647ms
% node splitkeep-loop.js
test: 13.035ms

No comments:

Post a Comment