Files
my_site/node_modules/diff/libcjs/util/string.js
2026-03-24 14:30:59 +08:00

201 lines
7.6 KiB
JavaScript

"use strict";
// Set the `__esModule` flag on the CommonJS `exports` object.
Object.defineProperty(exports, "__esModule", { value: true });
// Public API of this module. Each name refers to a function declaration further
// down in this file; the assignments work here because function declarations
// are hoisted.
exports.longestCommonPrefix = longestCommonPrefix;
exports.longestCommonSuffix = longestCommonSuffix;
exports.replacePrefix = replacePrefix;
exports.replaceSuffix = replaceSuffix;
exports.removePrefix = removePrefix;
exports.removeSuffix = removeSuffix;
exports.maximumOverlap = maximumOverlap;
exports.hasOnlyWinLineEndings = hasOnlyWinLineEndings;
exports.hasOnlyUnixLineEndings = hasOnlyUnixLineEndings;
exports.segment = segment;
exports.trailingWs = trailingWs;
exports.leadingWs = leadingWs;
exports.leadingAndTrailingWs = leadingAndTrailingWs;
/**
 * Returns the longest run of leading characters shared by both strings.
 *
 * @param {string} str1 - First string; the result is sliced from this one.
 * @param {string} str2 - Second string.
 * @returns {string} The common prefix, or '' when the strings share none.
 */
function longestCommonPrefix(str1, str2) {
    // The shared prefix can never be longer than the shorter input.
    var limit = Math.min(str1.length, str2.length);
    var matched = 0;
    while (matched < limit && str1[matched] == str2[matched]) {
        matched++;
    }
    return str1.slice(0, matched);
}
/**
 * Returns the longest run of trailing characters shared by both strings.
 *
 * @param {string} str1 - First string; the result is sliced from this one.
 * @param {string} str2 - Second string.
 * @returns {string} The common suffix, or '' when the strings share none.
 */
function longestCommonSuffix(str1, str2) {
    // Every "no common suffix" case must return '' explicitly: the general
    // path ends with str1.slice(-n), and slice(-0) yields the whole string
    // rather than the empty one.
    if (!str1 || !str2) {
        return '';
    }
    var end1 = str1.length - 1;
    var end2 = str2.length - 1;
    if (str1[end1] != str2[end2]) {
        return '';
    }
    var limit = Math.min(str1.length, str2.length);
    // The final characters are already known to match, so counting starts at 1.
    var matched = 1;
    while (matched < limit && str1[end1 - matched] == str2[end2 - matched]) {
        matched++;
    }
    return str1.slice(-matched);
}
/**
 * Swaps the leading `oldPrefix` of `string` for `newPrefix`.
 *
 * @param {string} string - The string to rewrite.
 * @param {string} oldPrefix - Prefix that `string` is expected to start with.
 * @param {string} newPrefix - Text to put in place of `oldPrefix`.
 * @returns {string} `newPrefix` followed by the remainder of `string`.
 * @throws {Error} If `string` does not start with `oldPrefix`.
 */
function replacePrefix(string, oldPrefix, newPrefix) {
    var prefixLength = oldPrefix.length;
    var actualPrefix = string.slice(0, prefixLength);
    if (actualPrefix == oldPrefix) {
        return newPrefix + string.slice(prefixLength);
    }
    throw Error('string ' + JSON.stringify(string) + " doesn't start with prefix " + JSON.stringify(oldPrefix) + '; this is a bug');
}
/**
 * Swaps the trailing `oldSuffix` of `string` for `newSuffix`.
 *
 * @param {string} string - The string to rewrite.
 * @param {string} oldSuffix - Suffix that `string` is expected to end with.
 * @param {string} newSuffix - Text to put in place of `oldSuffix`.
 * @returns {string} `string` without `oldSuffix`, followed by `newSuffix`.
 * @throws {Error} If `string` does not end with `oldSuffix`.
 */
function replaceSuffix(string, oldSuffix, newSuffix) {
    // An empty suffix is handled up front: the negative-offset slices below
    // would otherwise be slice(-0), which selects from the start of the string.
    if (!oldSuffix) {
        return string + newSuffix;
    }
    var suffixLength = oldSuffix.length;
    var actualSuffix = string.slice(-suffixLength);
    if (actualSuffix == oldSuffix) {
        return string.slice(0, -suffixLength) + newSuffix;
    }
    throw Error('string ' + JSON.stringify(string) + " doesn't end with suffix " + JSON.stringify(oldSuffix) + '; this is a bug');
}
/**
 * Strips `oldPrefix` from the start of `string`.
 *
 * Delegates to replacePrefix with an empty replacement, so it throws under the
 * same condition: when `string` does not start with `oldPrefix`.
 *
 * @param {string} string - The string to strip.
 * @param {string} oldPrefix - Prefix to remove.
 * @returns {string} `string` without its leading `oldPrefix`.
 */
function removePrefix(string, oldPrefix) {
    var replacement = '';
    return replacePrefix(string, oldPrefix, replacement);
}
/**
 * Strips `oldSuffix` from the end of `string`.
 *
 * Delegates to replaceSuffix with an empty replacement, so it throws under the
 * same condition: when `string` does not end with a non-empty `oldSuffix`.
 *
 * @param {string} string - The string to strip.
 * @param {string} oldSuffix - Suffix to remove.
 * @returns {string} `string` without its trailing `oldSuffix`.
 */
function removeSuffix(string, oldSuffix) {
    var replacement = '';
    return replaceSuffix(string, oldSuffix, replacement);
}
/**
 * Returns the leading part of `string2` whose length is the overlap reported
 * by overlapCount(string1, string2).
 *
 * @param {string} string1 - String whose end is matched.
 * @param {string} string2 - String whose start is matched; the result is
 *   sliced from this one.
 * @returns {string} The overlapping text, or '' when the count is 0.
 */
function maximumOverlap(string1, string2) {
    var overlapLength = overlapCount(string1, string2);
    return string2.slice(0, overlapLength);
}
// Nicked from https://stackoverflow.com/a/60422853/1709587
/**
 * Counts how many characters at the end of `a` coincide with the start of `b`,
 * i.e. the length of the longest suffix of `a` that is also a prefix of `b`.
 *
 * @param {string} a - String whose tail is examined.
 * @param {string} b - String whose head is examined.
 * @returns {number} Length of the overlap (0 when there is none).
 */
function overlapCount(a, b) {
    var lengthA = a.length;
    var lengthB = b.length;
    // An overlap cannot be longer than the shorter string, so only the last
    // `lengthB` characters of `a` and the first `lengthA` characters of `b`
    // are ever inspected.
    var firstA = lengthA > lengthB ? lengthA - lengthB : 0;
    var limitB = lengthA < lengthB ? lengthA : lengthB;
    // Phase 1: for each index of `b`, record the index to fall back to after a
    // mismatch at that position. Only `b` is needed to build this table.
    var fallback = Array(limitB);
    var matched = 0; // trails behind `pos`
    fallback[0] = 0;
    for (var pos = 1; pos < limitB; pos++) {
        // When the characters agree, reuse the earlier entry so that a later
        // mismatch skips over the repeated character (optional optimisation).
        fallback[pos] = b[pos] == b[matched] ? fallback[matched] : matched;
        while (matched > 0 && b[pos] != b[matched]) {
            matched = fallback[matched];
        }
        if (b[pos] == b[matched]) {
            matched++;
        }
    }
    // Phase 2: walk the relevant tail of `a`, following the table on mismatch.
    matched = 0;
    for (var idx = firstA; idx < lengthA; idx++) {
        var current = a[idx];
        while (matched > 0 && current != b[matched]) {
            matched = fallback[matched];
        }
        if (current == b[matched]) {
            matched++;
        }
    }
    return matched;
}
/**
 * Returns true if the string consistently uses Windows line endings: it
 * contains at least one "\r\n" and no "\n" that lacks a preceding "\r".
 *
 * @param {string} string - Text to inspect.
 * @returns {boolean}
 */
function hasOnlyWinLineEndings(string) {
    if (!string.includes('\r\n')) {
        return false;
    }
    // A bare "\n" is either the very first character or follows a character
    // other than "\r".
    var hasBareNewline = string.startsWith('\n') || /[^\r]\n/.test(string);
    return !hasBareNewline;
}
/**
 * Returns true if the string consistently uses Unix line endings: it contains
 * at least one "\n" and no "\r\n" sequence.
 *
 * @param {string} string - Text to inspect.
 * @returns {boolean}
 */
function hasOnlyUnixLineEndings(string) {
    if (string.includes('\r\n')) {
        return false;
    }
    return string.includes('\n');
}
/**
 * Splits a string into parts using a word segmenter, gluing together adjacent
 * segments when both contain whitespace. The segmenter can emit neighbouring
 * whitespace segments for two reasons:
 *  - newlines always get their own segment
 *  - where a diacritic is attached to a whitespace character in the text, the
 *    segment ends after the diacritic, so e.g. " \u0300 " becomes two segments.
 * Hence this function runs the segmenter's .segment() method and then folds
 * each run of consecutive whitespace segments into a single part.
 *
 * @param {string} string - Text to split.
 * @param {Intl.Segmenter} segmenter - Object whose .segment(string) yields
 *   items carrying a `segment` string property.
 * @returns {string[]} The parts, in order.
 */
function segment(string, segmenter) {
    var whitespace = /\s/;
    var merged = [];
    Array.from(segmenter.segment(string)).forEach(function (piece) {
        var text = piece.segment;
        var lastIndex = merged.length - 1;
        var continuesWhitespaceRun = lastIndex >= 0 &&
            whitespace.test(merged[lastIndex]) &&
            whitespace.test(text);
        if (continuesWhitespaceRun) {
            merged[lastIndex] += text;
        }
        else {
            merged.push(text);
        }
    });
    return merged;
}
// The functions below take a `segmenter` argument so that, when called from
// diffWords when it is using a segmenter, they can use a notion of what
// constitutes "whitespace" that is consistent with the segmenter.
//
// USUALLY this will be identical to the result of the non-segmenter-based
// logic, but it differs in at least one case: when whitespace characters are
// modified by diacritics. A word segmenter considers these diacritics to be
// part of the whitespace, whereas our non-segmenter-based logic does not.
//
// Because the segmenter-based approach necessarily requires segmenting the
// entire string, we offer a leadingAndTrailingWs function to allow getting the
// whitespace prefix AND whitespace suffix with a single call to the segmenter,
// for efficiency's sake.
/**
 * Returns the run of whitespace at the end of `string` ('' if there is none).
 *
 * @param {string} string - Text to inspect.
 * @param {Intl.Segmenter} [segmenter] - When given, the answer is taken from
 *   leadingAndTrailingWs(string, segmenter) instead of the character scan.
 * @returns {string} The trailing whitespace.
 */
function trailingWs(string, segmenter) {
    if (segmenter) {
        var headAndTail = leadingAndTrailingWs(string, segmenter);
        return headAndTail[1];
    }
    // A hand-written backwards scan looks silly next to the one-liner
    //     return string.match(/\s*$/)[0]
    // but it is deliberate:
    // 1. that regex falls into the trap described at
    //    https://markamery.com/blog/quadratic-time-regexes/ and would take
    //    O(n²) time in the worst case (specifically when `string` contains a
    //    massive run of NON-TRAILING whitespace), and
    // 2. the remedy suggested in that post, a negative lookbehind, is not
    //    supported by old Safari versions that we'd like to not break if
    //    possible (see https://github.com/kpdecker/jsdiff/pull/550).
    var whitespaceChar = /\s/;
    // `start` ends up as the offset at which the trailing whitespace begins.
    var start = string.length;
    while (start > 0 && whitespaceChar.test(string[start - 1])) {
        start--;
    }
    return string.substring(start);
}
/**
 * Returns the run of whitespace at the start of `string` ('' if there is none).
 *
 * @param {string} string - Text to inspect.
 * @param {Intl.Segmenter} [segmenter] - When given, the answer is taken from
 *   leadingAndTrailingWs(string, segmenter) instead of the regex.
 * @returns {string} The leading whitespace.
 */
function leadingWs(string, segmenter) {
    if (!segmenter) {
        // An anchored regex is fine here; the performance trap that forces
        // trailingWs to use an explicit loop does not affect a ^-anchored
        // pattern.
        var found = /^\s*/.exec(string);
        return found === null ? '' : found[0];
    }
    var headAndTail = leadingAndTrailingWs(string, segmenter);
    return headAndTail[0];
}
/**
 * Returns the leading and trailing whitespace of `string` as a pair.
 *
 * Without a segmenter this is just [leadingWs(string), trailingWs(string)].
 * With one, the string is segmented once via segment(); the first and last
 * parts are reported when they contain whitespace, '' otherwise.
 *
 * @param {string} string - Text to inspect.
 * @param {Intl.Segmenter} [segmenter] - Optional segmenter; its
 *   resolvedOptions().granularity must be 'word'.
 * @returns {string[]} Two-element array: [leading whitespace, trailing whitespace].
 * @throws {Error} If a segmenter is passed whose granularity is not 'word'.
 */
function leadingAndTrailingWs(string, segmenter) {
    if (!segmenter) {
        return [leadingWs(string), trailingWs(string)];
    }
    var granularity = segmenter.resolvedOptions().granularity;
    if (granularity != 'word') {
        throw new Error('The segmenter passed must have a granularity of "word"');
    }
    var whitespace = /\s/;
    var pieces = segment(string, segmenter);
    var first = pieces[0];
    var last = pieces[pieces.length - 1];
    var result = ['', ''];
    if (whitespace.test(first)) {
        result[0] = first;
    }
    if (whitespace.test(last)) {
        result[1] = last;
    }
    return result;
}