201 lines
7.6 KiB
JavaScript
201 lines
7.6 KiB
JavaScript
"use strict";
|
|
Object.defineProperty(exports, "__esModule", { value: true });
|
|
exports.longestCommonPrefix = longestCommonPrefix;
|
|
exports.longestCommonSuffix = longestCommonSuffix;
|
|
exports.replacePrefix = replacePrefix;
|
|
exports.replaceSuffix = replaceSuffix;
|
|
exports.removePrefix = removePrefix;
|
|
exports.removeSuffix = removeSuffix;
|
|
exports.maximumOverlap = maximumOverlap;
|
|
exports.hasOnlyWinLineEndings = hasOnlyWinLineEndings;
|
|
exports.hasOnlyUnixLineEndings = hasOnlyUnixLineEndings;
|
|
exports.segment = segment;
|
|
exports.trailingWs = trailingWs;
|
|
exports.leadingWs = leadingWs;
|
|
exports.leadingAndTrailingWs = leadingAndTrailingWs;
|
|
function longestCommonPrefix(str1, str2) {
|
|
var i;
|
|
for (i = 0; i < str1.length && i < str2.length; i++) {
|
|
if (str1[i] != str2[i]) {
|
|
return str1.slice(0, i);
|
|
}
|
|
}
|
|
return str1.slice(0, i);
|
|
}
|
|
function longestCommonSuffix(str1, str2) {
|
|
var i;
|
|
// Unlike longestCommonPrefix, we need a special case to handle all scenarios
|
|
// where we return the empty string since str1.slice(-0) will return the
|
|
// entire string.
|
|
if (!str1 || !str2 || str1[str1.length - 1] != str2[str2.length - 1]) {
|
|
return '';
|
|
}
|
|
for (i = 0; i < str1.length && i < str2.length; i++) {
|
|
if (str1[str1.length - (i + 1)] != str2[str2.length - (i + 1)]) {
|
|
return str1.slice(-i);
|
|
}
|
|
}
|
|
return str1.slice(-i);
|
|
}
|
|
function replacePrefix(string, oldPrefix, newPrefix) {
|
|
if (string.slice(0, oldPrefix.length) != oldPrefix) {
|
|
throw Error("string ".concat(JSON.stringify(string), " doesn't start with prefix ").concat(JSON.stringify(oldPrefix), "; this is a bug"));
|
|
}
|
|
return newPrefix + string.slice(oldPrefix.length);
|
|
}
|
|
function replaceSuffix(string, oldSuffix, newSuffix) {
|
|
if (!oldSuffix) {
|
|
return string + newSuffix;
|
|
}
|
|
if (string.slice(-oldSuffix.length) != oldSuffix) {
|
|
throw Error("string ".concat(JSON.stringify(string), " doesn't end with suffix ").concat(JSON.stringify(oldSuffix), "; this is a bug"));
|
|
}
|
|
return string.slice(0, -oldSuffix.length) + newSuffix;
|
|
}
|
|
function removePrefix(string, oldPrefix) {
|
|
return replacePrefix(string, oldPrefix, '');
|
|
}
|
|
function removeSuffix(string, oldSuffix) {
|
|
return replaceSuffix(string, oldSuffix, '');
|
|
}
|
|
function maximumOverlap(string1, string2) {
|
|
return string2.slice(0, overlapCount(string1, string2));
|
|
}
|
|
// Nicked from https://stackoverflow.com/a/60422853/1709587
|
|
function overlapCount(a, b) {
|
|
// Deal with cases where the strings differ in length
|
|
var startA = 0;
|
|
if (a.length > b.length) {
|
|
startA = a.length - b.length;
|
|
}
|
|
var endB = b.length;
|
|
if (a.length < b.length) {
|
|
endB = a.length;
|
|
}
|
|
// Create a back-reference for each index
|
|
// that should be followed in case of a mismatch.
|
|
// We only need B to make these references:
|
|
var map = Array(endB);
|
|
var k = 0; // Index that lags behind j
|
|
map[0] = 0;
|
|
for (var j = 1; j < endB; j++) {
|
|
if (b[j] == b[k]) {
|
|
map[j] = map[k]; // skip over the same character (optional optimisation)
|
|
}
|
|
else {
|
|
map[j] = k;
|
|
}
|
|
while (k > 0 && b[j] != b[k]) {
|
|
k = map[k];
|
|
}
|
|
if (b[j] == b[k]) {
|
|
k++;
|
|
}
|
|
}
|
|
// Phase 2: use these references while iterating over A
|
|
k = 0;
|
|
for (var i = startA; i < a.length; i++) {
|
|
while (k > 0 && a[i] != b[k]) {
|
|
k = map[k];
|
|
}
|
|
if (a[i] == b[k]) {
|
|
k++;
|
|
}
|
|
}
|
|
return k;
|
|
}
|
|
/**
|
|
* Returns true if the string consistently uses Windows line endings.
|
|
*/
|
|
function hasOnlyWinLineEndings(string) {
|
|
return string.includes('\r\n') && !string.startsWith('\n') && !string.match(/[^\r]\n/);
|
|
}
|
|
/**
|
|
* Returns true if the string consistently uses Unix line endings.
|
|
*/
|
|
function hasOnlyUnixLineEndings(string) {
|
|
return !string.includes('\r\n') && string.includes('\n');
|
|
}
|
|
/**
|
|
* Split a string into segments using a word segmenter, merging consecutive
|
|
* segments if they are both whitespace segments. Whitespace segments can
|
|
* appear adjacent to one another for two reasons:
|
|
* - newlines always get their own segment
|
|
* - where a diacritic is attached to a whitespace character in the text, the
|
|
* segment ends after the diacritic, so e.g. " \u0300 " becomes two segments.
|
|
* This function therefore runs the segmenter's .segment() method and then
|
|
* merges consecutive segments of whitespace into a single part.
|
|
*/
|
|
function segment(string, segmenter) {
|
|
var parts = [];
|
|
for (var _i = 0, _a = Array.from(segmenter.segment(string)); _i < _a.length; _i++) {
|
|
var segmentObj = _a[_i];
|
|
var segment_1 = segmentObj.segment;
|
|
if (parts.length && (/\s/).test(parts[parts.length - 1]) && (/\s/).test(segment_1)) {
|
|
parts[parts.length - 1] += segment_1;
|
|
}
|
|
else {
|
|
parts.push(segment_1);
|
|
}
|
|
}
|
|
return parts;
|
|
}
|
|
// The functions below take a `segmenter` argument so that, when called from
|
|
// diffWords when it is using a segmenter, they can use a notion of what
|
|
// constitutes "whitespace" that is consistent with the segmenter.
|
|
//
|
|
// USUALLY this will be identical to the result of the non-segmenter-based
|
|
// logic, but it differs in at least one case: when whitespace characters are
|
|
// modified by diacritics. A word segmenter considers these diacritics to be
|
|
// part of the whitespace, whereas our non-segmenter-based logic does not.
|
|
//
|
|
// Because the segmenter-based approach necessarily requires segmenting the
|
|
// entire string, we offer a leadingAndTrailingWs function to allow getting the
|
|
// whitespace prefix AND whitespace suffix with a single call to the segmenter,
|
|
// for efficiency's sake.
|
|
function trailingWs(string, segmenter) {
|
|
if (segmenter) {
|
|
return leadingAndTrailingWs(string, segmenter)[1];
|
|
}
|
|
// Yes, this looks overcomplicated and dumb - why not replace the whole function with
|
|
// return string.match(/\s*$/)[0]
|
|
// you ask? Because:
|
|
// 1. the trap described at https://markamery.com/blog/quadratic-time-regexes/ would mean doing
|
|
// this would cause this function to take O(n²) time in the worst case (specifically when
|
|
// there is a massive run of NON-TRAILING whitespace in `string`), and
|
|
// 2. the fix proposed in the same blog post, of using a negative lookbehind, is incompatible
|
|
// with old Safari versions that we'd like to not break if possible (see
|
|
// https://github.com/kpdecker/jsdiff/pull/550)
|
|
// It feels absurd to do this with an explicit loop instead of a regex, but I really can't see a
|
|
// better way that doesn't result in broken behaviour.
|
|
var i;
|
|
for (i = string.length - 1; i >= 0; i--) {
|
|
if (!string[i].match(/\s/)) {
|
|
break;
|
|
}
|
|
}
|
|
return string.substring(i + 1);
|
|
}
|
|
function leadingWs(string, segmenter) {
|
|
if (segmenter) {
|
|
return leadingAndTrailingWs(string, segmenter)[0];
|
|
}
|
|
// Thankfully the annoying considerations described in trailingWs don't apply here:
|
|
var match = string.match(/^\s*/);
|
|
return match ? match[0] : '';
|
|
}
|
|
function leadingAndTrailingWs(string, segmenter) {
|
|
if (!segmenter) {
|
|
return [leadingWs(string), trailingWs(string)];
|
|
}
|
|
if (segmenter.resolvedOptions().granularity != 'word') {
|
|
throw new Error('The segmenter passed must have a granularity of "word"');
|
|
}
|
|
var segments = segment(string, segmenter);
|
|
var firstSeg = segments[0];
|
|
var lastSeg = segments[segments.length - 1];
|
|
var head = (/\s/).test(firstSeg) ? firstSeg : '';
|
|
var tail = (/\s/).test(lastSeg) ? lastSeg : '';
|
|
return [head, tail];
|
|
}
|