similarityDS()
Computes the Dice-Sørensen coefficient of two strings, based on their character bigrams, as a case-insensitive measure of string similarity.
/**
 * Computes the Dice-Sørensen coefficient of two strings as a measure of
 * their similarity.
 *
 * Both inputs are lower-cased and split into overlapping two-character
 * substrings (bigrams). The score is twice the number of bigrams the strings
 * share (counted with multiplicity) divided by the total number of bigrams in
 * both strings.
 *
 * @param {string} strA - First string; a falsy value is treated as ''.
 * @param {string} strB - Second string; a falsy value is treated as ''.
 * @returns {number} A score between 0 (no shared bigrams) and 1 (identical
 *   bigram multisets). When neither string is long enough to contain a
 *   bigram, returns 1 if the normalized strings are equal and 0 otherwise.
 */
function similarityDS(strA, strB) {
  // Counts how often each overlapping two-character substring occurs in `str`.
  function bigrams(str) {
    const counts = new Map();
    for (let i = 0; i < str.length - 1; i += 1) {
      const pair = str.substring(i, i + 2);
      counts.set(pair, (counts.get(pair) || 0) + 1);
    }
    return counts;
  }

  const textA = (strA || '').toLowerCase();
  const textB = (strB || '').toLowerCase();

  // A string of length n contains max(n - 1, 0) bigrams.
  const sizeA = Math.max(textA.length - 1, 0);
  const sizeB = Math.max(textB.length - 1, 0);

  // With no bigrams on either side the formula would be 0 / 0 (NaN), so fall
  // back to a plain equality comparison of the normalized strings.
  if (sizeA + sizeB === 0) {
    return textA === textB ? 1 : 0;
  }

  const countsA = bigrams(textA);
  const countsB = bigrams(textB);

  // Multiset intersection: each shared bigram contributes the smaller count.
  let shared = 0;
  for (const [pair, countA] of countsA) {
    shared += Math.min(countA, countsB.get(pair) || 0);
  }

  return 2 * shared / (sizeA + sizeB);
}