/*
 * Files
 * Zos/Skills/@be/node_modules/stemmer/index.js
 *
 * 325 lines
 * 6.6 KiB
 * JavaScript
 * Raw Normal View History
 */

'use strict';
/*
 * Suffix rewrite tables: every key is a word ending and every value is
 * the ending that takes its place.
 */

/* Endings looked up by step 2 of `stemmer`. */
var step2list = {
  ational: 'ate',
  tional: 'tion',
  enci: 'ence',
  anci: 'ance',
  izer: 'ize',
  bli: 'ble',
  alli: 'al',
  entli: 'ent',
  eli: 'e',
  ousli: 'ous',
  ization: 'ize',
  ation: 'ate',
  ator: 'ate',
  alism: 'al',
  iveness: 'ive',
  fulness: 'ful',
  ousness: 'ous',
  aliti: 'al',
  iviti: 'ive',
  biliti: 'ble',
  logi: 'log'
};

/* Endings looked up by step 3 of `stemmer`; '' drops the ending. */
var step3list = {
  icate: 'ic',
  ative: '',
  alize: 'al',
  iciti: 'ic',
  ical: 'ic',
  ful: '',
  ness: ''
};
/*
 * Pattern fragments for consonants and vowels, and the expressions
 * assembled from them. `y` is accepted by both `consonant` and `vowel`;
 * the sequence fragments only allow it as their first character.
 */
var consonant = '[^aeiou]';
var vowel = '[aeiouy]';
var consonantSequence = '(' + consonant + '[^aeiouy]*)';
var vowelSequence = '(' + vowel + '[aeiou]*)';

/* Optional consonant run, then at least one vowel-consonant pair. */
var EXPRESSION_MEASURE_GREATER_THAN_0 = new RegExp(
  ['^', consonantSequence, '?', vowelSequence, consonantSequence].join('')
);

/* Exactly one vowel-consonant pair, optionally followed by vowels. */
var EXPRESSION_MEASURE_EQUAL_TO_1 = new RegExp(
  [
    '^', consonantSequence, '?',
    vowelSequence, consonantSequence,
    vowelSequence, '?$'
  ].join('')
);

/* Optional consonant run, then two or more vowel-consonant pairs. */
var EXPRESSION_MEASURE_GREATER_THAN_1 = new RegExp(
  [
    '^', consonantSequence, '?',
    '(', vowelSequence, consonantSequence, '){2,}'
  ].join('')
);

/* Optional consonant run followed by a vowel. */
var EXPRESSION_VOWEL_IN_STEM = new RegExp(
  ['^', consonantSequence, '?', vowel].join('')
);

/* Consonant run, one vowel, then a final character outside `aeiouwxy`. */
var EXPRESSION_CONSONANT_LIKE = new RegExp(
  ['^', consonantSequence, vowel, '[^aeiouwxy]$'].join('')
);
/*
 * Expressions recognising the individual word endings that `stemmer`
 * inspects. Where a capture group 1 exists it holds the text in front
 * of the ending.
 */
var EXPRESSION_SUFFIX_LL = /ll$/;
var EXPRESSION_SUFFIX_E = /^(.+?)e$/;
var EXPRESSION_SUFFIX_Y = /^(.+?)y$/;
/* Group 1 keeps the `s` or `t` that precedes `ion`. */
var EXPRESSION_SUFFIX_ION = /^(.+?(s|t))(ion)$/;
var EXPRESSION_SUFFIX_ED_OR_ING = /^(.+?)(ed|ing)$/;
var EXPRESSION_SUFFIX_AT_OR_BL_OR_IZ = /(at|bl|iz)$/;
var EXPRESSION_SUFFIX_EED = /^(.+?)eed$/;
var EXPRESSION_SUFFIX_S = /^.+?[^s]s$/;
var EXPRESSION_SUFFIX_SSES_OR_IES = /^.+?(ss|i)es$/;
/* A doubled final character other than a vowel, `y`, `l`, `s` or `z`. */
var EXPRESSION_SUFFIX_MULTI_CONSONANT_LIKE = /([^aeiouylsz])\1$/;

/* Group 1 is the leading text, group 2 the matched ending. */
var EXPRESSION_STEP_2 = new RegExp(
  '^(.+?)(' + [
    'ational', 'tional', 'enci', 'anci', 'izer', 'bli', 'alli',
    'entli', 'eli', 'ousli', 'ization', 'ation', 'ator', 'alism',
    'iveness', 'fulness', 'ousness', 'aliti', 'iviti', 'biliti', 'logi'
  ].join('|') + ')$'
);

var EXPRESSION_STEP_3 = new RegExp(
  '^(.+?)(' + [
    'icate', 'ative', 'alize', 'iciti', 'ical', 'ful', 'ness'
  ].join('|') + ')$'
);

var EXPRESSION_STEP_4 = new RegExp(
  '^(.+?)(' + [
    'al', 'ance', 'ence', 'er', 'ic', 'able', 'ible', 'ant', 'ement',
    'ment', 'ent', 'ou', 'ism', 'ate', 'iti', 'ous', 'ive', 'ize'
  ].join('|') + ')$'
);
/*
 * Character code of lower-case `y`, compared against the first
 * character of a word in `stemmer`.
 */
var CHARACTER_CODE_Y = 'y'.charCodeAt(0);
/**
 * Reduce a word to its stem by applying a fixed series of
 * suffix-stripping steps (1a, 1b, 1c, 2, 3, 4, 5).
 *
 * The input is coerced to a string and lower-cased before anything
 * else happens. Inputs shorter than three characters are returned in
 * that normalised form without further processing.
 *
 * @param {string} value - Word to stem.
 * @return {string} - Stem corresponding to `value`.
 */
function stemmer(value) {
  var word = String(value).toLowerCase();
  var startsWithY;
  var found;
  var stem;
  var rewriteSteps;
  var index;

  /* Nothing to strip from very short words. */
  if (word.length < 3) {
    return word;
  }

  /*
   * Swap a leading `y` for `Y` so none of the lower-case character
   * classes treat it as a vowel; it is swapped back before returning.
   */
  startsWithY = word.charCodeAt(0) === CHARACTER_CODE_Y;

  if (startsWithY) {
    word = 'Y' + word.slice(1);
  }

  /* Step 1a: `sses` / `ies` lose two characters, a plain `s` loses one. */
  if (EXPRESSION_SUFFIX_SSES_OR_IES.test(word)) {
    word = word.slice(0, -2);
  } else if (EXPRESSION_SUFFIX_S.test(word)) {
    word = word.slice(0, -1);
  }

  /* Step 1b: `eed` is handled exclusively; otherwise try `ed` / `ing`. */
  found = EXPRESSION_SUFFIX_EED.exec(word);

  if (found) {
    if (EXPRESSION_MEASURE_GREATER_THAN_0.test(found[1])) {
      /* `eed` becomes `ee`. */
      word = word.slice(0, -1);
    }
  } else {
    found = EXPRESSION_SUFFIX_ED_OR_ING.exec(word);

    if (found && EXPRESSION_VOWEL_IN_STEM.test(found[1])) {
      word = found[1];

      /* Tidy the remainder: restore an `e` or undouble the ending. */
      if (EXPRESSION_SUFFIX_AT_OR_BL_OR_IZ.test(word)) {
        word += 'e';
      } else if (EXPRESSION_SUFFIX_MULTI_CONSONANT_LIKE.test(word)) {
        word = word.slice(0, -1);
      } else if (EXPRESSION_CONSONANT_LIKE.test(word)) {
        word += 'e';
      }
    }
  }

  /* Step 1c: a final `y` becomes `i` when a vowel precedes it. */
  found = EXPRESSION_SUFFIX_Y.exec(word);

  if (found && EXPRESSION_VOWEL_IN_STEM.test(found[1])) {
    word = found[1] + 'i';
  }

  /*
   * Steps 2 and 3: the same table lookup, run in order with each
   * step's own expression and replacement table.
   */
  rewriteSteps = [
    [EXPRESSION_STEP_2, step2list],
    [EXPRESSION_STEP_3, step3list]
  ];

  for (index = 0; index < rewriteSteps.length; index++) {
    found = rewriteSteps[index][0].exec(word);

    if (found && EXPRESSION_MEASURE_GREATER_THAN_0.test(found[1])) {
      word = found[1] + rewriteSteps[index][1][found[2]];
    }
  }

  /*
   * Step 4: the `ion` expression is only consulted when the main
   * ending list does not match at all.
   */
  found = EXPRESSION_STEP_4.exec(word) || EXPRESSION_SUFFIX_ION.exec(word);

  if (found && EXPRESSION_MEASURE_GREATER_THAN_1.test(found[1])) {
    word = found[1];
  }

  /* Step 5: drop a final `e`, then undouble a final `ll`. */
  found = EXPRESSION_SUFFIX_E.exec(word);

  if (found) {
    stem = found[1];

    if (
      EXPRESSION_MEASURE_GREATER_THAN_1.test(stem) ||
      (
        EXPRESSION_MEASURE_EQUAL_TO_1.test(stem) &&
        !EXPRESSION_CONSONANT_LIKE.test(stem)
      )
    ) {
      word = stem;
    }
  }

  if (
    EXPRESSION_SUFFIX_LL.test(word) &&
    EXPRESSION_MEASURE_GREATER_THAN_1.test(word)
  ) {
    word = word.slice(0, -1);
  }

  /* Put the leading `y` back. */
  if (startsWithY) {
    word = 'y' + word.slice(1);
  }

  return word;
}
/*
* Expose `stemmer` as the value of this CommonJS module, so that
* requiring the module yields the function itself.
*/
module.exports = stemmer;