5 * This script contains the language-specific data used by searchtools.js,
6 * namely the list of stopwords, stemmer, scorer and splitter.
8 * :copyright: Copyright 2007-2021 by the Sphinx team, see AUTHORS.
9 * :license: BSD, see LICENSE for details.
/**
 * Stopword list — part of the language-specific data that the file header
 * says is consumed by searchtools.js.
 *
 * Declared with `var` at script level so the binding stays reachable from
 * that separately loaded script.
 */
var stopwords = [
  "a", "and", "are", "as", "at",
  "be", "but", "by",
  "for",
  "if", "in", "into", "is", "it",
  "near", "no", "not",
  "of", "on", "or",
  "such",
  "that", "the", "their", "then", "there", "these", "they", "this", "to",
  "was", "will", "with"
];
16 /* A non-minified version is available as a separate JS file. */
// Stemmer constructor: instances expose stemWord(w), which rewrites a word by
// applying a sequence of suffix rules. The regex fragments and the
// "m>0 / m=1 / m>1" conditions below appear to follow the Porter stemming
// algorithm — TODO confirm against the complete file.
// NOTE(review): this listing has gaps. step2list / step3list, the declarations
// of stem, suffix, firstch and re..re4, and most of the guarding conditionals
// are referenced but not visible here.
21 var Stemmer = function() {
// Regex source fragments (plain strings, compiled later with new RegExp).
57 var c = "[^aeiou]"; // consonant
58 var v = "[aeiouy]"; // vowel
59 var C = c + "[^aeiouy]*"; // consonant sequence
60 var V = v + "[aeiou]*"; // vowel sequence
// Anchored patterns built from the fragments above; the trailing comments
// state which stem "measure" condition each one encodes.
62 var mgr0 = "^(" + C + ")?" + V + C; // [C]VC... is m>0
63 var meq1 = "^(" + C + ")?" + V + C + "(" + V + ")?$"; // [C]VC[V] is m=1
64 var mgr1 = "^(" + C + ")?" + V + C + V + C; // [C]VCVC... is m>1
65 var s_v = "^(" + C + ")?" + v; // vowel in stem
// stemWord(w): takes a word string and reassigns w as each rule applies.
67 this.stemWord = function (w) {
// Remember the first character. The next line upper-cases it; the condition
// guarding that line is not visible in this listing.
81 firstch = w.substr(0,1);
83 w = firstch.toUpperCase() + w.substr(1);
// Plural-style endings: re drops "es" after "ss" or "i"; re2 drops a final
// "s" that follows any character other than "s".
86 re = /^(.+?)(ss|i)es$/;
87 re2 = /^(.+?)([^s])s$/;
90 w = w.replace(re,"$1$2");
92 w = w.replace(re2,"$1$2");
// "ed" / "ing" endings.
96 re2 = /^(.+?)(ed|ing)$/;
99 re = new RegExp(mgr0);
100 if (re.test(fp[1])) {
// NOTE(review): re is presumably reassigned on a line not shown before this
// replace — confirm.
102 w = w.replace(re,"");
105 else if (re2.test(w)) {
106 var fp = re2.exec(w);
// Only continue when the candidate stem matches the s_v pattern.
108 re2 = new RegExp(s_v);
109 if (re2.test(stem)) {
112 re3 = new RegExp("([^aeiouylsz])\\1$"); // same character twice at the end, excluding a e i o u y l s z
113 re4 = new RegExp("^" + C + v + "[^aeiouwxy]$"); // whole word: consonant sequence, one vowel, one character not in a e i o u w x y
116 else if (re3.test(w)) {
118 w = w.replace(re,"");
120 else if (re4.test(w))
130 re = new RegExp(s_v);
// Longer suffixes: the matched suffix is replaced by step2list[suffix]
// (table defined outside this listing).
136 re = /^(.+?)(ational|tional|enci|anci|izer|bli|alli|entli|eli|ousli|ization|ation|ator|alism|iveness|fulness|ousness|aliti|iviti|biliti|logi)$/;
141 re = new RegExp(mgr0);
143 w = stem + step2list[suffix];
// Next group of suffixes, replaced by step3list[suffix] (table not visible here).
147 re = /^(.+?)(icate|ative|alize|iciti|ical|ful|ness)$/;
152 re = new RegExp(mgr0);
154 w = stem + step3list[suffix];
// Suffix removal gated on the m>1 pattern; "ion" is handled separately and
// only when preceded by "s" or "t".
158 re = /^(.+?)(al|ance|ence|er|ic|able|ible|ant|ement|ment|ent|ou|ism|ate|iti|ous|ive|ize)$/;
159 re2 = /^(.+?)(s|t)(ion)$/;
163 re = new RegExp(mgr1);
167 else if (re2.test(w)) {
168 var fp = re2.exec(w);
169 stem = fp[1] + fp[2]; // keep the "s"/"t", leave off "ion"
170 re2 = new RegExp(mgr1);
// Final adjustments: the condition below passes when the stem matches the m>1
// pattern, or matches the m=1 pattern without matching re3.
180 re = new RegExp(mgr1);
181 re2 = new RegExp(meq1);
182 re3 = new RegExp("^" + C + v + "[^aeiouwxy]$");
183 if (re.test(stem) || (re2.test(stem) && !(re3.test(stem))))
// NOTE(review): re is presumably reassigned on a line not shown before the
// test below — confirm.
187 re2 = new RegExp(mgr1);
188 if (re.test(w) && re2.test(w)) {
190 w = w.replace(re,"");
193 // and turn initial Y back to y
195 w = firstch.toLowerCase() + w.substr(1);
// splitChars: lookup indexed by UTF-16 character code. The query-splitting
// code further down reads it as splitChars[query.charCodeAt(i)], so a truthy
// entry marks a character that separates search terms.
// NOTE(review): this listing has gaps. The declaration of `result`, the
// assignment to `end`, the inner loop body and the function's return/closing
// lines are not visible here.
203 var splitChars = (function() {
// Individual character codes to flag.
205 var singles = [96, 180, 187, 191, 215, 247, 749, 885, 903, 907, 909, 930, 1014, 1648,
206 1748, 1809, 2416, 2473, 2481, 2526, 2601, 2609, 2612, 2615, 2653, 2702,
207 2706, 2729, 2737, 2740, 2857, 2865, 2868, 2910, 2928, 2948, 2961, 2971,
208 2973, 3085, 3089, 3113, 3124, 3213, 3217, 3241, 3252, 3295, 3341, 3345,
209 3369, 3506, 3516, 3633, 3715, 3721, 3736, 3744, 3748, 3750, 3756, 3761,
210 3781, 3912, 4239, 4347, 4681, 4695, 4697, 4745, 4785, 4799, 4801, 4823,
211 4881, 5760, 5901, 5997, 6313, 7405, 8024, 8026, 8028, 8030, 8117, 8125,
212 8133, 8181, 8468, 8485, 8487, 8489, 8494, 8527, 11311, 11359, 11687, 11695,
213 11703, 11711, 11719, 11727, 11735, 12448, 12539, 43010, 43014, 43019, 43587,
214 43696, 43713, 64286, 64297, 64311, 64317, 64319, 64322, 64325, 65141];
215 var i, j, start, end;
// Flag every code listed in singles.
216 for (i = 0; i < singles.length; i++) {
217 result[singles[i]] = true;
// Two-element pairs describing runs of codes. The loop below walks j from
// start while j <= end, so both ends of a run are included.
219 var ranges = [[0, 47], [58, 64], [91, 94], [123, 169], [171, 177], [182, 184], [706, 709],
220 [722, 735], [741, 747], [751, 879], [888, 889], [894, 901], [1154, 1161],
221 [1318, 1328], [1367, 1368], [1370, 1376], [1416, 1487], [1515, 1519], [1523, 1568],
222 [1611, 1631], [1642, 1645], [1750, 1764], [1767, 1773], [1789, 1790], [1792, 1807],
223 [1840, 1868], [1958, 1968], [1970, 1983], [2027, 2035], [2038, 2041], [2043, 2047],
224 [2070, 2073], [2075, 2083], [2085, 2087], [2089, 2307], [2362, 2364], [2366, 2383],
225 [2385, 2391], [2402, 2405], [2419, 2424], [2432, 2436], [2445, 2446], [2449, 2450],
226 [2483, 2485], [2490, 2492], [2494, 2509], [2511, 2523], [2530, 2533], [2546, 2547],
227 [2554, 2564], [2571, 2574], [2577, 2578], [2618, 2648], [2655, 2661], [2672, 2673],
228 [2677, 2692], [2746, 2748], [2750, 2767], [2769, 2783], [2786, 2789], [2800, 2820],
229 [2829, 2830], [2833, 2834], [2874, 2876], [2878, 2907], [2914, 2917], [2930, 2946],
230 [2955, 2957], [2966, 2968], [2976, 2978], [2981, 2983], [2987, 2989], [3002, 3023],
231 [3025, 3045], [3059, 3076], [3130, 3132], [3134, 3159], [3162, 3167], [3170, 3173],
232 [3184, 3191], [3199, 3204], [3258, 3260], [3262, 3293], [3298, 3301], [3312, 3332],
233 [3386, 3388], [3390, 3423], [3426, 3429], [3446, 3449], [3456, 3460], [3479, 3481],
234 [3518, 3519], [3527, 3584], [3636, 3647], [3655, 3663], [3674, 3712], [3717, 3718],
235 [3723, 3724], [3726, 3731], [3752, 3753], [3764, 3772], [3774, 3775], [3783, 3791],
236 [3802, 3803], [3806, 3839], [3841, 3871], [3892, 3903], [3949, 3975], [3980, 4095],
237 [4139, 4158], [4170, 4175], [4182, 4185], [4190, 4192], [4194, 4196], [4199, 4205],
238 [4209, 4212], [4226, 4237], [4250, 4255], [4294, 4303], [4349, 4351], [4686, 4687],
239 [4702, 4703], [4750, 4751], [4790, 4791], [4806, 4807], [4886, 4887], [4955, 4968],
240 [4989, 4991], [5008, 5023], [5109, 5120], [5741, 5742], [5787, 5791], [5867, 5869],
241 [5873, 5887], [5906, 5919], [5938, 5951], [5970, 5983], [6001, 6015], [6068, 6102],
242 [6104, 6107], [6109, 6111], [6122, 6127], [6138, 6159], [6170, 6175], [6264, 6271],
243 [6315, 6319], [6390, 6399], [6429, 6469], [6510, 6511], [6517, 6527], [6572, 6592],
244 [6600, 6607], [6619, 6655], [6679, 6687], [6741, 6783], [6794, 6799], [6810, 6822],
245 [6824, 6916], [6964, 6980], [6988, 6991], [7002, 7042], [7073, 7085], [7098, 7167],
246 [7204, 7231], [7242, 7244], [7294, 7400], [7410, 7423], [7616, 7679], [7958, 7959],
247 [7966, 7967], [8006, 8007], [8014, 8015], [8062, 8063], [8127, 8129], [8141, 8143],
248 [8148, 8149], [8156, 8159], [8173, 8177], [8189, 8303], [8306, 8307], [8314, 8318],
249 [8330, 8335], [8341, 8449], [8451, 8454], [8456, 8457], [8470, 8472], [8478, 8483],
250 [8506, 8507], [8512, 8516], [8522, 8525], [8586, 9311], [9372, 9449], [9472, 10101],
251 [10132, 11263], [11493, 11498], [11503, 11516], [11518, 11519], [11558, 11567],
252 [11622, 11630], [11632, 11647], [11671, 11679], [11743, 11822], [11824, 12292],
253 [12296, 12320], [12330, 12336], [12342, 12343], [12349, 12352], [12439, 12444],
254 [12544, 12548], [12590, 12592], [12687, 12689], [12694, 12703], [12728, 12783],
255 [12800, 12831], [12842, 12880], [12896, 12927], [12938, 12976], [12992, 13311],
256 [19894, 19967], [40908, 40959], [42125, 42191], [42238, 42239], [42509, 42511],
257 [42540, 42559], [42592, 42593], [42607, 42622], [42648, 42655], [42736, 42774],
258 [42784, 42785], [42889, 42890], [42893, 43002], [43043, 43055], [43062, 43071],
259 [43124, 43137], [43188, 43215], [43226, 43249], [43256, 43258], [43260, 43263],
260 [43302, 43311], [43335, 43359], [43389, 43395], [43443, 43470], [43482, 43519],
261 [43561, 43583], [43596, 43599], [43610, 43615], [43639, 43641], [43643, 43647],
262 [43698, 43700], [43703, 43704], [43710, 43711], [43715, 43738], [43742, 43967],
263 [44003, 44015], [44026, 44031], [55204, 55215], [55239, 55242], [55292, 55295],
264 [57344, 63743], [64046, 64047], [64110, 64111], [64218, 64255], [64263, 64274],
265 [64280, 64284], [64434, 64466], [64830, 64847], [64912, 64913], [64968, 65007],
266 [65020, 65135], [65277, 65295], [65306, 65312], [65339, 65344], [65371, 65381],
267 [65471, 65473], [65480, 65481], [65488, 65489], [65496, 65497]];
// Walk each pair; start is its first element. `end` is presumably
// ranges[i][1], assigned on a line not shown — confirm.
268 for (i = 0; i < ranges.length; i++) {
269 start = ranges[i][0];
271 for (j = start; j <= end; j++) {
278 function splitQuery(query) {
281 for (var i = 0; i < query.length; i++) {
282 if (splitChars[query.charCodeAt(i)]) {
284 result.push(query.slice(start, i));
287 } else if (start === -1) {
292 result.push(query.slice(start));