/*
 * language_data.js
 * ~~~~~~~~~~~~~~~~
 *
 * This script contains the language-specific data used by searchtools.js,
 * namely the list of stopwords, stemmer, scorer and splitter.
 *
 * :copyright: Copyright 2007-2021 by the Sphinx team, see AUTHORS.
 * :license: BSD, see LICENSE for details.
 *
 */

/** Words listed here as stopwords for the search code that consumes this file. */
var stopwords = ["a","and","are","as","at","be","but","by","for","if","in","into","is","it","near","no","not","of","on","or","such","that","the","their","then","there","these","they","this","to","was","will","with"];


/* A non-minified version is available as a separate JS file. */

/**
 * Porter Stemmer
 *
 * Constructor: use as `new Stemmer()`. Every instance exposes a single
 * method, `stemWord(w)`, which returns the stem of the string `w`.
 *
 * All regular expressions are compiled once per instance (none of them use
 * the `g`/`y` flags, so `test`/`exec` are stateless and safe to share
 * between calls).
 */
var Stemmer = function() {

  // Step 2: suffix -> replacement, applied when the remaining stem has m > 0.
  var step2list = {
    ational: 'ate',
    tional: 'tion',
    enci: 'ence',
    anci: 'ance',
    izer: 'ize',
    bli: 'ble',
    alli: 'al',
    entli: 'ent',
    eli: 'e',
    ousli: 'ous',
    ization: 'ize',
    ation: 'ate',
    ator: 'ate',
    alism: 'al',
    iveness: 'ive',
    fulness: 'ful',
    ousness: 'ous',
    aliti: 'al',
    iviti: 'ive',
    biliti: 'ble',
    logi: 'log'
  };

  // Step 3: suffix -> replacement, applied when the remaining stem has m > 0.
  var step3list = {
    icate: 'ic',
    ative: '',
    alize: 'al',
    iciti: 'ic',
    ical: 'ic',
    ful: '',
    ness: ''
  };

  var c = "[^aeiou]";          // consonant
  var v = "[aeiouy]";          // vowel
  var C = c + "[^aeiouy]*";    // consonant sequence
  var V = v + "[aeiou]*";      // vowel sequence

  // Predicates on a candidate stem.
  var mgr0 = new RegExp("^(" + C + ")?" + V + C);                    // [C]VC... is m>0
  var meq1 = new RegExp("^(" + C + ")?" + V + C + "(" + V + ")?$");  // [C]VC[V] is m=1
  var mgr1 = new RegExp("^(" + C + ")?" + V + C + V + C);            // [C]VCVC... is m>1
  var s_v = new RegExp("^(" + C + ")?" + v);                         // vowel in stem
  // Whole stem is: consonant sequence, one vowel, one consonant other than w/x/y.
  var cvc = new RegExp("^" + C + v + "[^aeiouwxy]$");
  // Stem ends in a doubled consonant other than l, s or z.
  var doubled = /([^aeiouylsz])\1$/;

  // Suffix matchers, one (or two) per step.
  var step1aEs = /^(.+?)(ss|i)es$/;
  var step1aS = /^(.+?)([^s])s$/;
  var step1bEed = /^(.+?)eed$/;
  var step1bEdIng = /^(.+?)(ed|ing)$/;
  var step1bRestoreE = /(at|bl|iz)$/;
  var step1cY = /^(.+?)y$/;
  var step2Suffix = /^(.+?)(ational|tional|enci|anci|izer|bli|alli|entli|eli|ousli|ization|ation|ator|alism|iveness|fulness|ousness|aliti|iviti|biliti|logi)$/;
  var step3Suffix = /^(.+?)(icate|ative|alize|iciti|ical|ful|ness)$/;
  var step4Suffix = /^(.+?)(al|ance|ence|er|ic|able|ible|ant|ement|ment|ent|ou|ism|ate|iti|ous|ive|ize)$/;
  var step4Ion = /^(.+?)(s|t)(ion)$/;
  var step5E = /^(.+?)e$/;
  var step5Ll = /ll$/;

  /**
   * Return the stem of a single word.
   *
   * @param {string} w - The word to stem. The suffix patterns are written in
   *     lower case, so upper-case input will generally pass through unchanged.
   * @returns {string} The stemmed word; words shorter than three characters
   *     are returned untouched.
   */
  this.stemWord = function (w) {
    var match;
    var stem;

    if (w.length < 3) {
      return w;
    }

    // A leading "y" is temporarily upper-cased so that the vowel class
    // ([aeiouy]) does not treat it as a vowel; it is restored at the end.
    var firstch = w.charAt(0);
    if (firstch === "y") {
      w = "Y" + w.slice(1);
    }

    // Step 1a: "...sses" -> "...ss", "...ies" -> "...i",
    // otherwise drop a final "s" that is not preceded by another "s".
    match = step1aEs.exec(w) || step1aS.exec(w);
    if (match) {
      w = match[1] + match[2];
    }

    // Step 1b: "...eed" -> "...ee" (when m > 0); otherwise strip "ed"/"ing"
    // when the remaining stem contains a vowel, then tidy the stem's ending.
    match = step1bEed.exec(w);
    if (match) {
      if (mgr0.test(match[1])) {
        w = w.slice(0, -1);
      }
    } else {
      match = step1bEdIng.exec(w);
      if (match && s_v.test(match[1])) {
        w = match[1];
        if (step1bRestoreE.test(w)) {
          w = w + "e";            // "at" / "bl" / "iz" get their "e" back
        } else if (doubled.test(w)) {
          w = w.slice(0, -1);     // undouble the final consonant
        } else if (cvc.test(w)) {
          w = w + "e";            // short consonant-vowel-consonant stem
        }
      }
    }

    // Step 1c: final "y" -> "i" when the stem contains a vowel.
    match = step1cY.exec(w);
    if (match && s_v.test(match[1])) {
      w = match[1] + "i";
    }

    // Step 2: replace the suffixes listed in step2list (stem must have m > 0).
    match = step2Suffix.exec(w);
    if (match && mgr0.test(match[1])) {
      w = match[1] + step2list[match[2]];
    }

    // Step 3: replace the suffixes listed in step3list (stem must have m > 0).
    match = step3Suffix.exec(w);
    if (match && mgr0.test(match[1])) {
      w = match[1] + step3list[match[2]];
    }

    // Step 4: drop a suffix when the stem has m > 1. The "(s|t)ion" rule is
    // only considered when the main suffix pattern does not match at all, and
    // it keeps the "s"/"t" as part of the stem.
    match = step4Suffix.exec(w);
    if (match) {
      if (mgr1.test(match[1])) {
        w = match[1];
      }
    } else {
      match = step4Ion.exec(w);
      if (match) {
        stem = match[1] + match[2];
        if (mgr1.test(stem)) {
          w = stem;
        }
      }
    }

    // Step 5: drop a final "e" when m > 1, or when m = 1 and the stem is not
    // of the consonant-vowel-consonant shape; then "...ll" -> "...l" if m > 1.
    match = step5E.exec(w);
    if (match) {
      stem = match[1];
      if (mgr1.test(stem) || (meq1.test(stem) && !cvc.test(stem))) {
        w = stem;
      }
    }
    if (step5Ll.test(w) && mgr1.test(w)) {
      w = w.slice(0, -1);
    }

    // and turn initial Y back to y
    if (firstch === "y") {
      w = "y" + w.slice(1);
    }
    return w;
  };
};




/**
 * Lookup table of word-separating characters, keyed by the value returned by
 * String.prototype.charCodeAt (a UTF-16 code unit). `splitChars[code]` is
 * `true` for every separator and `undefined` for everything else.
 *
 * The table is built once, at load time, from a list of individual code
 * units plus a list of inclusive [start, end] ranges.
 */
var splitChars = (function() {
  var table = {};

  // Individual separator code units.
  var singles = [96, 180, 187, 191, 215, 247, 749, 885, 903, 907, 909, 930, 1014, 1648,
         1748, 1809, 2416, 2473, 2481, 2526, 2601, 2609, 2612, 2615, 2653, 2702,
         2706, 2729, 2737, 2740, 2857, 2865, 2868, 2910, 2928, 2948, 2961, 2971,
         2973, 3085, 3089, 3113, 3124, 3213, 3217, 3241, 3252, 3295, 3341, 3345,
         3369, 3506, 3516, 3633, 3715, 3721, 3736, 3744, 3748, 3750, 3756, 3761,
         3781, 3912, 4239, 4347, 4681, 4695, 4697, 4745, 4785, 4799, 4801, 4823,
         4881, 5760, 5901, 5997, 6313, 7405, 8024, 8026, 8028, 8030, 8117, 8125,
         8133, 8181, 8468, 8485, 8487, 8489, 8494, 8527, 11311, 11359, 11687, 11695,
         11703, 11711, 11719, 11727, 11735, 12448, 12539, 43010, 43014, 43019, 43587,
         43696, 43713, 64286, 64297, 64311, 64317, 64319, 64322, 64325, 65141];
  singles.forEach(function (code) {
    table[code] = true;
  });

  // Inclusive [start, end] ranges of separator code units.
  var ranges = [[0, 47], [58, 64], [91, 94], [123, 169], [171, 177], [182, 184], [706, 709],
         [722, 735], [741, 747], [751, 879], [888, 889], [894, 901], [1154, 1161],
         [1318, 1328], [1367, 1368], [1370, 1376], [1416, 1487], [1515, 1519], [1523, 1568],
         [1611, 1631], [1642, 1645], [1750, 1764], [1767, 1773], [1789, 1790], [1792, 1807],
         [1840, 1868], [1958, 1968], [1970, 1983], [2027, 2035], [2038, 2041], [2043, 2047],
         [2070, 2073], [2075, 2083], [2085, 2087], [2089, 2307], [2362, 2364], [2366, 2383],
         [2385, 2391], [2402, 2405], [2419, 2424], [2432, 2436], [2445, 2446], [2449, 2450],
         [2483, 2485], [2490, 2492], [2494, 2509], [2511, 2523], [2530, 2533], [2546, 2547],
         [2554, 2564], [2571, 2574], [2577, 2578], [2618, 2648], [2655, 2661], [2672, 2673],
         [2677, 2692], [2746, 2748], [2750, 2767], [2769, 2783], [2786, 2789], [2800, 2820],
         [2829, 2830], [2833, 2834], [2874, 2876], [2878, 2907], [2914, 2917], [2930, 2946],
         [2955, 2957], [2966, 2968], [2976, 2978], [2981, 2983], [2987, 2989], [3002, 3023],
         [3025, 3045], [3059, 3076], [3130, 3132], [3134, 3159], [3162, 3167], [3170, 3173],
         [3184, 3191], [3199, 3204], [3258, 3260], [3262, 3293], [3298, 3301], [3312, 3332],
         [3386, 3388], [3390, 3423], [3426, 3429], [3446, 3449], [3456, 3460], [3479, 3481],
         [3518, 3519], [3527, 3584], [3636, 3647], [3655, 3663], [3674, 3712], [3717, 3718],
         [3723, 3724], [3726, 3731], [3752, 3753], [3764, 3772], [3774, 3775], [3783, 3791],
         [3802, 3803], [3806, 3839], [3841, 3871], [3892, 3903], [3949, 3975], [3980, 4095],
         [4139, 4158], [4170, 4175], [4182, 4185], [4190, 4192], [4194, 4196], [4199, 4205],
         [4209, 4212], [4226, 4237], [4250, 4255], [4294, 4303], [4349, 4351], [4686, 4687],
         [4702, 4703], [4750, 4751], [4790, 4791], [4806, 4807], [4886, 4887], [4955, 4968],
         [4989, 4991], [5008, 5023], [5109, 5120], [5741, 5742], [5787, 5791], [5867, 5869],
         [5873, 5887], [5906, 5919], [5938, 5951], [5970, 5983], [6001, 6015], [6068, 6102],
         [6104, 6107], [6109, 6111], [6122, 6127], [6138, 6159], [6170, 6175], [6264, 6271],
         [6315, 6319], [6390, 6399], [6429, 6469], [6510, 6511], [6517, 6527], [6572, 6592],
         [6600, 6607], [6619, 6655], [6679, 6687], [6741, 6783], [6794, 6799], [6810, 6822],
         [6824, 6916], [6964, 6980], [6988, 6991], [7002, 7042], [7073, 7085], [7098, 7167],
         [7204, 7231], [7242, 7244], [7294, 7400], [7410, 7423], [7616, 7679], [7958, 7959],
         [7966, 7967], [8006, 8007], [8014, 8015], [8062, 8063], [8127, 8129], [8141, 8143],
         [8148, 8149], [8156, 8159], [8173, 8177], [8189, 8303], [8306, 8307], [8314, 8318],
         [8330, 8335], [8341, 8449], [8451, 8454], [8456, 8457], [8470, 8472], [8478, 8483],
         [8506, 8507], [8512, 8516], [8522, 8525], [8586, 9311], [9372, 9449], [9472, 10101],
         [10132, 11263], [11493, 11498], [11503, 11516], [11518, 11519], [11558, 11567],
         [11622, 11630], [11632, 11647], [11671, 11679], [11743, 11822], [11824, 12292],
         [12296, 12320], [12330, 12336], [12342, 12343], [12349, 12352], [12439, 12444],
         [12544, 12548], [12590, 12592], [12687, 12689], [12694, 12703], [12728, 12783],
         [12800, 12831], [12842, 12880], [12896, 12927], [12938, 12976], [12992, 13311],
         [19894, 19967], [40908, 40959], [42125, 42191], [42238, 42239], [42509, 42511],
         [42540, 42559], [42592, 42593], [42607, 42622], [42648, 42655], [42736, 42774],
         [42784, 42785], [42889, 42890], [42893, 43002], [43043, 43055], [43062, 43071],
         [43124, 43137], [43188, 43215], [43226, 43249], [43256, 43258], [43260, 43263],
         [43302, 43311], [43335, 43359], [43389, 43395], [43443, 43470], [43482, 43519],
         [43561, 43583], [43596, 43599], [43610, 43615], [43639, 43641], [43643, 43647],
         [43698, 43700], [43703, 43704], [43710, 43711], [43715, 43738], [43742, 43967],
         [44003, 44015], [44026, 44031], [55204, 55215], [55239, 55242], [55292, 55295],
         [57344, 63743], [64046, 64047], [64110, 64111], [64218, 64255], [64263, 64274],
         [64280, 64284], [64434, 64466], [64830, 64847], [64912, 64913], [64968, 65007],
         [65020, 65135], [65277, 65295], [65306, 65312], [65339, 65344], [65371, 65381],
         [65471, 65473], [65480, 65481], [65488, 65489], [65496, 65497]];
  ranges.forEach(function (range) {
    for (var code = range[0]; code <= range[1]; code++) {
      table[code] = true;
    }
  });

  return table;
})();

/**
 * Split a query string into words, using `splitChars` as the separator set.
 *
 * Runs of separator characters are collapsed, and leading/trailing
 * separators produce no empty entries.
 *
 * @param {string} query - The raw query text.
 * @returns {string[]} The words in order of appearance; an empty array when
 *     the query is empty or consists only of separators.
 */
function splitQuery(query) {
  var words = [];
  var wordStart = -1;  // index where the current word began, -1 while between words
  for (var pos = 0; pos < query.length; pos++) {
    if (!splitChars[query.charCodeAt(pos)]) {
      // Word character: open a new word unless one is already in progress.
      if (wordStart === -1) {
        wordStart = pos;
      }
    } else if (wordStart !== -1) {
      // Separator right after a word: close the word.
      words.push(query.slice(wordStart, pos));
      wordStart = -1;
    }
  }
  // Flush a word that runs to the end of the query.
  if (wordStart !== -1) {
    words.push(query.slice(wordStart));
  }
  return words;
}