1
|
|
|
<?php |
2
|
|
|
/*
 You may not change or alter any portion of this comment or credits
 of supporting developers from this source code or any supporting source code
 which is considered copyrighted (c) material of the original comment or credit authors.

 This program is distributed in the hope that it will be useful,
 but WITHOUT ANY WARRANTY; without even the implied warranty of
 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
 */
11
|
|
|
|
12
|
|
|
namespace Xmf; |
13
|
|
|
|
14
|
|
|
/**
 * StopWords - facilitate filtering of common or purely connective words for natural language processing
 *
 * @category  Xmf\StopWords
 * @package   Xmf
 * @author    Richard Griffith <[email protected]>
 * @author    trabis <[email protected]>
 * @copyright 2011-2018 XOOPS Project (https://xoops.org)
 * @license   GNU GPL 2.0 or later (https://www.gnu.org/licenses/gpl-2.0.html)
 * @link      https://xoops.org
 * @see       https://en.wikipedia.org/wiki/Stop_words
 */
26
|
|
|
class StopWords
{
    /**
     * Character encoding handed to the mbstring functions
     */
    const ENCODING = 'UTF-8';

    /**
     * Stop words stored as array keys (each mapped to true) so a lookup is a single isset()
     *
     * @var bool[]
     */
    protected $stopwordList = array();

    /**
     * StopWords constructor - load stop words for current locale
     *
     * The words are read from the _XMF_STOPWORDS constant, a whitespace separated list.
     * If the constant is not yet defined, the 'stopwords' language file is loaded first.
     * When the constant is still undefined afterwards the list stays empty, and every
     * word is reported as significant by check().
     *
     * @todo specify locale to constructor, will require shift away from defined constant
     */
    public function __construct()
    {
        if (!defined('_XMF_STOPWORDS')) {
            Language::load('stopwords');
        }
        if (!defined('_XMF_STOPWORDS')) {
            return;
        }

        // Fold case exactly as check() folds its key, otherwise a stop word written
        // with any upper case letter in the language file could never be matched.
        $normalized = $this->lowercase((string) _XMF_STOPWORDS);

        // Split on runs of ASCII whitespace and drop empty pieces, so doubled spaces,
        // tabs or line breaks in the definition do not produce empty or fused entries.
        // An explicit byte class is used instead of \s to keep the split independent
        // of the PCRE locale tables when handling multibyte text.
        $words = preg_split('/[ \t\r\n]+/', $normalized, -1, PREG_SPLIT_NO_EMPTY);
        if (is_array($words)) {
            $this->stopwordList = array_fill_keys($words, true);
        }
    }

    /**
     * check - look up a word in a list of stop words and
     * classify it as a significant word or a stop word.
     *
     * The comparison ignores letter case.
     *
     * @param string $key the word to check
     *
     * @return bool True if word is significant, false if it is a stop word
     */
    public function check($key)
    {
        $key = $this->lowercase($key);

        return !isset($this->stopwordList[$key]);
    }

    /**
     * Lower case a string, using mbstring with the class encoding when available
     * and falling back to strtolower() otherwise.
     *
     * Shared by the constructor and check() so stored words and lookup keys are
     * always normalized the same way.
     *
     * @param string $text text to convert
     *
     * @return string lower cased text
     */
    private function lowercase($text)
    {
        return function_exists('mb_strtolower')
            ? mb_strtolower($text, static::ENCODING)
            : strtolower($text);
    }
}
69
|
|
|
|