Issues (11)

src/Mystem/Word.php (1 issue)

Severity
1
<?php
2
namespace Mystem;
3
4
/**
 * Class Word
 *
 * A single word together with the lexical interpretation variants that were
 * parsed from the mystem output for it.
 */
class Word
{
    /**
     * @var string|null $grammemeRegexp cached regular expression built from MystemConst::grammemeList()
     */
    private static $grammemeRegexp = null;

    /** @var string $constructorClass class name instantiated by the newFrom* factory methods */
    protected static $constructorClass = '\Mystem\Word';

    /**
     * @var string $original original string
     */
    public $original;

    /**
     * @var array[] $variants lexical interpretation variants, each one holding:
     *  - string $normalized - normalized word representation
     *  - boolean $strict - true when the mystem variant carries `qual` === 'bastard'
     *    NOTE(review): the flag used to be documented as "dictionary or predictable
     *    normalized representation" - confirm that this matches the intended meaning
     *  - array $grammems - lexical information (constants from MystemConst)
     */
    public $variants = array();

    /** @var string[] $falsePositiveList lower-cased original forms that must not be reported as bad words */
    public static $falsePositiveList = array();

    /** @var string[] $falsePositiveNormalizedList normalized forms that must not be reported as bad words */
    public static $falsePositiveNormalizedList = array();

    /** @var string[] $falseNegativeList lower-cased original forms that must always be reported as bad words */
    public static $falseNegativeList = array();

    /** @var string[] $falseNegativeNormalizedList normalized forms that must always be reported as bad words */
    public static $falseNegativeNormalizedList = array();

    /**
     * Builds the shared grammeme regular expression the first time a Word is created.
     */
    public function __construct()
    {
        if (self::$grammemeRegexp === null) {
            // NOTE(review): the alternation is unanchored, so a constant that is a
            // substring of another one may match first - confirm grammemeList() ordering.
            self::$grammemeRegexp = '#(' . implode('|', MystemConst::grammemeList()) . ')#u';
        }
    }

    /**
     * Creates a word either from a prepared mystem structure or from a plain string.
     *
     * @param array|string $lexicalString - prepared structure from Mystem, or the raw word
     * @param int $maxVariants - maximum number of variants to keep, null for all of them
     * @return Word
     */
    public static function newFromLexicalString($lexicalString, $maxVariants = null)
    {
        /* @var Word $word */
        $word = new static::$constructorClass();
        if (is_array($lexicalString)) {
            $word->parse($lexicalString, $maxVariants);
        } else {
            // Nothing to parse: keep the raw string and leave the variants empty.
            $word->original = $lexicalString;
        }
        return $word;
    }

    /**
     * Runs the word through Mystem::stemm() and wraps the first returned entry.
     *
     * @param string $word
     * @param int $maxVariants
     * @return Word
     */
    public static function stemm($word, $maxVariants = null)
    {
        $lexicalString = Mystem::stemm($word);
        // Fall back to the raw word when mystem returned no entry for it.
        return self::newFromLexicalString(isset($lexicalString[0]) ? $lexicalString[0] : $word, $maxVariants);
    }

    /**
     * Normalized word
     *
     * @return string normalized form of the first variant, or an empty string when there is none
     */
    public function normalized()
    {
        if (isset($this->variants[0], $this->variants[0]['normalized'])) {
            return $this->variants[0]['normalized'];
        }
        return '';
    }

    /**
     * @return string same value as normalized()
     */
    public function __toString()
    {
        return $this->normalized();
    }

    /**
     * Parse raw morphological data from mystem and fill Word object data
     *
     * Entries without `analysis` (or variants without `lex` / `gr`) are tolerated
     * and simply produce no variants / an empty normalized form / no grammemes.
     *
     * @param array $lexicalString - prepared string from Mystem
     * @param int $maxVariants - stop after this many variants (at least one is always stored)
     */
    protected function parse($lexicalString, $maxVariants = null)
    {
        $counter = 0;
        $this->original = isset($lexicalString['text']) ? $lexicalString['text'] : '';
        if (!isset($lexicalString['analysis']) || !is_array($lexicalString['analysis'])) {
            return;
        }

        foreach ($lexicalString['analysis'] as $aVariant) {
            $variant = array(
                'normalized' => isset($aVariant['lex']) ? $aVariant['lex'] : '',
                'strict' => isset($aVariant['qual']) && $aVariant['qual'] === 'bastard',
                'grammems' => array(),
            );
            // preg_match_all() returns the number of full matches (0 or false => keep empty list).
            if (isset($aVariant['gr']) && preg_match_all(self::$grammemeRegexp, $aVariant['gr'], $match)) {
                $variant['grammems'] = $match[0];
            }
            $this->variants[$counter++] = $variant;
            if ($maxVariants !== null && $counter >= $maxVariants) {
                break;
            }
        }
    }

    /**
     * Adds the grammeme to every variant that does not have it yet.
     *
     * @param string $gramm - grammar primitive from MystemConst
     * @return int number of variants that were changed
     */
    public function addGrammeme($gramm)
    {
        $counter = 0;
        $count = count($this->variants);
        for ($i = 0; $i < $count; $i++) {
            $counter += (int)$this->addGrammemeInVariant($gramm, $i);
        }
        return $counter;
    }

    /**
     * @param string $gramm - grammar primitive from MystemConst
     * @param int $level - variant index
     * @return bool true when the grammeme was added, false when the variant is missing or already has it
     */
    protected function addGrammemeInVariant($gramm, $level = null)
    {
        if (!isset($this->variants[$level]) || in_array($gramm, $this->variants[$level]['grammems'], true)) {
            return false;
        }
        $this->variants[$level]['grammems'][] = $gramm;
        return true;
    }

    /**
     * Removes the grammeme from every variant that has it.
     *
     * @param string $gramm - grammar primitive from MystemConst
     * @return int number of variants that were changed
     */
    public function removeGrammeme($gramm)
    {
        $counter = 0;
        $count = count($this->variants);
        for ($i = 0; $i < $count; $i++) {
            $counter += (int)$this->removeGrammemeInVariant($gramm, $i);
        }
        return $counter;
    }

    /**
     * @param string $gramm - grammar primitive from MystemConst
     * @param int $level - variant index
     * @return bool true when the grammeme was found and removed
     */
    protected function removeGrammemeInVariant($gramm, $level)
    {
        if (!isset($this->variants[$level]['grammems'])) {
            return false;
        }
        $key = array_search($gramm, $this->variants[$level]['grammems'], true);
        if ($key === false) {
            // Must not reach unset(): a false key is cast to 0 and would drop the first grammeme.
            return false;
        }
        unset($this->variants[$level]['grammems'][$key]);
        return true;
    }

    /**
     * Search grammeme primitive in word variants
     *
     * @param string $gramm - grammar primitive from MystemConst
     * @param integer $level - variants maximum depth; the first variant is always inspected
     * @return boolean
     */
    public function checkGrammeme($gramm, $level = null)
    {
        $counter = 0;
        foreach ($this->variants as $variant) {
            if (in_array($gramm, $variant['grammems'], true)) {
                return true;
            }
            if ($level !== null && ++$counter >= $level) {
                return false;
            }
        }
        return false;
    }

    /**
     * Get verb time: present, past or future
     *
     * @param int $variant find in which morphological variant
     * @return null|string MystemConst::PRESENT, MystemConst::PAST or MystemConst::FUTURE
     */
    public function getVerbTime($variant = 0)
    {
        return $this->searchGrammemeInList(array(
            MystemConst::PRESENT, MystemConst::FUTURE, MystemConst::PAST
        ), $variant);
    }

    /**
     * Get count: single or plural
     *
     * @param int $variant find in which morphological variant
     * @return null|string - MystemConst
     */
    public function getCount($variant = 0)
    {
        return $this->searchGrammemeInList(array(
            MystemConst::SINGULAR, MystemConst::PLURAL
        ), $variant);
    }

    /**
     * Get gender
     *
     * @param int $variant find in which morphological variant
     * @return null|string - MystemConst
     */
    public function getGender($variant = 0)
    {
        return $this->searchGrammemeInList(array(
            MystemConst::FEMININE, MystemConst::MASCULINE, MystemConst::NEUTER
        ), $variant);
    }

    /**
     * Get animate
     *
     * @param int $variant find in which morphological variant
     * @return null|string - MystemConst
     */
    public function getAnimate($variant = 0)
    {
        return $this->searchGrammemeInList(array(
            MystemConst::ANIMATE, MystemConst::INANIMATE
        ), $variant);
    }

    /**
     * Get noun case
     *
     * @param int $variant find in which morphological variant
     * @return null|string - MystemConst
     */
    public function getNounCase($variant = 0)
    {
        return $this->searchGrammemeInList(array(
            MystemConst::NOMINATIVE,
            MystemConst::GENITIVE,
            MystemConst::DATIVE,
            MystemConst::ACCUSATIVE,
            MystemConst::INSTRUMENTAL,
            MystemConst::PREPOSITIONAL,
            MystemConst::PARTITIVE,
            MystemConst::LOCATIVE,
            MystemConst::VOCATIVE,
        ), $variant);
    }

    /**
     * Returns the first entry of $constants that is present in the variant's grammemes.
     *
     * @param array $constants - candidates, checked in the given order
     * @param int $variant - variant index
     * @return null|string null when the variant does not exist or none of the candidates is present
     */
    protected function searchGrammemeInList(array $constants, $variant = 0)
    {
        if (!isset($this->variants[$variant])) {
            return null;
        }

        foreach ($constants as $grammeme) {
            if (in_array($grammeme, $this->variants[$variant]['grammems'], true)) {
                return $grammeme;
            }
        }

        return null;
    }

    /**
     * Tells whether the word is a vulgarism, applying the static exception lists.
     *
     * Side effect: when an exception list overrides the mystem verdict, the
     * MystemConst::OTHER_VULGARISM grammeme is removed from / added to the variants
     * so that they agree with the returned value.
     *
     * @return bool
     */
    public function isBadWord()
    {
        // Cast guards against an unset (null) original.
        $original = mb_strtolower((string)$this->original, 'UTF-8');
        if ($this->checkGrammeme(MystemConst::OTHER_VULGARISM)) {
            $inExceptions = in_array($original, self::$falsePositiveList, true) ||
                (in_array($this->normalized(), self::$falsePositiveNormalizedList, true) &&
                    !in_array($original, self::$falseNegativeList, true));
            if ($inExceptions) {
                $this->removeGrammeme(MystemConst::OTHER_VULGARISM);
            }
            return !$inExceptions;
        }

        $inExceptions = in_array($original, self::$falseNegativeList, true) ||
            (in_array($this->normalized(), self::$falseNegativeNormalizedList, true) &&
                !in_array($original, self::$falsePositiveList, true));
        if ($inExceptions) {
            $this->addGrammeme(MystemConst::OTHER_VULGARISM);
        }
        return $inExceptions;
    }
}
310