|
1
|
|
|
<?php |
|
2
|
|
|
namespace Mystem; |
|
3
|
|
|
|
|
4
|
|
|
/**
 * Yii application component that feeds the configured word lists into
 * the static list properties of Word and checks articles for bad words.
 *
 * @class YiiMystem
 */
class YiiMystem extends \CApplicationComponent
{
    /**
     * @var string|array list of words or filePath
     */
    public $falsePositive;

    /**
     * @var string|array list of words or filePath
     */
    public $falsePositiveNormalized;

    /**
     * @var string|array list of words or filePath
     */
    public $falseNegative;

    /**
     * @var string|array list of words or filePath
     */
    public $falseNegativeNormalized;

    /**
     * @var bool when true, a non-empty result of the first check in
     *           checkArticle() is replaced by the result of heuristicsCheck()
     */
    public $heuristicsCheck = true;

    /**
     * Copies every configured list into the matching static property of Word
     * (`<listName>List`).
     *
     * A string value is treated as the path of a file holding one entry per
     * line; an array value is used as is; any other value (e.g. null) leaves
     * the corresponding Word list untouched.
     *
     * @throws \CException if a list file does not exist or cannot be read
     */
    public function init()
    {
        $lists = array('falsePositive', 'falsePositiveNormalized', 'falseNegative', 'falseNegativeNormalized');
        foreach ($lists as $listName) {
            $source = $this->$listName;
            if (is_string($source)) {
                Word::${$listName . 'List'} = $this->readListFile($listName, $source);
            } elseif (is_array($source)) {
                Word::${$listName . 'List'} = $source;
            }
        }
        parent::init();
    }

    /**
     * Reads a word list file into a plain, zero-indexed list of entries.
     *
     * Each line is trimmed, so CRLF line endings and stray spaces do not end
     * up inside the entries, and only lines that are empty after trimming are
     * dropped (a literal "0" entry is kept).
     *
     * @param string $listName name of the list property, used in error messages
     * @param string $path     path of the list file
     * @return string[]
     * @throws \CException if the file does not exist or cannot be read
     */
    private function readListFile($listName, $path)
    {
        if (!file_exists($path)) {
            throw new \CException("List file $listName '{$path}' not found");
        }

        $contents = file_get_contents($path);
        if ($contents === false) {
            // e.g. the path is a directory or the file is not readable
            throw new \CException("List file $listName '{$path}' could not be read");
        }

        $entries = array();
        foreach (explode("\n", $contents) as $line) {
            $line = trim($line);
            if ($line !== '') {
                $entries[] = $line;
            }
        }

        return $entries;
    }

    /**
     * Checks the given text for bad words.
     *
     * When the first check finds something and $heuristicsCheck is enabled,
     * the findings are replaced by the result of heuristicsCheck().
     *
     * @param string $article
     * @return string[]
     */
    public function checkArticle($article)
    {
        $article = new Article($article);
        $result = $article->checkBadWords(false);
        if (!empty($result) && $this->heuristicsCheck) {
            $result = $this->heuristicsCheck($article);
        }
        return $result;
    }

    /**
     * Make article from nominative not strict words and runs check again
     *
     * Findings of the second check are reported under the original spelling
     * of the first word of $article whose normalized() form equals the key
     * of the finding.
     *
     * @param Article $article
     * @return string[] findings keyed by the original word; empty array when
     *                  no word qualifies for the second check
     */
    protected function heuristicsCheck(Article $article)
    {
        $nominativeArticle = '';
        foreach ($article->words as $word) {
            // NOTE(review): assumes every word has at least one variant and that
            // casting a Word to string yields its normalized form - confirm in Word.
            if (!$word->variants[0]['strict'] && !$word->checkGrammeme(MystemConst::OTHER_VULGARISM, 0)) {
                $nominativeArticle .= ' ' . $word;
            }
        }
        if ($nominativeArticle === '') {
            return array();
        }

        $newArticle = new Article($nominativeArticle);
        $words = $newArticle->checkBadWords(false);

        $result = array();
        foreach ($words as $original => $word) {
            foreach ($article->words as $originalWord) {
                if ($original === $originalWord->normalized()) {
                    $result[$originalWord->original] = $word;
                    // only the first matching word of the article is reported
                    break;
                }
            }
        }
        return $result;
    }
}
|
81
|
|
|
|
The issue could also be caused by a filter entry in the build configuration. If the path has been excluded in your configuration, e.g. excluded_paths: ["lib/*"], you can move it to the dependency path list instead (see the linked documentation for the exact configuration). For further information see https://scrutinizer-ci.com/docs/tools/php/php-scrutinizer/#list-dependency-paths