1 | <?php |
||
2 | |||
3 | declare(strict_types=1); |
||
4 | |||
5 | namespace PhpSpellcheck\Spellchecker; |
||
6 | |||
7 | use PhpSpellcheck\Misspelling; |
||
8 | use PhpSpellcheck\Spellchecker\LanguageTool\LanguageToolApiClient; |
||
9 | use PhpSpellcheck\Utils\SortedNumericArrayNearestValueFinder; |
||
10 | use PhpSpellcheck\Utils\TextEncoding; |
||
11 | use Webmozart\Assert\Assert; |
||
12 | |||
/**
 * Spellchecker that delegates the analysis to a LanguageTool API client.
 *
 * Every match returned by the client is converted into a Misspelling whose
 * position is expressed as a line number plus an offset relative to that line,
 * both derived from the positions of the line breaks found in the checked text.
 */
class LanguageTool implements SpellcheckerInterface
{
    /**
     * @var LanguageToolApiClient
     */
    private $apiClient;

    public function __construct(LanguageToolApiClient $apiClient)
    {
        $this->apiClient = $apiClient;
    }

    /**
     * Checks $text and yields one Misspelling per match reported by the API client.
     *
     * This method is a generator: the language assertion and the API call only
     * run once the caller starts iterating over the returned value.
     *
     * @param string      $text      text to spellcheck
     * @param array       $languages languages to check against; must not be empty
     * @param array       $context   merged into each Misspelling's context; the entry stored
     *                               under this class name (if any) is forwarded to the API client
     * @param string|null $encoding  encoding used to locate line breaks in $text;
     *                               null falls back to the mbstring internal encoding
     *
     * @return Misspelling[]
     */
    public function check(
        string $text,
        array $languages = [],
        array $context = [],
        ?string $encoding = TextEncoding::UTF8
    ): iterable {
        Assert::notEmpty($languages, 'LanguageTool requires at least one language to run it\'s spellchecking process');

        $check = $this->apiClient->spellCheck($text, $languages, $context[self::class] ?? []);
        $lineBreaksOffset = $this->getLineBreaksOffset($text, $encoding);

        foreach ($check['matches'] as $match) {
            [$offsetFromLine, $line] = $this->computeRealOffsetAndLine($match, $lineBreaksOffset);

            yield new Misspelling(
                mb_substr($match['context']['text'], $match['context']['offset'], $match['context']['length']),
                $offsetFromLine,
                $line, // line break index transformed in line number
                array_column($match['replacements'], 'value'),
                array_merge(
                    [
                        'sentence' => $match['sentence'],
                        'spellingErrorMessage' => $match['message'],
                        'ruleUsed' => $match['rule'],
                    ],
                    $context
                )
            );
        }
    }

    /**
     * {@inheritdoc}
     */
    public function getSupportedLanguages(): iterable
    {
        return $this->apiClient->getSupportedLanguages();
    }

    /**
     * Translates the absolute offset of a match into a line-relative offset and a line number.
     *
     * @param array $match            a single match; only its 'offset' entry is read here
     * @param int[] $lineBreaksOffset ascending positions of the line breaks in the checked text
     *
     * @return array two-element list: [offset relative to the line, 1-based line number]
     */
    private function computeRealOffsetAndLine(array $match, array $lineBreaksOffset): array
    {
        $languageToolsOffset = (int) $match['offset'];

        if ($lineBreaksOffset === []) {
            // Single-line text: there is no line break to look up, so the word is
            // on line 1 and its absolute offset is already the line offset.
            return [$languageToolsOffset, 1];
        }

        $index = SortedNumericArrayNearestValueFinder::findIndex(
            $languageToolsOffset,
            $lineBreaksOffset,
            SortedNumericArrayNearestValueFinder::FIND_HIGHER
        );

        // This test must come before the "$index === 0" one: when the text has a
        // single line break and the word sits after it, the finder can only answer
        // index 0 although the word is not on the first line.
        // NOTE(review): presumably findIndex() returns the last index when no higher
        // value exists - confirm against SortedNumericArrayNearestValueFinder.
        if ($languageToolsOffset > $lineBreaksOffset[$index]) {
            // word is located after the line break that was found (last line)
            return [$languageToolsOffset - $lineBreaksOffset[$index], $index + 2];
        }

        if ($index === 0) {
            // word is on the first line
            return [$languageToolsOffset, 1];
        }

        // word is on the line that ends with the found line break; measure the
        // offset from the line break that precedes it
        return [$languageToolsOffset - $lineBreaksOffset[$index - 1], $index + 1];
    }

    /**
     * Collects the character position of every PHP_EOL occurrence in $text.
     *
     * NOTE(review): PHP_EOL is platform dependent, so which sequences count as a
     * line break depends on the host running this code - confirm this is intended.
     *
     * @param string      $text     text to scan
     * @param string|null $encoding encoding passed to mb_strpos(); null means
     *                              the mbstring internal encoding
     *
     * @return int[] positions in ascending order; empty when $text has no line break
     */
    private function getLineBreaksOffset(string $text, ?string $encoding): array
    {
        if ($encoding === null) {
            $encoding = \Safe\mb_internal_encoding();
        }

        $start = 0;
        $lineBreaksOffset = [];

        // Strict comparison is required: a line break at position 0 makes
        // mb_strpos() return int 0, which a loose "!= false" treats as "not found".
        while (($pos = \mb_strpos($text, PHP_EOL, $start, $encoding)) !== false) {
            $lineBreaksOffset[] = $pos;
            $start = $pos + 1; // start searching from next position.
        }

        return $lineBreaksOffset;
    }
}
||
110 |