tigitz /
php-spellchecker
| 1 | <?php |
||
| 2 | |||
| 3 | declare(strict_types=1); |
||
| 4 | |||
| 5 | namespace PhpSpellcheck\Spellchecker; |
||
| 6 | |||
| 7 | use PhpSpellcheck\Misspelling; |
||
| 8 | use PhpSpellcheck\Spellchecker\LanguageTool\LanguageToolApiClient; |
||
| 9 | use PhpSpellcheck\Utils\SortedNumericArrayNearestValueFinder; |
||
| 10 | use PhpSpellcheck\Utils\TextEncoding; |
||
| 11 | use Webmozart\Assert\Assert; |
||
| 12 | |||
/**
 * Spellchecker that delegates the actual checking to a LanguageTool API client
 * and converts each returned match into a Misspelling positioned by line and
 * by offset within that line.
 */
class LanguageTool implements SpellcheckerInterface
{
    /**
     * @var LanguageToolApiClient
     */
    private $apiClient;

    public function __construct(LanguageToolApiClient $apiClient)
    {
        $this->apiClient = $apiClient;
    }

    /**
     * Spellchecks $text through the API client and lazily yields one Misspelling per match.
     *
     * This method is a generator: nothing (including the non-empty assertion on
     * $languages) runs until the returned iterable is first iterated.
     *
     * @param string      $text      text to check
     * @param array       $languages languages forwarded to the API client; must not be empty
     * @param array       $context   the entry keyed by this class name (if any) is passed to the
     *                               API client as options; the whole array is also merged into the
     *                               context of every yielded Misspelling
     * @param string|null $encoding  encoding used to locate line breaks in $text; null falls back
     *                               to \Safe\mb_internal_encoding()
     *
     * @return Misspelling[]
     */
    public function check(
        string $text,
        array $languages = [],
        array $context = [],
        ?string $encoding = TextEncoding::UTF8
    ): iterable {
        Assert::notEmpty($languages, 'LanguageTool requires at least one language to run it\'s spellchecking process');

        $check = $this->apiClient->spellCheck($text, $languages, $context[self::class] ?? []);
        $lineBreaksOffset = $this->getLineBreaksOffset($text, $encoding);

        foreach ($check['matches'] as $match) {
            [$offsetFromLine, $line] = $this->computeRealOffsetAndLine($match, $lineBreaksOffset);

            // The misspelled word is cut out of the context snippet returned with the match.
            $word = mb_substr(
                $match['context']['text'],
                $match['context']['offset'],
                $match['context']['length']
            );

            yield new Misspelling(
                $word,
                $offsetFromLine,
                $line, // line break index transformed in line number
                array_column($match['replacements'], 'value'),
                // Caller-supplied context comes last, so its string keys win on collision.
                array_merge(
                    [
                        'sentence' => $match['sentence'],
                        'spellingErrorMessage' => $match['message'],
                        'ruleUsed' => $match['rule'],
                    ],
                    $context
                )
            );
        }
    }

    /**
     * {@inheritdoc}
     */
    public function getSupportedLanguages(): iterable
    {
        return $this->apiClient->getSupportedLanguages();
    }

    /**
     * Translates a match's offset in the whole text into an offset relative to a
     * line break, plus a 1-based line number.
     *
     * NOTE(review): the branch selection relies on the index returned by
     * SortedNumericArrayNearestValueFinder::findIndex() in FIND_HIGHER mode; its exact
     * semantics (and its behaviour for an empty $lineBreaksOffset) are not visible
     * from this class - confirm against that helper.
     *
     * @param array $match            a single match from the API response; only 'offset' is read
     * @param array $lineBreaksOffset line break positions as produced by getLineBreaksOffset()
     *
     * @return array [offset from line, line number]
     */
    private function computeRealOffsetAndLine(array $match, array $lineBreaksOffset): array
    {
        $languageToolsOffset = (int) $match['offset'];
        $index = SortedNumericArrayNearestValueFinder::findIndex(
            $languageToolsOffset,
            $lineBreaksOffset,
            SortedNumericArrayNearestValueFinder::FIND_HIGHER
        );

        if ($index === 0) {
            // word is on the first line
            $offsetFromLine = $languageToolsOffset;
            $line = $index + 1;
        } elseif ($languageToolsOffset > $lineBreaksOffset[$index]) {
            // word is on the last line: it sits past the line break found at $index
            $offsetFromLine = $languageToolsOffset - $lineBreaksOffset[$index];
            $line = $index + 2;
        } else {
            // word sits between the line breaks at $index - 1 and $index
            $offsetFromLine = $languageToolsOffset - $lineBreaksOffset[$index - 1];
            $line = $index + 1;
        }

        return [$offsetFromLine, $line];
    }

    /**
     * Collects the character position of every PHP_EOL occurrence in $text, in ascending order.
     *
     * @param string      $text     text to scan
     * @param string|null $encoding encoding handed to mb_strpos(); null falls back to
     *                              \Safe\mb_internal_encoding()
     *
     * @return array list of line break positions
     */
    private function getLineBreaksOffset(string $text, ?string $encoding): array
    {
        if ($encoding === null) {
            $encoding = \Safe\mb_internal_encoding();
        }

        $start = 0;
        $lineBreaksOffset = [];
        // Strict comparison is required: mb_strpos() returns 0 for a line break at the very
        // start of the text, and a loose `0 != false` check would end the scan right there,
        // dropping that break and every one after it.
        while (($pos = \mb_strpos($text, PHP_EOL, $start, $encoding)) !== false) {
            $lineBreaksOffset[] = $pos;
            $start = $pos + 1; // start searching from next position.
        }

        return $lineBreaksOffset;
    }
}
||
| 110 |