1
|
|
|
<?php |
2
|
|
|
|
3
|
|
|
declare(strict_types=1); |
4
|
|
|
|
5
|
|
|
namespace PhpSpellcheck\Spellchecker; |
6
|
|
|
|
7
|
|
|
use PhpSpellcheck\Exception\ProcessHasErrorOutputException; |
8
|
|
|
use PhpSpellcheck\Utils\CommandLine; |
9
|
|
|
use PhpSpellcheck\Utils\IspellOutputParser; |
10
|
|
|
use PhpSpellcheck\Utils\ProcessRunner; |
11
|
|
|
use PhpSpellcheck\Utils\TextEncoding; |
12
|
|
|
use Symfony\Component\Process\Process; |
13
|
|
|
|
14
|
|
|
/**
 * Spellchecker implementation that shells out to the Hunspell command-line binary.
 *
 * The text is piped to the binary running with `-a` and the resulting output is
 * handed to {@see IspellOutputParser} to build the misspellings.
 */
class Hunspell implements SpellcheckerInterface
{
    /**
     * Base command line used to invoke the Hunspell binary; every call derives its
     * own command from it by appending arguments.
     *
     * @var CommandLine
     */
    private $binaryPath;

    /**
     * @param CommandLine $binaryPath command line pointing at the Hunspell binary
     */
    public function __construct(CommandLine $binaryPath)
    {
        $this->binaryPath = $binaryPath;
    }

    /**
     * {@inheritDoc}
     *
     * Builds `<binary> -a [-d lang1,lang2,...] [-i encoding]`, feeds it the text and
     * parses whatever the process writes to its standard output.
     *
     * @param string      $text      text to spellcheck
     * @param array       $languages dictionary names, joined with "," and passed through `-d`;
     *                               the option is omitted when the array is empty
     * @param array       $context   forwarded untouched to the output parser
     * @param string|null $encoding  passed through `-i`; the option is omitted when null or ''
     *
     * @throws ProcessHasErrorOutputException when the process wrote anything to its error output
     */
    public function check(
        string $text,
        array $languages = [],
        array $context = [],
        ?string $encoding = TextEncoding::UTF8
    ): iterable {
        $cmd = $this->binaryPath->addArg('-a');

        if ($languages !== []) {
            $cmd = $cmd->addArgs(['-d', implode(',', $languages)]);
        }

        // Explicit comparison instead of a truthiness test, so that only "no encoding"
        // (null or an empty string) suppresses the option.
        if ($encoding !== null && $encoding !== '') {
            $cmd = $cmd->addArgs(['-i', $encoding]);
        }

        $process = new Process($cmd->getArgs());
        // Add prefix characters putting Ispell's type of spellcheckers in terse-mode,
        // ignoring correct words and thus speeding execution
        // NOTE(review): in ispell-style pipe mode a line of $text that itself starts with
        // a command character (e.g. `*`, `&`, `@`, `+`, `-`, `~`, `#`, `!`, `%`) is presumably
        // interpreted as a command rather than checked. Escaping each line with `^` would
        // likely shift the offsets handled by IspellOutputParser - confirm before changing.
        $process->setInput('!' . PHP_EOL . $text . PHP_EOL . '%');

        $output = ProcessRunner::run($process)->getOutput();

        $errorOutput = $process->getErrorOutput();
        if ($errorOutput !== '') {
            throw new ProcessHasErrorOutputException($errorOutput, $text, $process->getCommandLine());
        }

        return IspellOutputParser::parseMisspellings($output, $context);
    }

    /**
     * Returns the base command line this spellchecker was created with.
     */
    public function getBinaryPath(): CommandLine
    {
        return $this->binaryPath;
    }

    /**
     * {@inheritDoc}
     *
     * Runs `<binary> -D` and extracts the dictionary names from the process's error
     * output. Names are de-duplicated and returned sorted.
     */
    public function getSupportedLanguages(): iterable
    {
        $languages = [];
        $cmd = $this->binaryPath->addArg('-D');
        $process = new Process($cmd->getArgs());
        $output = explode(PHP_EOL, ProcessRunner::run($process)->getErrorOutput());

        foreach ($output as $line) {
            $line = trim($line);

            // Skip empty lines, plus every line containing ':' - that covers both the
            // section headers (which end with ':') and the search path line.
            if ('' === $line || strpos($line, ':') !== false) {
                continue;
            }

            $name = basename($line);
            if (strpos($name, 'hyph_') === 0) {
                // Skip MySpell hyphen files
                continue;
            }

            // Drop a trailing dictionary file extension, then use the name as an array
            // key so that duplicates collapse into a single entry.
            $name = \Safe\preg_replace('/\.(aff|dic)$/', '', $name);
            $languages[$name] = true;
        }

        $languages = array_keys($languages);
        \Safe\sort($languages);

        return $languages;
    }

    /**
     * Named constructor building the spellchecker from a binary path given as a string.
     *
     * @param string|null $binaryPathAsString path or name of the binary; 'hunspell' is used when null
     */
    public static function create(?string $binaryPathAsString): self
    {
        return new self(new CommandLine($binaryPathAsString ?? 'hunspell'));
    }
}
100
|
|
|
|