1
|
|
|
<?php |
2
|
|
|
/** |
3
|
|
|
* Spell check html files |
4
|
|
|
* |
5
|
|
|
* PHP version 5.4 |
6
|
|
|
* |
7
|
|
|
* @category GLICER |
8
|
|
|
* @package GlSpellChecker |
9
|
|
|
* @author Emmanuel ROECKER |
10
|
|
|
* @author Rym BOUCHAGOUR |
11
|
|
|
* @copyright 2015 GLICER |
12
|
|
|
* @license MIT |
13
|
|
|
* @link http://dev.glicer.com/ |
14
|
|
|
* |
15
|
|
|
* Created : 04/05/15 |
16
|
|
|
* File : GlSpellChecker.php |
17
|
|
|
* |
18
|
|
|
*/ |
19
|
|
|
|
20
|
|
|
namespace GlSpellChecker; |
21
|
|
|
|
22
|
|
|
use GlHtml\GlHtml; |
23
|
|
|
use Symfony\Component\Process\Process; |
24
|
|
|
use Symfony\Component\Finder\SplFileInfo; |
25
|
|
|
use Symfony\Component\Finder\Finder; |
26
|
|
|
use Symfony\Component\Yaml\Exception\ParseException; |
27
|
|
|
use Symfony\Component\Yaml\Yaml; |
28
|
|
|
use GuzzleHttp\Client; |
29
|
|
|
|
30
|
|
|
|
31
|
|
|
/**
 * Spell-checks sentences taken from HTML or YAML files and renders the
 * findings as standalone HTML reports.
 *
 * Each sentence is sent to a LanguageTool HTTP server. When the enchant
 * extension is loaded, words reported as misspelled are additionally looked
 * up in an enchant dictionary to collect replacement suggestions.
 *
 * @package GlSpellChecker
 */
class GlSpellChecker
{
    /**
     * Host name or IP address of the LanguageTool HTTP server.
     *
     * @var string
     */
    private $languageToolServerIP = 'localhost';

    /**
     * TCP port of the LanguageTool HTTP server.
     *
     * @var int
     */
    private $languageToolServerPort = 8081;

    /**
     * Language code sent to LanguageTool with every request.
     *
     * @var string
     */
    private $languageToolLanguage = 'fr';

    /**
     * Server process started by this instance, or null when none was started.
     *
     * @var Process|null
     */
    private $languagetoolServer = null;

    /**
     * HTTP client used to query the LanguageTool server.
     *
     * @var Client
     */
    private $languagetoolClientHttp;

    /**
     * Dictionary tag requested from enchant.
     *
     * @var string
     */
    private $enchantLanguage = "fr_FR";

    /**
     * Enchant dictionary handle, or null when enchant is unavailable.
     *
     * @var mixed
     */
    private $enchantDictionnary = null;

    /**
     * Enchant broker handle, or null when enchant is unavailable.
     *
     * @var mixed
     */
    private $enchantBroker = null;

    /**
     * @param string      $languageToolLanguage   language code passed to LanguageTool
     * @param string      $enchantLanguage        dictionary tag requested from enchant
     * @param string|null $languageToolDirectory  directory containing languagetool-server.jar;
     *                                            when given, a local server is started
     * @param string      $languageToolServerIP   host of the LanguageTool server
     * @param int         $languageToolServerPort port of the LanguageTool server
     *
     * @throws \Exception when enchant is loaded but has no dictionary for $enchantLanguage
     */
    public function __construct(
        $languageToolLanguage,
        $enchantLanguage,
        $languageToolDirectory = null,
        $languageToolServerIP = 'localhost',
        $languageToolServerPort = 8081
    ) {
        $this->languageToolLanguage   = $languageToolLanguage;
        $this->enchantLanguage        = $enchantLanguage;
        $this->languageToolServerPort = $languageToolServerPort;
        $this->languageToolServerIP   = $languageToolServerIP;

        if ($languageToolDirectory) {
            $this->startLanguageToolServer($languageToolDirectory);
        }

        $this->languagetoolClientHttp = new Client();

        if (extension_loaded('enchant')) {
            $this->enchantBroker = enchant_broker_init();
            if (!enchant_broker_dict_exists($this->enchantBroker, $this->enchantLanguage)) {
                throw new \Exception("Cannot find dictionnaries for enchant");
            }
            $this->enchantDictionnary = enchant_broker_request_dict($this->enchantBroker, $this->enchantLanguage);
        }
    }

    /**
     * Stops the server process started by this instance (if any) and releases
     * the enchant handles.
     */
    public function __destruct()
    {
        $this->stopLanguageToolServer();

        // The dictionary may be missing even though the broker exists (the
        // constructor throws before requesting it), so each handle is checked
        // on its own. Only resource handles need an explicit free; object
        // handles are released by dropping the reference.
        if (is_resource($this->enchantDictionnary)) {
            enchant_broker_free_dict($this->enchantDictionnary);
        }
        $this->enchantDictionnary = null;

        if (is_resource($this->enchantBroker)) {
            enchant_broker_free($this->enchantBroker);
        }
        $this->enchantBroker = null;
    }

    /**
     * Renders checked sentences as a complete HTML page in which every error
     * zone is wrapped in a red span carrying a tooltip.
     *
     * All text coming from the sentences, the errors and the title is
     * HTML-escaped before being placed in the markup.
     *
     * @param string                   $title
     * @param GlSpellCheckerSentence[] $sentences
     *
     * @return string
     */
    public function convertToHtml($title, $sentences)
    {
        $html = '<!DOCTYPE HTML>';
        $html .= '<html><head><meta http-equiv="Content-Type" content="text/html; charset=utf-8">';
        $html .= '<title>' . $this->escape($title) . '</title>';
        $html .= '<style>';
        $html .= '.error { color: red }';
        $html .= '.tooltip { display: inline; position: relative; text-decoration: none; top: 0px; left: 0px; }';
        $html .= '.tooltip:hover:after { background: #333; background: rgba(0,0,0,.8); border-radius: 5px;'
            . ' top: -5px; color: #fff; content: attr(data-tooltip); left: 160px; padding: 5px 15px;'
            . ' position: absolute; z-index: 98; width: 150px; }';
        $html .= '</style>';
        $html .= '</head><body>';

        foreach ($sentences as $sentence) {
            $text   = $sentence->getText();
            $errors = $sentence->mergeErrors();

            $html .= '<div class="sentence">';

            if (count($errors) <= 0) {
                $html .= $this->escape($text) . '</div>';
                continue;
            }

            // Offsets are character positions in the raw text, so slicing is
            // done on the raw text and each slice is escaped separately.
            // NOTE(review): assumes mergeErrors() yields non-overlapping errors
            // ordered by offset - confirm in GlSpellCheckerSentence.
            $start = 0;
            foreach ($errors as $error) {
                $offset = $error->getOffset();
                $length = $error->getLength();

                $html .= $this->escape(mb_substr($text, $start, $offset - $start, 'UTF-8'));

                $tooltip = $error->getMessage();
                $suggs   = $error->getSuggestions();
                if (isset($suggs[0])) {
                    $tooltip .= " : " . $suggs[0];
                }

                $zone = mb_substr($text, $offset, $length, 'UTF-8');
                $html .= '<span class="error tooltip" data-tooltip="' . $this->escape($tooltip) . '">'
                    . $this->escape($zone) . '</span>';

                $start = $offset + $length;
            }

            // Remainder of the sentence after the last error zone.
            $html .= $this->escape(mb_substr($text, $start, mb_strlen($text, 'UTF-8') - $start, 'UTF-8'));
            $html .= '</div>';
        }

        $html .= '<br><br><br></body></html>';

        return $html;
    }

    /**
     * Checks the selected fields of every YAML file and writes one HTML
     * report per file into the system temporary directory.
     *
     * @param Finder   $files          YAML files to check
     * @param array    $fields         keys whose values must be checked
     * @param callable $checkfilestart called with the file and its sentence count before checking
     * @param callable $checksentence  called with each sentence once it has been checked
     * @param callable $checkfileend   called without arguments once the report is written
     *
     * @return array paths of the generated report files
     *
     * @throws \Exception when a file is not valid YAML
     */
    public function checkYamlFiles(
        Finder $files,
        array $fields,
        callable $checkfilestart,
        callable $checksentence,
        callable $checkfileend
    ) {
        $results = [];
        /**
         * @var SplFileInfo $file
         */
        foreach ($files as $file) {
            try {
                $data = Yaml::parse(file_get_contents($file->getRealPath()));
            } catch (ParseException $e) {
                // Keep the parser exception attached as the previous exception.
                throw new \Exception("Unable to parse YAML string: {$e->getMessage()}", 0, $e);
            }

            $sentences = $this->extractYamlSentences($data, $fields);

            $checkfilestart($file, count($sentences));
            $sentences = $this->checkSentences($sentences, $checksentence);
            $results[] = $this->writeReport($file->getFilename(), $sentences);
            $checkfileend();
        }

        return $results;
    }

    /**
     * Checks every HTML file and writes one HTML report per file into the
     * system temporary directory.
     *
     * @param Finder   $files          HTML files to check
     * @param callable $checkfilestart called with the file and its sentence count before checking
     * @param callable $checksentence  called with each sentence once it has been checked
     * @param callable $checkfileend   called without arguments once the report is written
     *
     * @return array paths of the generated report files
     */
    public function checkHtmlFiles(
        Finder $files,
        callable $checkfilestart,
        callable $checksentence,
        callable $checkfileend
    ) {
        $results = [];
        /**
         * @var SplFileInfo $file
         */
        foreach ($files as $file) {
            $html = new GlHtml(file_get_contents($file->getRealPath()));

            // Use the document title when there is one, else the file name.
            $titles = $html->get("head title");
            if ($titles && count($titles) > 0) {
                $title = $titles[0]->getText();
            } else {
                $title = $file->getFilename();
            }

            $sentences = $html->getSentences();

            $checkfilestart($file, count($sentences));
            $sentences = $this->checkSentences($sentences, $checksentence);
            $results[] = $this->writeReport($title, $sentences);
            $checkfileend();
        }

        return $results;
    }

    /**
     * Sends each sentence to the LanguageTool server and collects the errors
     * it reports.
     *
     * @param array    $sentences raw sentences (strings)
     * @param callable $closure   called with each raw sentence after it has been checked
     *
     * @return GlSpellCheckerSentence[]
     */
    public function checkSentences(
        array $sentences,
        callable $closure
    ) {
        $url = "http://{$this->languageToolServerIP}:{$this->languageToolServerPort}";

        $sentencesChecked = [];
        foreach ($sentences as $sentence) {
            $response = $this->languagetoolClientHttp->get(
                $url,
                [
                    'query' => [
                        'language' => $this->languageToolLanguage,
                        'text'     => $sentence
                    ]
                ]
            );
            $xml    = $response->getBody()->getContents();
            $glxml  = new GlHtml($xml);
            $errors = $glxml->get('error');

            $sentenceChecked = new GlSpellCheckerSentence($sentence);
            if (!empty($errors)) {
                foreach ($errors as $error) {
                    $msg    = $error->getAttribute('msg');
                    $offset = (int)$error->getAttribute('offset');
                    $length = (int)$error->getAttribute('errorlength');
                    $suggs  = [];
                    $word   = null;

                    // Only misspellings carry a word and enchant suggestions.
                    if ($error->getAttribute('locqualityissuetype') == 'misspelling') {
                        $word = mb_substr($sentence, $offset, $length, 'UTF-8');
                        if ($this->enchantDictionnary
                            && !enchant_dict_check($this->enchantDictionnary, $word)
                        ) {
                            // Normalise a non-array result to an empty list so
                            // consumers can always treat suggestions as an array.
                            $suggs = enchant_dict_suggest($this->enchantDictionnary, $word) ?: [];
                        }
                    }

                    $sentenceChecked->addError(
                        new GlSpellCheckerError($msg, $offset, $length, $word, $suggs)
                    );
                }
            }

            $sentencesChecked[] = $sentenceChecked;
            $closure($sentence);
        }

        return $sentencesChecked;
    }

    /**
     * Collects the values stored under one of the requested keys in each
     * top-level item of a parsed YAML document.
     *
     * @param mixed $data   result of Yaml::parse()
     * @param array $fields keys whose values must be collected
     *
     * @return string[]
     */
    private function extractYamlSentences($data, array $fields)
    {
        $sentences = [];
        if (!is_array($data)) {
            // Empty or scalar document: nothing to check.
            return $sentences;
        }

        // Keys are compared as strings with strict equality so that an integer
        // key such as 0 cannot loosely match a non-numeric field name.
        $wanted = array_map('strval', $fields);

        foreach ($data as $item) {
            if (!is_array($item)) {
                continue;
            }
            foreach ($item as $key => $valueitem) {
                if (!in_array((string)$key, $wanted, true)) {
                    continue;
                }
                // Nested structures and nulls are not text and are skipped.
                if (is_string($valueitem) || is_numeric($valueitem)) {
                    $sentences[] = (string)$valueitem;
                }
            }
        }

        return $sentences;
    }

    /**
     * Renders the checked sentences and stores the page in a uniquely named
     * file inside the system temporary directory.
     *
     * @param string                   $title
     * @param GlSpellCheckerSentence[] $sentences
     *
     * @return string path of the written report
     */
    private function writeReport($title, $sentences)
    {
        $checkerfile = sys_get_temp_dir() . "/" . uniqid("spellcheck") . ".html";
        file_put_contents($checkerfile, $this->convertToHtml($title, $sentences));

        return $checkerfile;
    }

    /**
     * Escapes a value for use in HTML text or in a double-quoted attribute.
     *
     * @param string $text
     *
     * @return string
     */
    private function escape($text)
    {
        return htmlspecialchars((string)$text, ENT_QUOTES | ENT_SUBSTITUTE, 'UTF-8');
    }

    /**
     * Starts a LanguageTool HTTP server from the jar found in $directory,
     * listening on the configured port. The process is started asynchronously.
     *
     * @param string $directory directory containing languagetool-server.jar,
     *                          with or without a trailing separator
     */
    private function startLanguageToolServer($directory)
    {
        $jar = rtrim($directory, '/\\') . DIRECTORY_SEPARATOR . "languagetool-server.jar";

        // The jar path is quoted so that spaces or shell metacharacters in the
        // directory name cannot alter the command.
        $command = sprintf(
            'java -cp %s org.languagetool.server.HTTPServer --port %d',
            escapeshellarg($jar),
            $this->languageToolServerPort
        );

        $this->languagetoolServer = new Process($command);
        $this->languagetoolServer->start();
    }

    /**
     * Stops the server process started by this instance, if any.
     */
    private function stopLanguageToolServer()
    {
        if ($this->languagetoolServer) {
            $this->languagetoolServer->stop();
            $this->languagetoolServer = null;
        }
    }
}
359
|
|
|
|
An attempt at access to an undefined property has been detected. This may either be a typographical error or the property has been renamed but there are still references to its old name.
If you really want to allow access to undefined properties, you can define magic methods to allow access. See the php core documentation on Overloading.