Completed
Push — master ( 236942...812281 )
by Emmanuel
04:18 queued 01:08
created

GlSpellChecker::__construct()   B

Complexity

Conditions 3
Paths 4

Size

Total Lines 26
Code Lines 18

Duplication

Lines 0
Ratio 0 %

Importance

Changes 3
Bugs 0 Features 1
Metric Value
c 3
b 0
f 1
dl 0
loc 26
rs 8.8571
cc 3
eloc 18
nc 4
nop 5
1
<?php
2
/**
3
 * Spell check html files
4
 *
5
 * PHP version 5.4
6
 *
7
 * @category  GLICER
8
 * @package   GlSpellChecker
9
 * @author    Emmanuel ROECKER
10
 * @author    Rym BOUCHAGOUR
11
 * @copyright 2015 GLICER
12
 * @license   MIT
13
 * @link      http://dev.glicer.com/
14
 *
15
 * Created : 04/05/15
16
 * File : GlSpellChecker.php
17
 *
18
 */
19
20
namespace GlSpellChecker;
21
22
use GlHtml\GlHtml;
23
use Symfony\Component\Process\Process;
24
use Symfony\Component\Finder\SplFileInfo;
25
use Symfony\Component\Finder\Finder;
26
use Symfony\Component\Yaml\Exception\ParseException;
27
use Symfony\Component\Yaml\Yaml;
28
use GuzzleHttp\Client;
29
30
31
/**
32
 * Class GlSpellChecker
33
 * @package GlSpellChecker
34
 */
35
class GlSpellChecker
36
{
37
    /**
     * Port used to build the LanguageTool server URL and to start a local server.
     *
     * @var int
     */
    private $languageToolServerPort = 8081;

    /**
     * Host name or IP address used to build the LanguageTool server URL.
     *
     * Declared explicitly: it used to be created on the fly by the
     * constructor as an undeclared (dynamic) property.
     *
     * @var string
     */
    private $languageToolServerIP = 'localhost';

    /**
     * Language code sent to LanguageTool with every request.
     *
     * @var string
     */
    private $languageToolLanguage = 'fr';

    /**
     * LanguageTool server process started by this instance, or null when none was started.
     *
     * @var Process|null
     */
    private $languagetoolServer = null;

    /**
     * HTTP client used to query the LanguageTool server.
     *
     * @var Client
     */
    private $languagetoolClientHttp;

    /**
     * Dictionary tag requested from enchant.
     *
     * @var string
     */
    private $enchantLanguage = "fr_FR";

    /**
     * Dictionary handle returned by enchant_broker_request_dict().
     */
    private $enchantDictionnary;

    /**
     * Broker handle returned by enchant_broker_init().
     */
    private $enchantBroker;

    /**
     * Configure the checker, optionally start a local LanguageTool server and
     * load the enchant dictionary.
     *
     * @param string      $languageToolLanguage   language code sent to LanguageTool
     * @param string      $enchantLanguage        dictionary tag requested from enchant
     * @param string|null $languageToolDirectory  directory containing languagetool-server.jar;
     *                                            when null or empty no server is started
     * @param string      $languageToolServerIP   host of the LanguageTool server
     * @param int         $languageToolServerPort port of the LanguageTool server
     *
     * @throws \Exception when the enchant broker cannot be initialised or has no
     *                    dictionary for $enchantLanguage
     */
    public function __construct(
        $languageToolLanguage,
        $enchantLanguage,
        $languageToolDirectory = null,
        $languageToolServerIP = 'localhost',
        $languageToolServerPort = 8081
    ) {
        $this->languageToolLanguage   = $languageToolLanguage;
        $this->enchantLanguage        = $enchantLanguage;
        $this->languageToolServerPort = $languageToolServerPort;
        $this->languageToolServerIP   = $languageToolServerIP;

        // Explicit test instead of relying on string truthiness: only a real,
        // non-empty directory triggers the start of a local server.
        if ($languageToolDirectory !== null && $languageToolDirectory !== '') {
            $this->startLanguageToolServer($languageToolDirectory);
        }

        $this->languagetoolClientHttp = new Client();

        $this->enchantBroker = enchant_broker_init();
        if (!$this->enchantBroker) {
            throw new \Exception("Cannot initialize enchant broker");
        }

        if (!enchant_broker_dict_exists($this->enchantBroker, $this->enchantLanguage)) {
            throw new \Exception("Cannot find dictionnaries for enchant");
        }

        $this->enchantDictionnary = enchant_broker_request_dict($this->enchantBroker, $this->enchantLanguage);
    }
100
101
    /**
     * Stop the LanguageTool server started by this instance (if any) and
     * release the enchant handles.
     */
    public function __destruct()
    {
        $this->stopLanguageToolServer();

        // The handles may never have been acquired (for instance when the
        // constructor threw before requesting them), so never free blindly.
        // Only legacy resource handles are freed explicitly: on PHP 8+ enchant
        // hands out objects that are released automatically, and the free
        // functions are deprecated there.
        if (is_resource($this->enchantDictionnary)) {
            enchant_broker_free_dict($this->enchantDictionnary);
        }
        $this->enchantDictionnary = null;

        if (is_resource($this->enchantBroker)) {
            enchant_broker_free($this->enchantBroker);
        }
        $this->enchantBroker = null;
    }
107
108
    /**
     * Render checked sentences as a standalone HTML report.
     *
     * Each sentence becomes a <div class="sentence">; every error zone is
     * wrapped in a <span> whose data-tooltip holds the error message followed,
     * when available, by the first suggestion.
     *
     * All dynamic text (title, sentence text, tooltip) is HTML-escaped so that
     * characters such as <, & or " cannot break the generated markup.
     *
     * @param string                   $title     text placed in the <title> element
     * @param GlSpellCheckerSentence[] $sentences sentences with their errors
     *
     * @return string complete HTML document
     */
    public function convertToHtml($title, $sentences)
    {
        // Single escaping routine shared by every dynamic fragment below.
        $escape = function ($value) {
            return htmlspecialchars((string) $value, ENT_QUOTES | ENT_SUBSTITUTE, 'UTF-8');
        };

        $html = '<!DOCTYPE HTML>';
        $html .= '<html><head><meta http-equiv="Content-Type" content="text/html; charset=utf-8">';
        $html .= '<title>' . $escape($title) . '</title>';
        $html .= '<style>';
        $html .= '.error {  color: red  }';

        $html .= '.tooltip
                    {
                        display: inline;
                        position: relative;
                        text-decoration: none;
                        top: 0px;
                        left: 0px;
                    }';

        $html .= '.tooltip:hover:after
                    {
                        background: #333;
                        background: rgba(0,0,0,.8);
                        border-radius: 5px;
                        top: -5px;
                        color: #fff;
                        content: attr(data-tooltip);
                        left: 160px;
                        padding: 5px 15px;
                        position: absolute;
                        z-index: 98;
                        width: 150px;
                    }';
        $html .= '</style>';
        $html .= '</head><body>';

        foreach ($sentences as $sentence) {
            $text   = $sentence->getText();
            $errors = $sentence->mergeErrors();

            $html .= '<div class="sentence">';

            if (empty($errors)) {
                $html .= $escape($text);
                $html .= '</div>';
                continue;
            }

            $cons  = "";
            $start = 0;
            foreach ($errors as $error) {
                $offset = $error->getOffset();
                $length = $error->getLength();

                // Offsets are expressed in characters of the raw text, so the
                // text is sliced first and each slice is escaped afterwards.
                $cons .= $escape(mb_substr($text, $start, $offset - $start, 'UTF-8'));

                $tooltip = $error->getMessage();
                $suggs   = $error->getSuggestions();
                if (is_array($suggs) && isset($suggs[0])) {
                    $tooltip .= " : " . $suggs[0];
                }

                $zone = mb_substr($text, $offset, $length, 'UTF-8');
                $cons .= '<span class="error tooltip" data-tooltip="' . $escape($tooltip) . '">'
                    . $escape($zone) . '</span>';

                $start = $offset + $length;
            }

            // Remaining text after the last error; the length is measured with
            // the same encoding used for slicing.
            $cons .= $escape(mb_substr($text, $start, mb_strlen($text, 'UTF-8') - $start, 'UTF-8'));

            $html .= $cons;
            $html .= '</div>';
        }
        $html .= '<br><br><br></body></html>';

        return $html;
    }
185
186
    /**
     * Spell check selected fields of YAML files and write one HTML report per file.
     *
     * Every file is parsed, the values stored under the requested field names
     * (looked up one level below the top-level entries) are collected and
     * checked, and the resulting report is written to the system temporary
     * directory.
     *
     * @param Finder   $files          YAML files to check
     * @param array    $fields         field names whose values must be checked
     * @param callable $checkfilestart called with (file, number of sentences) before checking a file
     * @param callable $checksentence  forwarded to checkSentences()
     * @param callable $checkfileend   called without arguments once the report is written
     *
     * @throws \Exception when a file cannot be read or parsed
     *
     * @return string[] paths of the generated HTML reports
     */
    public function checkYamlFiles(
        Finder $files,
        array    $fields,
        callable $checkfilestart,
        callable $checksentence,
        callable $checkfileend
    ) {
        $results = [];
        /**
         * @var SplFileInfo $file
         */
        foreach ($files as $file) {
            $content = file_get_contents($file->getRealPath());
            if ($content === false) {
                throw new \Exception("Unable to read file: {$file->getPathname()}");
            }

            try {
                $data = Yaml::parse($content);
            } catch (ParseException $e) {
                // Keep the parser exception attached for debugging.
                throw new \Exception("Unable to parse YAML string: {$e->getMessage()}", 0, $e);
            }

            $sentences = [];
            // An empty or scalar YAML document does not parse to an array.
            if (is_array($data)) {
                foreach ($data as $item) {
                    if (!is_array($item)) {
                        continue;
                    }
                    foreach ($item as $key => $valueitem) {
                        foreach ($fields as $field) {
                            // Compare as strings: a loose == lets the integer
                            // key 0 match any non-numeric field name on PHP < 8.
                            if ((string) $key === (string) $field) {
                                $sentences[] = $valueitem;
                            }
                        }
                    }
                }
            }

            $checkfilestart($file, count($sentences));
            $sentences = $this->checkSentences($sentences, $checksentence);
            $htmlcode  = $this->convertToHtml($file->getFilename(), $sentences);

            $checkerfile = sys_get_temp_dir() . "/" . uniqid("spellcheck") . ".html";
            file_put_contents($checkerfile, $htmlcode);
            $results[] = $checkerfile;

            $checkfileend();
        }

        return $results;
    }
233
234
    /**
     * Spell check HTML files and write one HTML report per file into the
     * system temporary directory.
     *
     * The report title is the text of the first "head title" node of the
     * checked document, or the file name when the document has none.
     *
     * @param Finder   $files          HTML files to check
     * @param callable $checkfilestart called with (file, number of sentences) before checking a file
     * @param callable $checksentence  forwarded to checkSentences()
     * @param callable $checkfileend   called without arguments once the report is written
     *
     * @return array paths of the generated HTML reports
     */
    public function checkHtmlFiles(
        Finder $files,
        callable $checkfilestart,
        callable $checksentence,
        callable $checkfileend
    ) {
        $reports = [];

        /** @var SplFileInfo $file */
        foreach ($files as $file) {
            $document   = new GlHtml(file_get_contents($file->getRealPath()));
            $titleNodes = $document->get("head title");

            // Prefer the document's own title, fall back to the file name.
            $title = ($titleNodes && count($titleNodes) > 0)
                ? $titleNodes[0]->getText()
                : $file->getFilename();

            $found = $document->getSentences();
            $checkfilestart($file, count($found));

            $checked = $this->checkSentences($found, $checksentence);
            $markup  = $this->convertToHtml($title, $checked);

            $reportPath = sys_get_temp_dir() . "/" . uniqid("spellcheck") . ".html";
            file_put_contents($reportPath, $markup);
            $reports[] = $reportPath;

            $checkfileend();
        }

        return $reports;
    }
281
282
    /**
     * Check sentences with LanguageTool and attach enchant suggestions to misspellings.
     *
     * Each sentence is sent to the LanguageTool server; every `error` element
     * of the response becomes a GlSpellCheckerError built from its `msg`,
     * `offset` and `errorlength` attributes. When the error is flagged as a
     * misspelling, the faulty word is looked up in the enchant dictionary and,
     * if enchant also rejects it, enchant's suggestions are attached.
     *
     * @param array    $sentences sentences to check
     * @param callable $closure   called with each sentence once it has been checked
     *
     * @return GlSpellCheckerSentence[]
     */
    public function checkSentences(
        array $sentences,
        callable $closure
    ) {
        $url              = "http://{$this->languageToolServerIP}:{$this->languageToolServerPort}";
        $sentencesChecked = [];
        foreach ($sentences as $sentence) {
            $response = $this->languagetoolClientHttp->get(
                $url,
                [
                    'query' => [
                        'language' => $this->languageToolLanguage,
                        'text'     => $sentence
                    ]
                ]
            );
            $xml             = $response->getBody()->getContents();
            $glxml           = new GlHtml($xml);
            $errors          = $glxml->get('error');
            $sentenceChecked = new GlSpellCheckerSentence($sentence);
            if (count($errors) > 0) {
                foreach ($errors as $error) {
                    $msg    = $error->getAttribute('msg');
                    $offset = (int)$error->getAttribute('offset');
                    $length = (int)$error->getAttribute('errorlength');
                    $suggs  = [];
                    $word   = null;
                    if ($error->getAttribute('locqualityissuetype') === 'misspelling') {
                        $word        = mb_substr($sentence, $offset, $length, 'UTF-8');
                        $wordcorrect = enchant_dict_check($this->enchantDictionnary, $word);
                        if (!$wordcorrect) {
                            $suggs = enchant_dict_suggest($this->enchantDictionnary, $word);
                            // Older enchant bindings may return a non-array
                            // when nothing is suggested; always hand an array
                            // to the error object so callers can count/index it.
                            if (!is_array($suggs)) {
                                $suggs = [];
                            }
                        }
                    }
                    $glerror = new GlSpellCheckerError($msg, $offset, $length, $word, $suggs);
                    $sentenceChecked->addError($glerror);
                }
            }
            $sentencesChecked[] = $sentenceChecked;
            $closure($sentence);
        }

        return $sentencesChecked;
    }
334
335
    /**
     * Start the LanguageTool HTTP server shipped in $directory on the
     * configured port, then wait a fixed 3 seconds before returning.
     *
     * @param string $directory directory containing languagetool-server.jar,
     *                          with or without a trailing directory separator
     */
    private function startLanguageToolServer($directory)
    {
        // Normalise the trailing separator so that both "/opt/lt" and
        // "/opt/lt/" resolve to the jar inside the directory.
        $jar = rtrim($directory, '/\\') . DIRECTORY_SEPARATOR . "languagetool-server.jar";

        // Quote the path (it may contain spaces or shell metacharacters) and
        // force the port to an integer before building the command line.
        $command = sprintf(
            'java -cp %s org.languagetool.server.HTTPServer --port %d',
            escapeshellarg($jar),
            $this->languageToolServerPort
        );

        $this->languagetoolServer = new Process($command);
        $this->languagetoolServer->start();

        // Fixed grace period; the process is started asynchronously and
        // nothing here checks that the server is actually accepting requests.
        sleep(3);
    }
346
347
    /**
     * Stop the LanguageTool server process held by this instance, if any,
     * and forget it so that a second call does nothing.
     */
    private function stopLanguageToolServer()
    {
        $server = $this->languagetoolServer;
        if (!$server) {
            return;
        }

        $server->stop();
        $this->languagetoolServer = null;
    }
354
}