Completed
Push — master ( 82e66d...6e55ac )
by Emmanuel
02:01
created

GlSpellChecker::checkSentence()   B

Complexity

Conditions 2
Paths 2

Size

Total Lines 29
Code Lines 19

Duplication

Lines 0
Ratio 0 %

Importance

Changes 1
Bugs 0 Features 1
Metric Value
c 1
b 0
f 1
dl 0
loc 29
rs 8.8571
cc 2
eloc 19
nc 2
nop 1
1
<?php
2
/**
3
 * Spell check html files
4
 *
5
 * PHP version 5.4
6
 *
7
 * @category  GLICER
8
 * @package   GlSpellChecker
9
 * @author    Emmanuel ROECKER
10
 * @author    Rym BOUCHAGOUR
11
 * @copyright 2015 GLICER
12
 * @license   MIT
13
 * @link      http://dev.glicer.com/
14
 *
15
 * Created : 04/05/15
16
 * File : GlSpellChecker.php
17
 *
18
 */
19
20
namespace GlSpellChecker;
21
22
use GlHtml\GlHtml;
23
use Symfony\Component\Process\Process;
24
use Symfony\Component\Finder\SplFileInfo;
25
use Symfony\Component\Finder\Finder;
26
use Symfony\Component\Yaml\Exception\ParseException;
27
use Symfony\Component\Yaml\Yaml;
28
use GuzzleHttp\Client;
29
30
31
/**
32
 * Class GlSpellChecker
33
 * @package GlSpellChecker
34
 */
35
class GlSpellChecker
36
{
37
    /**
     * Port of the LanguageTool HTTP server.
     *
     * @var int
     */
    private $languageToolServerPort = 8081;

    /**
     * Host name or IP address of the LanguageTool HTTP server.
     *
     * Declared explicitly because the constructor writes it and the check
     * methods read it; without this declaration it would be created as a
     * dynamic property on first assignment.
     *
     * @var string
     */
    private $languageToolServerIP = 'localhost';

    /**
     * Language code sent to LanguageTool with each request.
     *
     * @var string
     */
    private $languageToolLanguage = 'fr';

    /**
     * LanguageTool server process started by this instance, or null when
     * no server was started.
     *
     * @var Process|null
     */
    private $languagetoolServer = null;

    /**
     * HTTP client used to query the LanguageTool server.
     *
     * @var Client
     */
    private $languagetoolClientHttp;

    /**
     * Dictionary tag requested from enchant.
     *
     * @var string
     */
    private $enchantLanguage = "fr_FR";

    /**
     * Enchant dictionary handle; stays null when the enchant extension is
     * not loaded.
     */
    private $enchantDictionnary = null;

    /**
     * Enchant broker handle; stays null when the enchant extension is not
     * loaded.
     */
    private $enchantBroker = null;
64
65
    /**
     * Store the configuration, optionally start a local LanguageTool server,
     * create the HTTP client and, when the enchant extension is loaded, open
     * the enchant dictionary.
     *
     * @param string      $languageToolLanguage   language code sent to LanguageTool
     * @param string      $enchantLanguage        dictionary tag requested from enchant
     * @param string|null $languageToolDirectory  directory containing languagetool-server.jar;
     *                                            no server is started when null or empty
     * @param string      $languageToolServerIP   host of the LanguageTool server
     * @param int         $languageToolServerPort port of the LanguageTool server
     *
     * @throws \Exception when enchant is loaded but has no dictionary for $enchantLanguage
     */
    public function __construct(
        $languageToolLanguage,
        $enchantLanguage,
        $languageToolDirectory = null,
        $languageToolServerIP = 'localhost',
        $languageToolServerPort = 8081
    ) {
        $this->languageToolLanguage   = $languageToolLanguage;
        $this->enchantLanguage        = $enchantLanguage;
        $this->languageToolServerPort = $languageToolServerPort;
        $this->languageToolServerIP   = $languageToolServerIP;

        // Explicit check instead of a truthiness test: any non-empty string
        // (including '0') is a directory, everything else means "no server".
        if (is_string($languageToolDirectory) && $languageToolDirectory !== '') {
            $this->startLanguageToolServer($languageToolDirectory);
        }

        $this->languagetoolClientHttp = new Client();

        // Enchant is optional: without the extension both handles stay null.
        if (!extension_loaded('enchant')) {
            return;
        }

        $this->enchantBroker = enchant_broker_init();
        if (!enchant_broker_dict_exists($this->enchantBroker, $this->enchantLanguage)) {
            throw new \Exception("Cannot find dictionnaries for enchant");
        }

        $this->enchantDictionnary = enchant_broker_request_dict($this->enchantBroker, $this->enchantLanguage);
    }
101
102
    /**
     * Stop the LanguageTool server if one is running, then release the
     * enchant dictionary and broker when a broker was opened.
     */
    public function __destruct()
    {
        $this->stopLanguageToolServer();

        // No broker means enchant was never initialised: nothing to free.
        if (!$this->enchantBroker) {
            return;
        }

        enchant_broker_free_dict($this->enchantDictionnary);
        enchant_broker_free($this->enchantBroker);
    }
110
111
    /**
     * Render checked sentences as a standalone HTML report.
     *
     * Each sentence goes into its own <div class="sentence">. Every error
     * range is wrapped in a <span class="error tooltip"> whose data-tooltip
     * attribute holds the error message, followed by the first suggestion
     * when there is one.
     *
     * The title, sentence text and tooltip are HTML-escaped before being
     * inserted, so quotes or angle brackets in them cannot break the markup.
     * NOTE(review): assumes getText() returns plain, not already escaped,
     * text and that mergeErrors() yields errors ordered by offset — confirm
     * against GlSpellCheckerSentence.
     *
     * @param string                   $title
     * @param GlSpellCheckerSentence[] $sentences
     *
     * @return string
     */
    public function convertToHtml($title, $sentences)
    {
        $escapeFlags = ENT_QUOTES | ENT_SUBSTITUTE;

        $html = '<!DOCTYPE HTML>';
        $html .= '<html><head><meta http-equiv="Content-Type" content="text/html; charset=utf-8">';
        $html .= '<title>' . htmlspecialchars($title, $escapeFlags, 'UTF-8') . '</title>';
        $html .= '<style>';
        $html .= '.error {  color: red  }';

        $html .= '.tooltip
                    {
                        display: inline;
                        position: relative;
                        text-decoration: none;
                        top: 0px;
                        left: 0px;
                    }';

        $html .= '.tooltip:hover:after
                    {
                        background: #333;
                        background: rgba(0,0,0,.8);
                        border-radius: 5px;
                        top: -5px;
                        color: #fff;
                        content: attr(data-tooltip);
                        left: 160px;
                        padding: 5px 15px;
                        position: absolute;
                        z-index: 98;
                        width: 150px;
                    }';
        $html .= '</style>';
        $html .= '</head><body>';

        foreach ($sentences as $sentence) {
            $html .= '<div class="sentence">';
            $text   = $sentence->getText();
            $errors = $sentence->mergeErrors();

            if (count($errors) <= 0) {
                $html .= htmlspecialchars($text, $escapeFlags, 'UTF-8');
                $html .= '</div>';
                continue;
            }

            $cons  = "";
            $start = 0;
            foreach ($errors as $error) {
                $offset = $error->getOffset();
                $length = $error->getLength();

                // Plain text between the previous error and this one.
                $before = mb_substr($text, $start, $offset - $start, 'UTF-8');
                $cons .= htmlspecialchars($before, $escapeFlags, 'UTF-8');

                $tooltip = $error->getMessage();
                $suggs   = $error->getSuggestions();
                if (count($suggs) > 0) {
                    $tooltip .= " : " . $suggs[0];
                }
                $zone = mb_substr($text, $offset, $length, 'UTF-8');
                $cons .= '<span class="error tooltip" data-tooltip="'
                    . htmlspecialchars($tooltip, $escapeFlags, 'UTF-8') . '">'
                    . htmlspecialchars($zone, $escapeFlags, 'UTF-8') . '</span>';

                $start = $offset + $length;
            }

            // Remainder after the last error; the length is counted in UTF-8
            // characters to match the mb_substr() calls above.
            $tail = mb_substr($text, $start, mb_strlen($text, 'UTF-8') - $start, 'UTF-8');
            $cons .= htmlspecialchars($tail, $escapeFlags, 'UTF-8');

            $html .= $cons;
            $html .= '</div>';
        }
        $html .= '<br><br><br></body></html>';

        return $html;
    }
188
189
    /**
     * Spell check selected fields of YAML files and write one HTML report
     * per file into the system temporary directory.
     *
     * For every file, the parsed document is walked two levels deep: each
     * top-level item is scanned and the values whose key equals one of
     * $fields are collected as sentences.
     *
     * @param Finder   $files          YAML files to check
     * @param array    $fields         keys whose values must be checked
     * @param callable $checkfilestart called with the file and the number of sentences before checking
     * @param callable $checksentence  forwarded to checkSentences()
     * @param callable $checkfileend   called without argument once the report is written
     *
     * @return array paths of the generated HTML reports
     *
     * @throws \Exception when a file is not valid YAML
     */
    public function checkYamlFiles(
        Finder $files,
        array    $fields,
        callable $checkfilestart,
        callable $checksentence,
        callable $checkfileend
    ) {
        $results = [];
        /**
         * @var SplFileInfo $file
         */
        foreach ($files as $file) {
            try {
                $data = Yaml::parse(
                    file_get_contents(
                        $file->getRealPath()
                    )
                );
            } catch (ParseException $e) {
                // Chain the original exception so the parse location is not lost.
                throw new \Exception("Unable to parse YAML string: {$e->getMessage()}", 0, $e);
            }

            // An empty or scalar document has nothing to iterate over.
            if (!is_array($data) && !is_object($data)) {
                $data = [];
            }

            $sentences = [];
            foreach ($data as $item) {
                if (!is_array($item) && !is_object($item)) {
                    continue;
                }
                foreach ($item as $key => $valueitem) {
                    foreach ($fields as $field) {
                        // Compare as strings: a loose comparison would let the
                        // integer key 0 match any non-numeric field name on PHP < 8.
                        if ((string)$key === (string)$field) {
                            $sentences[] = $valueitem;
                        }
                    }
                }
            }

            $checkfilestart($file, count($sentences));
            $sentences = $this->checkSentences(
                $sentences,
                $checksentence
            );
            $htmlcode  = $this->convertToHtml($file->getFilename(), $sentences);

            $checkerfile = sys_get_temp_dir() . "/" . uniqid("spellcheck") . ".html";
            file_put_contents($checkerfile, $htmlcode);
            $results[] = $checkerfile;

            $checkfileend();
        }

        return $results;
    }
236
237
    /**
     * Spell check HTML files and write one HTML report per file into the
     * system temporary directory.
     *
     * The report title is the text of the first "head title" node of the
     * checked file, or the file name when no such node is found.
     *
     * @param Finder   $files          HTML files to check
     * @param callable $checkfilestart called with the file and the number of sentences before checking
     * @param callable $checksentence  forwarded to checkSentences()
     * @param callable $checkfileend   called without argument once the report is written
     *
     * @return array paths of the generated HTML reports
     */
    public function checkHtmlFiles(
        Finder $files,
        callable $checkfilestart,
        callable $checksentence,
        callable $checkfileend
    ) {
        $reports = [];
        /**
         * @var SplFileInfo $file
         */
        foreach ($files as $file) {
            $document = new GlHtml(file_get_contents($file->getRealPath()));

            $titleNodes = $document->get("head title");
            $hasTitle   = $titleNodes && count($titleNodes) > 0;
            $title      = $hasTitle ? $titleNodes[0]->getText() : $file->getFilename();

            $extracted = $document->getSentences();
            $checkfilestart($file, count($extracted));
            $checked  = $this->checkSentences($extracted, $checksentence);
            $htmlcode = $this->convertToHtml($title, $checked);

            $reportPath = sys_get_temp_dir() . "/" . uniqid("spellcheck") . ".html";
            file_put_contents($reportPath, $htmlcode);
            $reports[] = $reportPath;

            $checkfileend();
        }

        return $reports;
    }
284
285
    /**
     * Check a single sentence through the LanguageTool JSON API (v2).
     *
     * The sentence is POSTed to the server's /v2/check endpoint and each
     * entry of the "matches" list in the response becomes one
     * GlSpellCheckerError, with the replacement values as suggestions.
     *
     * @param string $sentence
     *
     * @return \GlSpellChecker\GlSpellCheckerSentence
     *
     * @throws \RuntimeException when the server response is not a JSON object
     */
    public function checkSentence($sentence)
    {
        $url = "http://{$this->languageToolServerIP}:{$this->languageToolServerPort}/v2/check";

        $response = $this->languagetoolClientHttp->post(
            $url,
            [
                'form_params' => [
                    'language'    => $this->languageToolLanguage,
                    'text'        => $sentence,
                    'enabledOnly' => false
                ]
            ]
        );

        // Decode to associative arrays: the code below indexes with [],
        // which is a fatal error on the stdClass returned by default.
        $decoded = json_decode($response->getBody()->getContents(), true);
        if (!is_array($decoded)) {
            throw new \RuntimeException('Invalid JSON response from LanguageTool server');
        }

        $matches = [];
        if (isset($decoded['matches']) && is_array($decoded['matches'])) {
            $matches = $decoded['matches'];
        }

        $sentenceChecked = new GlSpellCheckerSentence($sentence);
        foreach ($matches as $match) {
            $msg    = $match['message'];
            $offset = (int)$match['offset'];
            $length = (int)$match['length'];

            // Flatten replacements to plain strings so suggestions can be
            // concatenated directly when the report is rendered.
            $replacements = [];
            if (isset($match['replacements']) && is_array($match['replacements'])) {
                foreach ($match['replacements'] as $replacement) {
                    if (is_array($replacement) && isset($replacement['value'])) {
                        $replacements[] = $replacement['value'];
                    } elseif (is_string($replacement)) {
                        $replacements[] = $replacement;
                    }
                }
            }

            $word    = null;
            $glerror = new GlSpellCheckerError($msg, $offset, $length, $word, $replacements);
            $sentenceChecked->addError($glerror);
        }

        return $sentenceChecked;
    }
319
    
320
    /**
     * Check a list of sentences, one GET request per sentence, against the
     * LanguageTool server and collect the reported errors.
     *
     * Every "error" node of the response becomes a GlSpellCheckerError. For
     * nodes flagged as misspelling, the faulty word is extracted and, when an
     * enchant dictionary is available and rejects the word, enchant supplies
     * the suggestions.
     *
     * @param array    $sentences sentences to check
     * @param callable $closure   called with each sentence once it has been checked
     *
     * @return GlSpellCheckerSentence[]
     */
    public function checkSentences(
        array $sentences,
        callable $closure
    ) {
        $endpoint = "http://{$this->languageToolServerIP}:{$this->languageToolServerPort}";
        $checkedList = [];

        foreach ($sentences as $sentence) {
            $options = [
                'query' => [
                    'language' => $this->languageToolLanguage,
                    'text'     => $sentence
                ]
            ];
            $response = $this->languagetoolClientHttp->get($endpoint, $options);

            $report     = new GlHtml($response->getBody()->getContents());
            $errorNodes = $report->get('error');
            $checked    = new GlSpellCheckerSentence($sentence);

            if (count($errorNodes) > 0) {
                foreach ($errorNodes as $node) {
                    $message = $node->getAttribute('msg');
                    $offset  = (int)$node->getAttribute('offset');
                    $length  = (int)$node->getAttribute('errorlength');

                    $suggestions = [];
                    $word        = null;
                    $misspelled  = $node->getAttribute('locqualityissuetype') == 'misspelling';
                    if ($misspelled) {
                        $word = mb_substr($sentence, $offset, $length, 'UTF-8');
                        // Suggestions come from enchant, and only when it
                        // also rejects the word.
                        if ($this->enchantDictionnary
                            && !enchant_dict_check($this->enchantDictionnary, $word)
                        ) {
                            $suggestions = enchant_dict_suggest($this->enchantDictionnary, $word);
                        }
                    }

                    $checked->addError(
                        new GlSpellCheckerError($message, $offset, $length, $word, $suggestions)
                    );
                }
            }

            $checkedList[] = $checked;
            $closure($sentence);
        }

        return $checkedList;
    }
374
375
    /**
     * Start the LanguageTool HTTP server in the background on the configured port.
     *
     * @param string $directory directory containing languagetool-server.jar,
     *                          with or without a trailing directory separator
     */
    private function startLanguageToolServer($directory)
    {
        // Normalise the trailing separator so both "dir" and "dir/" resolve
        // to the same jar path.
        $jar = rtrim($directory, '/\\') . DIRECTORY_SEPARATOR . "languagetool-server.jar";

        // Escape the path: it may contain spaces or shell metacharacters.
        $command = sprintf(
            'java -cp %s org.languagetool.server.HTTPServer --port %d',
            escapeshellarg($jar),
            $this->languageToolServerPort
        );

        $this->languagetoolServer = new Process($command);
        $this->languagetoolServer->start();
    }
385
386
    /**
     * Stop the LanguageTool server process if one is held, then forget it.
     */
    private function stopLanguageToolServer()
    {
        // Nothing was started, or it was already stopped.
        if (!$this->languagetoolServer) {
            return;
        }

        $this->languagetoolServer->stop();
        $this->languagetoolServer = null;
    }
393
} 
394