GlSpellChecker   A
last analyzed

Complexity

Total Complexity 32

Size/Duplication

Total Lines 303
Duplicated Lines 0 %

Coupling/Cohesion

Components 1
Dependencies 9

Importance

Changes 0
Metric Value
wmc 32
lcom 1
cbo 9
dl 0
loc 303
rs 9.6
c 0
b 0
f 0

8 Methods

Rating   Name   Duplication   Size   Complexity  
B __construct() 0 30 4
A __destruct() 0 8 2
B convertToHtml() 0 47 5
C checkYamlFiles() 0 47 7
B checkHtmlFiles() 0 39 4
C checkSentences() 0 47 7
A startLanguageToolServer() 0 7 1
A stopLanguageToolServer() 0 7 2
1
<?php
2
/**
3
 * Spell check HTML and YAML files
4
 *
5
 * PHP version 5.4
6
 *
7
 * @category  GLICER
8
 * @package   GlSpellChecker
9
 * @author    Emmanuel ROECKER
10
 * @author    Rym BOUCHAGOUR
11
 * @copyright 2015 GLICER
12
 * @license   MIT
13
 * @link      http://dev.glicer.com/
14
 *
15
 * Created : 04/05/15
16
 * File : GlSpellChecker.php
17
 *
18
 */
19
20
namespace GlSpellChecker;
21
22
use GlHtml\GlHtml;
23
use Symfony\Component\Process\Process;
24
use Symfony\Component\Finder\SplFileInfo;
25
use Symfony\Component\Finder\Finder;
26
use Symfony\Component\Yaml\Exception\ParseException;
27
use Symfony\Component\Yaml\Yaml;
28
use GuzzleHttp\Client;
29
30
31
/**
32
 * Class GlSpellChecker
33
 * @package GlSpellChecker
34
 */
35
class GlSpellChecker
{
    /**
     * Host name or IP address of the LanguageTool HTTP server.
     *
     * @var string
     */
    private $languageToolServerIP = 'localhost';

    /**
     * TCP port of the LanguageTool HTTP server.
     *
     * @var int
     */
    private $languageToolServerPort = 8081;

    /**
     * Language code sent to LanguageTool with every request.
     *
     * @var string
     */
    private $languageToolLanguage = 'fr';

    /**
     * Server process started by this instance, or null when none was started.
     *
     * @var Process|null
     */
    private $languagetoolServer = null;

    /**
     * HTTP client used to query the LanguageTool server.
     *
     * @var Client
     */
    private $languagetoolClientHttp;

    /**
     * Dictionary tag requested from enchant.
     *
     * @var string
     */
    private $enchantLanguage = "fr_FR";

    /**
     * Enchant dictionary handle, or null when enchant is not available.
     */
    private $enchantDictionnary = null;

    /**
     * Enchant broker handle, or null when enchant is not available.
     */
    private $enchantBroker = null;

    /**
     * Configure the checker, optionally start a local LanguageTool server and,
     * when the enchant extension is loaded, open the enchant dictionary.
     *
     * @param string      $languageToolLanguage   language code sent to LanguageTool
     * @param string      $enchantLanguage        dictionary tag requested from enchant
     * @param string|null $languageToolDirectory  directory containing languagetool-server.jar;
     *                                            null or '' means no server is started
     * @param string      $languageToolServerIP   host of the LanguageTool server
     * @param int         $languageToolServerPort port of the LanguageTool server
     *
     * @throws \Exception when enchant is loaded but has no dictionary for $enchantLanguage
     */
    public function __construct(
        $languageToolLanguage,
        $enchantLanguage,
        $languageToolDirectory = null,
        $languageToolServerIP = 'localhost',
        $languageToolServerPort = 8081
    ) {
        $this->languageToolLanguage   = $languageToolLanguage;
        $this->enchantLanguage        = $enchantLanguage;
        $this->languageToolServerPort = $languageToolServerPort;
        $this->languageToolServerIP   = $languageToolServerIP;

        // Explicit checks instead of a loose truthiness test on string|null.
        if ($languageToolDirectory !== null && $languageToolDirectory !== '') {
            $this->startLanguageToolServer($languageToolDirectory);
        }

        $this->languagetoolClientHttp = new Client();

        if (extension_loaded('enchant')) {
            $this->enchantBroker = enchant_broker_init();

            enchant_broker_set_dict_path($this->enchantBroker, ENCHANT_MYSPELL, __DIR__ . '/../dicts');

            if (!enchant_broker_dict_exists($this->enchantBroker, $this->enchantLanguage)) {
                // Release what was acquired above before failing, rather than
                // relying on __destruct() of a half-constructed object.
                enchant_broker_free($this->enchantBroker);
                $this->enchantBroker = null;
                $this->stopLanguageToolServer();

                throw new \Exception("Cannot find dictionnaries for enchant");
            }

            $this->enchantDictionnary = enchant_broker_request_dict($this->enchantBroker, $this->enchantLanguage);
        }
    }

    /**
     * Stop the server started by this instance and release enchant handles.
     */
    public function __destruct()
    {
        $this->stopLanguageToolServer();

        // The dictionary and the broker are released independently: the
        // dictionary may never have been opened even though the broker was.
        if ($this->enchantDictionnary) {
            enchant_broker_free_dict($this->enchantDictionnary);
            $this->enchantDictionnary = null;
        }
        if ($this->enchantBroker) {
            enchant_broker_free($this->enchantBroker);
            $this->enchantBroker = null;
        }
    }

    /**
     * Render checked sentences as a standalone HTML report.
     *
     * Each sentence becomes a <div class="sentence">; every error zone is
     * wrapped in a <span class="error"> whose title holds the error message
     * followed by the first suggestion, if any. All text coming from the
     * title, the sentences and the errors is HTML-escaped.
     *
     * @param string                   $title
     * @param GlSpellCheckerSentence[] $sentences
     *
     * @return string
     */
    public static function convertToHtml($title, $sentences)
    {
        $html = '<!DOCTYPE HTML>';
        $html .= '<html><head><meta http-equiv="Content-Type" content="text/html; charset=utf-8">';
        $html .= '<title>' . self::escapeHtml($title) . '</title>';
        $html .= '<style>';
        $html .= '.error {  color: red  }';
        $html .= '</style>';
        $html .= '</head><body>';

        foreach ($sentences as $sentence) {
            $text   = $sentence->getText();
            $errors = $sentence->mergeErrors();
            $start  = 0;

            $html .= '<div class="sentence">';

            if (!empty($errors)) {
                foreach ($errors as $error) {
                    $offset = $error->getOffset();
                    $length = $error->getLength();

                    // Plain text between the previous error and this one.
                    $html .= self::escapeHtml(mb_substr($text, $start, $offset - $start, 'UTF-8'));

                    $tooltip = $error->getMessage();
                    $suggs   = $error->getSuggestions();
                    if (count($suggs) > 0) {
                        $tooltip .= " : " . $suggs[0];
                    }

                    $zone = mb_substr($text, $offset, $length, 'UTF-8');
                    $html .= '<span class="error" title="' . self::escapeHtml($tooltip) . '">'
                        . self::escapeHtml($zone) . '</span>';

                    $start = $offset + $length;
                }
            }

            // Remaining text after the last error (the whole text when there is none).
            // The length is measured in UTF-8 characters, like every offset above.
            $html .= self::escapeHtml(
                mb_substr($text, $start, mb_strlen($text, 'UTF-8') - $start, 'UTF-8')
            );
            $html .= '</div>';
        }
        $html .= '<br><br><br></body></html>';

        return $html;
    }

    /**
     * Spell check selected fields of YAML files and write one HTML report per file.
     *
     * Every file is expected to parse to a list of mappings; the values whose
     * key is listed in $fields are the sentences that get checked. Entries
     * that are not mappings, and values that are neither strings nor numbers,
     * are skipped.
     *
     * @param Finder   $files
     * @param array    $fields         keys whose values must be checked
     * @param callable $checkfilestart called with (file, number of sentences) before checking a file
     * @param callable $checksentence  called with each sentence once it has been checked
     * @param callable $checkfileend   called without arguments once the report of a file is written
     *
     * @return array paths of the generated HTML reports
     *
     * @throws \Exception when a file cannot be read or parsed, or a report cannot be written
     */
    public function checkYamlFiles(
        Finder $files,
        array $fields,
        callable $checkfilestart,
        callable $checksentence,
        callable $checkfileend
    ) {
        $results = [];
        /**
         * @var SplFileInfo $file
         */
        foreach ($files as $file) {
            try {
                $data = Yaml::parse($this->readFile($file));
            } catch (ParseException $e) {
                // Keep the original exception reachable through getPrevious().
                throw new \Exception("Unable to parse YAML string: {$e->getMessage()}", 0, $e);
            }

            $sentences = $this->extractYamlSentences($data, $fields);

            $checkfilestart($file, count($sentences));
            $sentences = $this->checkSentences($sentences, $checksentence);
            $results[] = $this->writeReport($file->getFilename(), $sentences);

            $checkfileend();
        }

        return $results;
    }

    /**
     * Spell check the sentences of HTML files and write one HTML report per file.
     *
     * The report title is the text of the first "head title" node of the
     * checked file, or the file name when there is no such node.
     *
     * @param Finder   $files
     * @param callable $checkfilestart called with (file, number of sentences) before checking a file
     * @param callable $checksentence  called with each sentence once it has been checked
     * @param callable $checkfileend   called without arguments once the report of a file is written
     *
     * @return array paths of the generated HTML reports
     *
     * @throws \Exception when a file cannot be read or a report cannot be written
     */
    public function checkHtmlFiles(
        Finder $files,
        callable $checkfilestart,
        callable $checksentence,
        callable $checkfileend
    ) {
        $results = [];
        /**
         * @var SplFileInfo $file
         */
        foreach ($files as $file) {
            $html = new GlHtml($this->readFile($file));

            $titles = $html->get("head title");
            if (!empty($titles)) {
                $title = $titles[0]->getText();
            } else {
                $title = $file->getFilename();
            }

            $sentences = $html->getSentences();
            $checkfilestart($file, count($sentences));
            $sentences = $this->checkSentences($sentences, $checksentence);
            $results[] = $this->writeReport($title, $sentences);

            $checkfileend();
        }

        return $results;
    }

    /**
     * Send every sentence to the LanguageTool server and collect the reported errors.
     *
     * For errors whose "locqualityissuetype" attribute is "misspelling", the
     * misspelled word is extracted and, when an enchant dictionary is open,
     * suggestions are requested from it.
     *
     * @param array    $sentences
     *
     * @param callable $closure called with each sentence once it has been checked
     *
     * @return GlSpellCheckerSentence[]
     */
    public function checkSentences(
        array $sentences,
        callable $closure
    ) {
        $url              = "http://{$this->languageToolServerIP}:{$this->languageToolServerPort}";
        $sentencesChecked = [];

        foreach ($sentences as $sentence) {
            $response = $this->languagetoolClientHttp->get(
                $url,
                [
                    'query' => [
                        'language' => $this->languageToolLanguage,
                        'text'     => $sentence
                    ]
                ]
            );

            // The XML answer is read through GlHtml to query its <error> nodes.
            $glxml           = new GlHtml($response->getBody()->getContents());
            $sentenceChecked = new GlSpellCheckerSentence($sentence);

            foreach ($glxml->get('error') as $error) {
                $msg    = $error->getAttribute('msg');
                $offset = (int)$error->getAttribute('offset');
                $length = (int)$error->getAttribute('errorlength');
                $suggs  = [];
                $word   = null;

                if ($error->getAttribute('locqualityissuetype') == 'misspelling') {
                    $word = mb_substr($sentence, $offset, $length, 'UTF-8');
                    if ($this->enchantDictionnary
                        && !enchant_dict_check($this->enchantDictionnary, $word)
                    ) {
                        $suggs = enchant_dict_suggest($this->enchantDictionnary, $word);
                    }
                }

                $sentenceChecked->addError(
                    new GlSpellCheckerError($msg, $offset, $length, $word, $suggs)
                );
            }

            $sentencesChecked[] = $sentenceChecked;
            $closure($sentence);
        }

        return $sentencesChecked;
    }

    /**
     * Escape a value for use in HTML text and in double-quoted attributes.
     *
     * @param mixed $value
     *
     * @return string
     */
    private static function escapeHtml($value)
    {
        return htmlspecialchars((string)$value, ENT_QUOTES | ENT_SUBSTITUTE, 'UTF-8');
    }

    /**
     * Read the whole content of a file.
     *
     * @param \SplFileInfo $file
     *
     * @return string
     *
     * @throws \Exception when the file cannot be read
     */
    private function readFile($file)
    {
        $content = file_get_contents($file->getRealPath());
        if ($content === false) {
            throw new \Exception("Unable to read file: {$file->getPathname()}");
        }

        return $content;
    }

    /**
     * Collect the values of the requested fields from parsed YAML data.
     *
     * @param mixed $data   result of Yaml::parse()
     * @param array $fields keys whose values must be collected
     *
     * @return string[]
     */
    private function extractYamlSentences($data, array $fields)
    {
        $sentences = [];
        if (!is_array($data)) {
            // Empty or scalar document: nothing to check.
            return $sentences;
        }

        // Keys are compared as strings so that the integer key 0 cannot
        // loosely match an arbitrary field name.
        $wanted = array_map('strval', $fields);

        foreach ($data as $item) {
            if (!is_array($item)) {
                continue;
            }
            foreach ($item as $key => $valueitem) {
                if (!in_array((string)$key, $wanted, true)) {
                    continue;
                }
                if (is_string($valueitem) || is_numeric($valueitem)) {
                    $sentences[] = (string)$valueitem;
                }
            }
        }

        return $sentences;
    }

    /**
     * Render checked sentences and store the report in the system temporary directory.
     *
     * @param string                   $title
     * @param GlSpellCheckerSentence[] $sentences
     *
     * @return string path of the written report
     *
     * @throws \Exception when the report cannot be written
     */
    private function writeReport($title, array $sentences)
    {
        // more_entropy lowers the risk of two reports written back to back sharing a name.
        $checkerfile = sys_get_temp_dir() . "/" . uniqid("spellcheck", true) . ".html";

        if (file_put_contents($checkerfile, self::convertToHtml($title, $sentences)) === false) {
            throw new \Exception("Unable to write spell check report: {$checkerfile}");
        }

        return $checkerfile;
    }

    /**
     * Start a LanguageTool HTTP server from the jar found in $directory.
     *
     * NOTE(review): start() launches the process without waiting for it; nothing
     * here waits for the server to accept connections before the first request.
     *
     * @param string $directory directory containing languagetool-server.jar,
     *                          with or without a trailing separator
     */
    private function startLanguageToolServer($directory)
    {
        $jar     = rtrim($directory, '/\\') . DIRECTORY_SEPARATOR . 'languagetool-server.jar';
        $command = sprintf(
            'java -cp %s org.languagetool.server.HTTPServer --port %d',
            escapeshellarg($jar),
            $this->languageToolServerPort
        );

        $this->languagetoolServer = new Process($command);
        $this->languagetoolServer->start();
    }

    /**
     * Stop the LanguageTool server started by this instance, if any.
     */
    private function stopLanguageToolServer()
    {
        if ($this->languagetoolServer) {
            $this->languagetoolServer->stop();
            $this->languagetoolServer = null;
        }
    }
}
338