Passed
Pull Request — master (#89)
by
unknown
03:03
created

AbstractDiff   F

Complexity

Total Complexity 69

Size/Duplication

Total Lines 577
Duplicated Lines 0 %

Test Coverage

Coverage 65.88%

Importance

Changes 3
Bugs 0 Features 0
Metric Value
eloc 143
c 3
b 0
f 0
dl 0
loc 577
ccs 112
cts 170
cp 0.6588
rs 2.88
wmc 69

37 Methods

Rating   Name   Duplication   Size   Complexity  
A __construct() 0 17 3
A hasDiffCache() 0 3 1
A getConfig() 0 3 1
A addSpecialCaseTag() 0 3 1
A getNewHtml() 0 3 1
A setGroupDiffs() 0 5 1
A isStartOfTag() 0 3 1
A splitInputsToWords() 0 4 1
A clearContent() 0 3 1
A removeSpecialCaseChar() 0 3 1
A initPurifier() 0 25 4
A prepare() 0 10 2
A isWhiteSpace() 0 3 1
A setSpecialCaseTags() 0 3 1
A getMatchThreshold() 0 3 1
A setNewWords() 0 4 1
A setMatchThreshold() 0 5 1
A getOldHtml() 0 3 1
A setConfig() 0 5 1
A purifyHtml() 0 7 2
A getSpecialCaseChars() 0 3 1
A getOpeningTag() 0 3 1
A isPartOfWord() 0 3 1
A getClosingTag() 0 3 1
A setOldWords() 0 4 1
A getDiffCache() 0 13 3
A getSpecialCaseTags() 0 3 1
A explode() 0 4 1
A setHTMLPurifierConfig() 0 3 1
A isGroupDiffs() 0 3 1
A isEndOfTag() 0 3 1
A getDifference() 0 3 1
D convertHtmlToListOfWords() 0 76 24
A setSpecialCaseChars() 0 3 1
A removeSpecialCaseTag() 0 3 1
A ctypeAlphanumUnicode() 0 3 1
A addSpecialCaseChar() 0 3 1

How to fix   Complexity   

Complex Class

Complex classes like AbstractDiff often do a lot of different things. To break such a class down, we need to identify a cohesive component within that class. A common approach to find such a component is to look for fields/methods that share the same prefixes, or suffixes.

Once you have determined the fields that belong together, you can apply the Extract Class refactoring. If the component makes sense as a sub-class, Extract Subclass is also a candidate, and is often faster.

While breaking up the class, it is a good idea to analyze how other classes use AbstractDiff, and based on these observations, apply Extract Interface, too.

1
<?php
2
3
namespace Caxy\HtmlDiff;
4
5
use Caxy\HtmlDiff\Util\MbStringUtil;
6
7
/**
 * Class AbstractDiff.
 *
 * Shared base for the diff builders: stores the old and new HTML, the diff
 * configuration, the optional HTMLPurifier instance, and the tokenizer that
 * splits HTML into a flat list of "words" (tags, whitespace runs and text).
 * Concrete subclasses supply the actual diff algorithm through build().
 */
abstract class AbstractDiff
{
    /**
     * @var array
     *
     * @deprecated since 0.1.0
     */
    public static $defaultSpecialCaseTags = ['strong', 'b', 'i', 'big', 'small', 'u', 'sub', 'sup', 'strike', 's', 'p'];

    /**
     * @var array
     *
     * @deprecated since 0.1.0
     */
    public static $defaultSpecialCaseChars = ['.', ',', '(', ')', '\''];

    /**
     * @var bool
     *
     * @deprecated since 0.1.0
     */
    public static $defaultGroupDiffs = true;

    /**
     * @var HtmlDiffConfig
     */
    protected $config;

    /**
     * Diff output; starts as an empty string and is reset to null by clearContent().
     *
     * @var string|null
     */
    protected $content;

    /**
     * @var string
     */
    protected $oldText;

    /**
     * @var string
     */
    protected $newText;

    /**
     * @var array
     */
    protected $oldWords = [];

    /**
     * @var array
     */
    protected $newWords = [];

    /**
     * Diff caches keyed by the spl_object_hash() of their cache provider.
     *
     * @var DiffCache[]
     */
    protected $diffCaches = [];

    /**
     * @var \HTMLPurifier
     */
    protected $purifier;

    /**
     * @var \HTMLPurifier_Config|null
     */
    protected $purifierConfig = null;

    /**
     * Raised whenever the old or new word list is replaced.
     *
     * @see array_slice_cached();
     * @var bool
     */
    protected $resetCache = false;

    /**
     * @var MbStringUtil
     */
    protected $stringUtil;

    /**
     * AbstractDiff constructor.
     *
     * Creates a fresh HtmlDiffConfig with the given encoding; the two optional
     * arguments override the config only when they are not null.
     *
     * @param string     $oldText
     * @param string     $newText
     * @param string     $encoding
     * @param null|array $specialCaseTags
     * @param null|bool  $groupDiffs
     */
    public function __construct($oldText, $newText, $encoding = 'UTF-8', $specialCaseTags = null, $groupDiffs = null)
    {
        $this->stringUtil = new MbStringUtil($oldText, $newText);

        $this->setConfig(HtmlDiffConfig::create()->setEncoding($encoding));

        if ($specialCaseTags !== null) {
            $this->config->setSpecialCaseTags($specialCaseTags);
        }

        if ($groupDiffs !== null) {
            $this->config->setGroupDiffs($groupDiffs);
        }

        $this->oldText = $oldText;
        $this->newText = $newText;
        $this->content = '';
    }

    /**
     * @return bool|string
     */
    abstract public function build();

    /**
     * Initializes HTMLPurifier with cache location.
     *
     * Uses the config supplied through setHTMLPurifierConfig() when there is
     * one, otherwise HTMLPurifier's default config. Note that the chosen config
     * object is modified in place.
     *
     * @param null|string $defaultPurifierSerializerCache
     *
     * @throws \LogicException when the HTMLPurifier class cannot be found
     */
    public function initPurifier($defaultPurifierSerializerCache = null)
    {
        if (!class_exists(\HTMLPurifier::class)) {
            throw new \LogicException('You cannot use the purifier, as the htmlpurifier is not available. Try running "composer require ezyang/htmlpurifier".');
        }

        $purifierConfig = null !== $this->purifierConfig
            ? $this->purifierConfig
            : \HTMLPurifier_Config::createDefault();

        // Cache.SerializerPath defaults to Null and sets
        // the location to inside the vendor HTMLPurifier library
        // under the DefinitionCache/Serializer folder.
        if (null !== $defaultPurifierSerializerCache) {
            $purifierConfig->set('Cache.SerializerPath', $defaultPurifierSerializerCache);
        }

        // This setting allows the cache files to be deleted by any user, as they are typically
        // created by the web/php user (www-user, php-fpm, etc.)
        // NOTE(review): 0777 makes the cache world-writable; kept because existing
        // deployments may rely on it.
        $purifierConfig->set('Cache.SerializerPermissions', 0777);

        $this->purifier = new \HTMLPurifier($purifierConfig);
    }

    /**
     * Prepare (purify) the HTML
     *
     * Does nothing when the purifier is disabled in the config; otherwise both
     * the old and the new text are replaced by their purified versions.
     *
     * @return void
     */
    protected function prepare()
    {
        if (!$this->config->isPurifierEnabled()) {
            return;
        }

        $this->initPurifier($this->config->getPurifierCacheLocation());

        $this->oldText = $this->purifyHtml($this->oldText);
        $this->newText = $this->purifyHtml($this->newText);
    }

    /**
     * Returns the DiffCache wrapping the configured cache provider, creating
     * and memoizing it on first use.
     *
     * @return DiffCache|null null when no cache provider is configured
     */
    protected function getDiffCache()
    {
        if (!$this->hasDiffCache()) {
            return null;
        }

        $provider = $this->getConfig()->getCacheProvider();
        $hash = spl_object_hash($provider);

        if (!isset($this->diffCaches[$hash])) {
            $this->diffCaches[$hash] = new DiffCache($provider);
        }

        return $this->diffCaches[$hash];
    }

    /**
     * @return bool true when the config has a cache provider
     */
    protected function hasDiffCache()
    {
        return null !== $this->getConfig()->getCacheProvider();
    }

    /**
     * @return HtmlDiffConfig
     */
    public function getConfig()
    {
        return $this->config;
    }

    /**
     * @param HtmlDiffConfig $config
     *
     * @return AbstractDiff
     */
    public function setConfig(HtmlDiffConfig $config)
    {
        $this->config = $config;

        return $this;
    }

    /**
     * @return int
     *
     * @deprecated since 0.1.0
     */
    public function getMatchThreshold()
    {
        return $this->config->getMatchThreshold();
    }

    /**
     * @param int $matchThreshold
     *
     * @return AbstractDiff
     *
     * @deprecated since 0.1.0
     */
    public function setMatchThreshold($matchThreshold)
    {
        $this->config->setMatchThreshold($matchThreshold);

        return $this;
    }

    /**
     * @param array $chars
     *
     * @deprecated since 0.1.0
     */
    public function setSpecialCaseChars(array $chars)
    {
        $this->config->setSpecialCaseChars($chars);
    }

    /**
     * @return array|null
     *
     * @deprecated since 0.1.0
     */
    public function getSpecialCaseChars()
    {
        return $this->config->getSpecialCaseChars();
    }

    /**
     * @param string $char
     *
     * @deprecated since 0.1.0
     */
    public function addSpecialCaseChar($char)
    {
        $this->config->addSpecialCaseChar($char);
    }

    /**
     * @param string $char
     *
     * @deprecated since 0.1.0
     */
    public function removeSpecialCaseChar($char)
    {
        $this->config->removeSpecialCaseChar($char);
    }

    /**
     * Replaces the special-case tags on the config.
     *
     * @param array $tags
     *
     * @deprecated since 0.1.0
     */
    public function setSpecialCaseTags(array $tags = [])
    {
        // This used to forward to setSpecialCaseChars(), which overwrote the
        // special-case characters and left the tags untouched.
        $this->config->setSpecialCaseTags($tags);
    }

    /**
     * @param string $tag
     *
     * @deprecated since 0.1.0
     */
    public function addSpecialCaseTag($tag)
    {
        $this->config->addSpecialCaseTag($tag);
    }

    /**
     * @param string $tag
     *
     * @deprecated since 0.1.0
     */
    public function removeSpecialCaseTag($tag)
    {
        $this->config->removeSpecialCaseTag($tag);
    }

    /**
     * @return array|null
     *
     * @deprecated since 0.1.0
     */
    public function getSpecialCaseTags()
    {
        return $this->config->getSpecialCaseTags();
    }

    /**
     * @return string
     */
    public function getOldHtml()
    {
        return $this->oldText;
    }

    /**
     * @return string
     */
    public function getNewHtml()
    {
        return $this->newText;
    }

    /**
     * @return string|null the diff content; null once clearContent() has been called
     */
    public function getDifference()
    {
        return $this->content;
    }

    /**
     * Clears the diff content.
     *
     * @return void
     */
    public function clearContent()
    {
        $this->content = null;
    }

    /**
     * @param bool $boolean
     *
     * @return $this
     *
     * @deprecated since 0.1.0
     */
    public function setGroupDiffs($boolean)
    {
        $this->config->setGroupDiffs($boolean);

        return $this;
    }

    /**
     * @return bool
     *
     * @deprecated since 0.1.0
     */
    public function isGroupDiffs()
    {
        return $this->config->isGroupDiffs();
    }

    /**
     * Supplies the HTMLPurifier config that initPurifier() will use instead of
     * the library default.
     *
     * @param \HTMLPurifier_Config $config
     */
    public function setHTMLPurifierConfig(\HTMLPurifier_Config $config)
    {
        $this->purifierConfig = $config;
    }

    /**
     * Builds a case-insensitive regular expression matching the start of an
     * opening tag. The tag name is inserted into the pattern as given.
     *
     * @param string $tag
     *
     * @return string
     */
    protected function getOpeningTag($tag)
    {
        return '/<'.$tag.'[^>]*/i';
    }

    /**
     * Builds the literal closing tag (a plain string, not a regular expression).
     *
     * @param string $tag
     *
     * @return string
     */
    protected function getClosingTag($tag)
    {
        return '</'.$tag.'>';
    }

    /**
     * Runs the HTML through the purifier, or returns it untouched when no
     * purifier has been initialized.
     *
     * @param string $html
     *
     * @return string
     */
    protected function purifyHtml($html)
    {
        if (!$this->purifier) {
            return $html;
        }

        return $this->purifier->purify($html);
    }

    /**
     * Tokenizes the old and the new text and stores the resulting word lists.
     *
     * @return void
     */
    protected function splitInputsToWords()
    {
        $this->setOldWords($this->convertHtmlToListOfWords($this->explode($this->oldText)));
        $this->setNewWords($this->convertHtmlToListOfWords($this->explode($this->newText)));
    }

    /**
     * @param array $oldWords
     */
    protected function setOldWords(array $oldWords)
    {
        $this->resetCache = true;
        $this->oldWords = $oldWords;
    }

    /**
     * @param array $newWords
     */
    protected function setNewWords(array $newWords)
    {
        $this->resetCache = true;
        $this->newWords = $newWords;
    }

    /**
     * Tells whether the text is alphanumeric once the configured special-case
     * characters have been stripped from it.
     *
     * @param string $text
     *
     * @return bool
     */
    protected function isPartOfWord($text)
    {
        return $this->ctypeAlphanumUnicode(str_replace($this->config->getSpecialCaseChars(), '', $text));
    }

    /**
     * Groups a list of single characters into words.
     *
     * A small state machine with three modes:
     *  - "character":  collecting a text token;
     *  - "tag":        collecting everything from "<" up to and including ">";
     *  - "whitespace": collecting a whitespace run, collapsed to a single
     *                  space unless the config asks to keep new lines.
     *
     * @param array $characterString list of single characters, as produced by explode()
     *
     * @return array
     */
    protected function convertHtmlToListOfWords($characterString)
    {
        $mode = 'character';
        $current_word = '';
        $words = [];
        $keepNewLines = $this->getConfig()->isKeepNewLines();
        // Loop-invariant, so fetched once.
        $specialCaseChars = $this->config->getSpecialCaseChars();

        foreach ($characterString as $i => $character) {
            switch ($mode) {
                case 'character':
                    if ($this->isStartOfTag($character)) {
                        if ($current_word !== '') {
                            $words[] = $current_word;
                        }

                        $current_word = '<';
                        $mode = 'tag';
                    } elseif (preg_match('/\s/u', $character)) {
                        if ($current_word !== '') {
                            $words[] = $current_word;
                        }

                        $current_word = $keepNewLines ? $character : preg_replace('/\s+/Su', ' ', $character);
                        $mode = 'whitespace';
                    } elseif (
                        // An alphanumeric character extends the current word when that word is
                        // empty or is itself word-like...
                        (
                            $this->ctypeAlphanumUnicode($character) === true
                            && ($this->stringUtil->strlen($current_word) === 0 || $this->isPartOfWord($current_word))
                        )
                        // ...and a special-case character sticks to the word when the next
                        // character is word-like.
                        || (
                            in_array($character, $specialCaseChars, true)
                            && isset($characterString[$i + 1])
                            && $this->isPartOfWord($characterString[$i + 1])
                        )
                    ) {
                        $current_word .= $character;
                    } else {
                        // NOTE(review): this also pushes an empty word when the input starts
                        // with a non-word character; kept as-is so the tokenizer output does
                        // not change.
                        $words[] = $current_word;
                        $current_word = $character;
                    }
                    break;

                case 'tag':
                    if ($this->isEndOfTag($character)) {
                        $current_word .= '>';
                        $words[] = $current_word;
                        $current_word = '';

                        // The previous check, !preg_match('[^\s]u', $character), used the
                        // brackets as pattern delimiters and therefore tested "^\s". With the
                        // default isEndOfTag() the character is always ">", so the check always
                        // selected whitespace mode; that is now stated directly.
                        $mode = 'whitespace';
                    } else {
                        $current_word .= $character;
                    }
                    break;

                case 'whitespace':
                    if ($this->isStartOfTag($character)) {
                        if ($current_word !== '') {
                            $words[] = $current_word;
                        }

                        $current_word = '<';
                        $mode = 'tag';
                    } elseif (preg_match('/\s/u', $character)) {
                        $current_word .= $character;

                        if (!$keepNewLines) {
                            $current_word = preg_replace('/\s+/Su', ' ', $current_word);
                        }
                    } else {
                        if ($current_word !== '') {
                            $words[] = $current_word;
                        }

                        $current_word = $character;
                        $mode = 'character';
                    }
                    break;

                default:
                    break;
            }
        }

        if ($current_word !== '') {
            $words[] = $current_word;
        }

        return $words;
    }

    /**
     * @param string $val
     *
     * @return bool
     */
    protected function isStartOfTag($val)
    {
        return $val === '<';
    }

    /**
     * @param string $val
     *
     * @return bool
     */
    protected function isEndOfTag($val)
    {
        return $val === '>';
    }

    /**
     * Tells whether the value contains no non-whitespace character (an empty
     * string therefore counts as whitespace).
     *
     * @param string $value
     *
     * @return bool
     */
    protected function isWhiteSpace($value)
    {
        // The pattern needs explicit delimiters: '[^\s]u' was parsed with the brackets
        // as delimiters, i.e. as "^\s", which inverted the result.
        return !preg_match('/[^\s]/u', $value);
    }

    /**
     * Splits a UTF-8 string into a list of single characters.
     *
     * @param string $value
     *
     * @return array
     */
    protected function explode($value)
    {
        // as suggested by @onassar
        return preg_split('//u', $value, -1, PREG_SPLIT_NO_EMPTY);
    }

    /**
     * Tells whether the string is non-empty and made only of ASCII letters,
     * digits and Unicode letters.
     *
     * @param string $str
     *
     * @return bool
     */
    protected function ctypeAlphanumUnicode($str)
    {
        return preg_match('/^[a-zA-Z0-9\pL]+$/u', $str) === 1;
    }
}
589