Passed
Push — master ( d7540c...231250 )
by Josh
05:57
created

AbstractDiff   C

Complexity

Total Complexity 70

Size/Duplication

Total Lines 583
Duplicated Lines 0 %

Coupling/Cohesion

Components 2
Dependencies 4

Test Coverage

Coverage 62.57%

Importance

Changes 0
Metric Value
dl 0
loc 583
ccs 107
cts 171
cp 0.6257
rs 5.6163
c 0
b 0
f 0
wmc 70
lcom 2
cbo 4

38 Methods

Rating   Name   Duplication   Size   Complexity  
build() 0 1 ?
A __construct() 0 18 3
A initPurifier() 0 22 3
A prepare() 0 7 1
A getDiffCache() 0 14 3
A hasDiffCache() 0 4 1
A getConfig() 0 4 1
A setConfig() 0 6 1
A getMatchThreshold() 0 4 1
A setMatchThreshold() 0 6 1
A setSpecialCaseChars() 0 4 1
A getSpecialCaseChars() 0 4 1
A addSpecialCaseChar() 0 4 1
A removeSpecialCaseChar() 0 4 1
A setSpecialCaseTags() 0 4 1
A addSpecialCaseTag() 0 4 1
A removeSpecialCaseTag() 0 4 1
A getSpecialCaseTags() 0 4 1
A getOldHtml() 0 4 1
A getNewHtml() 0 4 1
A getDifference() 0 4 1
A clearContent() 0 4 1
A setGroupDiffs() 0 6 1
A isGroupDiffs() 0 4 1
A setHTMLPurifierConfig() 0 4 1
A getOpeningTag() 0 4 1
A getClosingTag() 0 4 1
A getStringBetween() 0 14 3
A splitInputsToWords() 0 5 1
A setOldWords() 0 5 1
A setNewWords() 0 5 1
A isPartOfWord() 0 4 1
A purifyHtml() 0 13 3
C convertHtmlToListOfWords() 0 77 24
A isStartOfTag() 0 4 1
A isEndOfTag() 0 4 1
A isWhiteSpace() 0 4 1
A explode() 0 5 1

How to fix   Complexity   

Complex Class

Complex classes like AbstractDiff often do a lot of different things. To break such a class down, we need to identify a cohesive component within that class. A common approach to find such a component is to look for fields/methods that share the same prefixes, or suffixes. You can also have a look at the cohesion graph to spot any un-connected, or weakly-connected components.

Once you have determined the fields that belong together, you can apply the Extract Class refactoring. If the component makes sense as a sub-class, Extract Subclass is also a candidate, and is often faster.

While breaking up the class, it is a good idea to analyze how other classes use AbstractDiff, and based on these observations, apply Extract Interface, too.

1
<?php
2
3
namespace Caxy\HtmlDiff;
4
5
/**
6
 * Class AbstractDiff.
7
 */
8
abstract class AbstractDiff
9
{
10
    /**
11
     * @var array
12
     *
13
     * @deprecated since 0.1.0
14
     */
15
    public static $defaultSpecialCaseTags = array('strong', 'b', 'i', 'big', 'small', 'u', 'sub', 'sup', 'strike', 's', 'p');
0 ignored issues
show
Coding Style introduced by
This line exceeds maximum limit of 120 characters; contains 125 characters

Overly long lines are hard to read on any screen. Most code styles therefore impose a maximum limit on the number of characters in a line.

Loading history...
16
17
    /**
18
     * @var array
19
     *
20
     * @deprecated since 0.1.0
21
     */
22
    public static $defaultSpecialCaseChars = array('.', ',', '(', ')', '\'');
23
24
    /**
25
     * @var bool
26
     *
27
     * @deprecated since 0.1.0
28
     */
29
    public static $defaultGroupDiffs = true;
30
31
    /**
32
     * @var HtmlDiffConfig
33
     */
34
    protected $config;
35
36
    /**
37
     * @var string
38
     */
39
    protected $content;
40
41
    /**
42
     * @var string
43
     */
44
    protected $oldText;
45
46
    /**
47
     * @var string
48
     */
49
    protected $newText;
50
51
    /**
52
     * @var array
53
     */
54
    protected $oldWords = array();
55
56
    /**
57
     * @var array
58
     */
59
    protected $newWords = array();
60
61
    /**
62
     * @var DiffCache[]
63
     */
64
    protected $diffCaches = array();
65
66
    /**
67
     * @var \HTMLPurifier
68
     */
69
    protected $purifier;
70
71
    /**
72
     * @var \HTMLPurifier_Config|null
73
     */
74
    protected $purifierConfig = null;
75
76
    /**
77
     * @see array_slice_cached();
78
     * @var bool
79
     */
80
    protected $resetCache = false;
81
82
    /**
83
     * AbstractDiff constructor.
84
     *
85
     * @param string     $oldText
86
     * @param string     $newText
87
     * @param string     $encoding
88
     * @param null|array $specialCaseTags
89
     * @param null|bool  $groupDiffs
90
     */
91 15
    public function __construct($oldText, $newText, $encoding = 'UTF-8', $specialCaseTags = null, $groupDiffs = null)
    {
        // Use a plain space (0x20) as mbstring's substitution character.
        mb_substitute_character(0x20);

        // Start from a fresh config that carries the requested encoding.
        $initialConfig = HtmlDiffConfig::create()->setEncoding($encoding);
        $this->setConfig($initialConfig);

        // The optional arguments only override the config when they were explicitly given.
        if (null !== $specialCaseTags) {
            $this->config->setSpecialCaseTags($specialCaseTags);
        }
        if (null !== $groupDiffs) {
            $this->config->setGroupDiffs($groupDiffs);
        }

        $this->content = '';
        $this->oldText = $oldText;
        $this->newText = $newText;
    }
109
110
    /**
111
     * @return bool|string
112
     */
113
    abstract public function build();
114
115
    /**
116
     * Initializes HTMLPurifier with cache location.
117
     *
118
     * @param null|string $defaultPurifierSerializerCache
119
     */
120 15
    public function initPurifier($defaultPurifierSerializerCache = null)
    {
        // Prefer the configuration injected through setHTMLPurifierConfig(); fall back to the library defaults.
        $purifierSettings = null === $this->purifierConfig
            ? \HTMLPurifier_Config::createDefault()
            : $this->purifierConfig;

        // Cache.SerializerPath defaults to null, which places the cache inside the vendor
        // HTMLPurifier library under the DefinitionCache/Serializer folder; it is only
        // overridden when a location was passed in.
        if (null !== $defaultPurifierSerializerCache) {
            $purifierSettings->set('Cache.SerializerPath', $defaultPurifierSerializerCache);
        }

        // Cache.SerializerPermissions defaults to 0744. Using 0777 allows the cache files to be
        // deleted by any user, as they are typically created by the web/php user
        // (www-user, php-fpm, etc.).
        $purifierSettings->set('Cache.SerializerPermissions', 0777);

        $this->purifier = new \HTMLPurifier($purifierSettings);
    }
142
143
    /**
144
     * Prepare (purify) the HTML
145
     *
146
     * @return void
147
     */
148 15
    protected function prepare()
149
    {
150 15
        $this->initPurifier($this->config->getPurifierCacheLocation());
151
152 15
        $this->oldText = $this->purifyHtml($this->oldText);
0 ignored issues
show
Documentation Bug introduced by
It seems like $this->purifyHtml($this->oldText) can also be of type false. However, the property $oldText is declared as type string. Maybe add an additional type check?

Our type inference engine has found a suspicious assignment of a value to a property. This check raises an issue when a value that can be of a mixed type is assigned to a property that is type hinted more strictly.

For example, imagine you have a variable $accountId that can either hold an Id object or false (if there is no account id yet). Your code now assigns that value to the id property of an instance of the Account class. This class holds a proper account, so the id value must no longer be false.

Either this assignment is in error or a type check should be added for that assignment.

class Id
{
    public $id;

    public function __construct($id)
    {
        $this->id = $id;
    }

}

class Account
{
    /** @var  Id $id */
    public $id;
}

$account_id = false;

if (starsAreRight()) {
    $account_id = new Id(42);
}

$account = new Account();
if ($account instanceof Id)
{
    $account->id = $account_id;
}
Loading history...
153 15
        $this->newText = $this->purifyHtml($this->newText);
0 ignored issues
show
Documentation Bug introduced by
It seems like $this->purifyHtml($this->newText) can also be of type false. However, the property $newText is declared as type string. Maybe add an additional type check?

Our type inference engine has found a suspicious assignment of a value to a property. This check raises an issue when a value that can be of a mixed type is assigned to a property that is type hinted more strictly.

For example, imagine you have a variable $accountId that can either hold an Id object or false (if there is no account id yet). Your code now assigns that value to the id property of an instance of the Account class. This class holds a proper account, so the id value must no longer be false.

Either this assignment is in error or a type check should be added for that assignment.

class Id
{
    public $id;

    public function __construct($id)
    {
        $this->id = $id;
    }

}

class Account
{
    /** @var  Id $id */
    public $id;
}

$account_id = false;

if (starsAreRight()) {
    $account_id = new Id(42);
}

$account = new Account();
if ($account instanceof Id)
{
    $account->id = $account_id;
}
Loading history...
154 15
    }
155
156
    /**
     * Returns the DiffCache instance that belongs to the currently configured cache provider.
     *
     * One DiffCache is created per distinct provider object (keyed by spl_object_hash())
     * and kept in $this->diffCaches so later calls reuse it.
     *
     * @return DiffCache|null null when no cache provider is configured
     */
    protected function getDiffCache()
    {
        if (!$this->hasDiffCache()) {
            return null;
        }

        // Fetch the provider once so the null check, the hash and the DiffCache
        // constructor all operate on the same object.
        $cacheProvider = $this->getConfig()->getCacheProvider();
        if (null === $cacheProvider) {
            // Neither spl_object_hash() nor DiffCache accept null.
            return null;
        }

        $hash = spl_object_hash($cacheProvider);

        if (!array_key_exists($hash, $this->diffCaches)) {
            $this->diffCaches[$hash] = new DiffCache($cacheProvider);
        }

        return $this->diffCaches[$hash];
    }
173
174
    /**
175
     * @return bool
176
     */
177 15
    protected function hasDiffCache()
178
    {
179 15
        return null !== $this->getConfig()->getCacheProvider();
180
    }
181
182
    /**
183
     * @return HtmlDiffConfig
184
     */
185 15
    public function getConfig()
186
    {
187 15
        return $this->config;
188
    }
189
190
    /**
191
     * @param HtmlDiffConfig $config
192
     *
193
     * @return AbstractDiff
194
     */
195 15
    public function setConfig(HtmlDiffConfig $config)
196
    {
197 15
        $this->config = $config;
198
199 15
        return $this;
200
    }
201
202
    /**
203
     * @return int
204
     *
205
     * @deprecated since 0.1.0
206
     */
207
    public function getMatchThreshold()
208
    {
209
        return $this->config->getMatchThreshold();
210
    }
211
212
    /**
213
     * @param int $matchThreshold
214
     *
215
     * @return AbstractDiff
216
     *
217
     * @deprecated since 0.1.0
218
     */
219
    public function setMatchThreshold($matchThreshold)
220
    {
221
        $this->config->setMatchThreshold($matchThreshold);
222
223
        return $this;
224
    }
225
226
    /**
227
     * @param array $chars
228
     *
229
     * @deprecated since 0.1.0
230
     */
231
    public function setSpecialCaseChars(array $chars)
232
    {
233
        $this->config->setSpecialCaseChars($chars);
234
    }
235
236
    /**
237
     * @return array|null
238
     *
239
     * @deprecated since 0.1.0
240
     */
241
    public function getSpecialCaseChars()
242
    {
243
        return $this->config->getSpecialCaseChars();
244
    }
245
246
    /**
247
     * @param string $char
248
     *
249
     * @deprecated since 0.1.0
250
     */
251
    public function addSpecialCaseChar($char)
252
    {
253
        $this->config->addSpecialCaseChar($char);
254
    }
255
256
    /**
257
     * @param string $char
258
     *
259
     * @deprecated since 0.1.0
260
     */
261
    public function removeSpecialCaseChar($char)
262
    {
263
        $this->config->removeSpecialCaseChar($char);
264
    }
265
266
    /**
     * Replaces the list of special case tags on the config.
     *
     * @param array $tags
     *
     * @deprecated since 0.1.0
     */
    public function setSpecialCaseTags(array $tags = array())
    {
        // Must target the tag list; delegating to setSpecialCaseChars() would overwrite
        // the special case characters and leave the tags untouched.
        $this->config->setSpecialCaseTags($tags);
    }
275
276
    /**
277
     * @param string $tag
278
     *
279
     * @deprecated since 0.1.0
280
     */
281
    public function addSpecialCaseTag($tag)
282
    {
283
        $this->config->addSpecialCaseTag($tag);
284
    }
285
286
    /**
287
     * @param string $tag
288
     *
289
     * @deprecated since 0.1.0
290
     */
291
    public function removeSpecialCaseTag($tag)
292
    {
293
        $this->config->removeSpecialCaseTag($tag);
294
    }
295
296
    /**
297
     * @return array|null
298
     *
299
     * @deprecated since 0.1.0
300
     */
301
    public function getSpecialCaseTags()
302
    {
303
        return $this->config->getSpecialCaseTags();
304
    }
305
306
    /**
307
     * @return string
308
     */
309
    public function getOldHtml()
310
    {
311
        return $this->oldText;
312
    }
313
314
    /**
315
     * @return string
316
     */
317
    public function getNewHtml()
318
    {
319
        return $this->newText;
320
    }
321
322
    /**
323
     * @return string
324
     */
325
    public function getDifference()
326
    {
327
        return $this->content;
328
    }
329
330
    /**
     * Clears the diff content.
     *
     * The content is reset to an empty string, the same value the constructor
     * assigns, so getDifference() keeps returning a string.
     *
     * @return void
     */
    public function clearContent()
    {
        $this->content = '';
    }
339
340
    /**
341
     * @param bool $boolean
342
     *
343
     * @return $this
344
     *
345
     * @deprecated since 0.1.0
346
     */
347
    public function setGroupDiffs($boolean)
348
    {
349
        $this->config->setGroupDiffs($boolean);
350
351
        return $this;
352
    }
353
354
    /**
355
     * @return bool
356
     *
357
     * @deprecated since 0.1.0
358
     */
359 15
    public function isGroupDiffs()
360
    {
361 15
        return $this->config->isGroupDiffs();
362
    }
363
364
    /**
365
     * @param \HTMLPurifier_Config $config
366
     */
367 2
    public function setHTMLPurifierConfig(\HTMLPurifier_Config $config)
368
    {
369 2
        $this->purifierConfig = $config;
370 2
    }
371
372
    /**
373
     * @param string $tag
374
     *
375
     * @return string
376
     */
377
    protected function getOpeningTag($tag)
378
    {
379
        return '/<'.$tag.'[^>]*/i';
380
    }
381
382
    /**
383
     * @param string $tag
384
     *
385
     * @return string
386
     */
387
    protected function getClosingTag($tag)
388
    {
389
        return '</'.$tag.'>';
390
    }
391
392
    /**
     * Extracts the text located after the first occurrence of $start and
     * before the last occurrence of $end that follows it.
     *
     * @param string $str   text to search in
     * @param string $start opening marker
     * @param string $end   closing marker
     *
     * @return string the enclosed text, or an empty string when either marker is missing
     */
    protected function getStringBetween($str, $start, $end)
    {
        // Split once on the opening marker; a single piece means it was not found.
        $aroundStart = explode($start, $str, 2);
        if (count($aroundStart) < 2) {
            return '';
        }

        // Split the remainder on the closing marker; a single piece means it was not found.
        $segments = explode($end, $aroundStart[1]);
        if (count($segments) < 2) {
            return '';
        }

        // Drop whatever trails the last closing marker and glue the rest back together.
        return implode($end, array_slice($segments, 0, -1));
    }
413
414
    /**
     * Runs the given HTML through the HTMLPurifier instance set up by initPurifier().
     *
     * @param string $html
     *
     * @return string
     */
    protected function purifyHtml($html)
    {
        // The former Tidy branch was guarded by "&& false" and therefore never ran; it also
        // called getStringBetween() without its required $end argument, so it was removed.
        return $this->purifier->purify($html);
    }
432
433 15
    protected function splitInputsToWords()
434
    {
435 15
        $this->setOldWords($this->convertHtmlToListOfWords($this->explode($this->oldText)));
436 15
        $this->setNewWords($this->convertHtmlToListOfWords($this->explode($this->newText)));
437 15
    }
438
439
    /**
440
     * @param array $oldWords
441
     */
442 15
    protected function setOldWords(array $oldWords)
443
    {
444 15
        $this->resetCache = true;
445 15
        $this->oldWords   = $oldWords;
446 15
    }
447
448
    /**
449
     * @param array $newWords
450
     */
451 15
    protected function setNewWords(array $newWords)
452
    {
453 15
        $this->resetCache = true;
454 15
        $this->newWords   = $newWords;
455 15
    }
456
457
    /**
458
     * @param string $text
459
     *
460
     * @return bool
461
     */
462 15
    protected function isPartOfWord($text)
463
    {
464 15
        return ctype_alnum(str_replace($this->config->getSpecialCaseChars(), '', $text));
465
    }
466
467
    /**
     * Groups a list of single characters into "words": tags, whitespace runs and text tokens.
     *
     * The input is walked once with a three-state machine:
     *  - 'character':  collecting a text token,
     *  - 'tag':        collecting everything from '<' up to and including '>',
     *  - 'whitespace': collecting consecutive whitespace characters.
     *
     * Unless the config's "keep new lines" option is on, whitespace is normalised to a
     * single space while it is collected.
     *
     * @param array $characterString list of single-character strings, as produced by explode()
     *
     * @return array list of words in their original order
     */
    protected function convertHtmlToListOfWords($characterString)
    {
        $mode = 'character';
        $current_word = '';
        $words = array();
        $keepNewLines = $this->getConfig()->isKeepNewLines();
        foreach ($characterString as $i => $character) {
            switch ($mode) {
                case 'character':
                    if ($this->isStartOfTag($character)) {
                        // A tag begins: flush the pending text token first.
                        if ($current_word != '') {
                            $words[] = $current_word;
                        }

                        $current_word = '<';
                        $mode = 'tag';
                    } elseif (preg_match("/\s/", $character)) {
                        // Whitespace ends the current text token.
                        if ($current_word !== '') {
                            $words[] = $current_word;
                        }
                        $current_word = $keepNewLines ? $character : preg_replace('/\s+/S', ' ', $character);
                        $mode = 'whitespace';
                    } else {
                        // The character extends the current token when it is alphanumeric and the
                        // token so far is empty or itself part of a word, or when it is a special
                        // case character directly followed by something that is part of a word.
                        if (
                            (
                                ctype_alnum($character)
                                && (strlen($current_word) == 0 || $this->isPartOfWord($current_word))
                            ) || (
                                in_array($character, $this->config->getSpecialCaseChars())
                                && isset($characterString[$i + 1])
                                && $this->isPartOfWord($characterString[$i + 1])
                            )
                        ) {
                            $current_word .= $character;
                        } else {
                            // Note: there is no emptiness check here, so an empty pending token
                            // is appended as well; existing output depends on this.
                            $words[] = $current_word;
                            $current_word = $character;
                        }
                    }
                    break;
                case 'tag':
                    if ($this->isEndOfTag($character)) {
                        $current_word .= '>';
                        $words[] = $current_word;
                        $current_word = '';

                        // The original check here was !preg_match('[^\s]', $character). PCRE reads
                        // the brackets as delimiters, i.e. the pattern /^\s/, which never matches
                        // the closing '>' - so 'whitespace' was the only mode ever selected.
                        // That established behaviour is kept, now stated explicitly.
                        $mode = 'whitespace';
                    } else {
                        $current_word .= $character;
                    }
                    break;
                case 'whitespace':
                    if ($this->isStartOfTag($character)) {
                        if ($current_word !== '') {
                            $words[] = $current_word;
                        }
                        $current_word = '<';
                        $mode = 'tag';
                    } elseif (preg_match("/\s/", $character)) {
                        $current_word .= $character;
                        if (!$keepNewLines) {
                            // Collapse the collected run into a single space.
                            $current_word = preg_replace('/\s+/S', ' ', $current_word);
                        }
                    } else {
                        // First non-whitespace character: flush the run and start a text token.
                        if ($current_word != '') {
                            $words[] = $current_word;
                        }
                        $current_word = $character;
                        $mode = 'character';
                    }
                    break;
                default:
                    break;
            }
        }

        // Flush whatever is still pending after the last character.
        if ($current_word != '') {
            $words[] = $current_word;
        }

        return $words;
    }
549
550
    /**
551
     * @param string $val
552
     *
553
     * @return bool
554
     */
555 15
    protected function isStartOfTag($val)
556
    {
557 15
        return $val == '<';
558
    }
559
560
    /**
561
     * @param string $val
562
     *
563
     * @return bool
564
     */
565 15
    protected function isEndOfTag($val)
566
    {
567 15
        return $val == '>';
568
    }
569
570
    /**
     * Tells whether the value consists solely of whitespace.
     *
     * @param string $value
     *
     * @return bool true when $value contains no non-whitespace character (an empty string included)
     */
    protected function isWhiteSpace($value)
    {
        // Explicit "/" delimiters are required: written as '[^\s]' the brackets themselves are
        // taken as delimiters and the pattern becomes /^\s/, which inverts the result.
        return !preg_match('/[^\s]/', $value);
    }
579
580
    /**
581
     * @param string $value
582
     *
583
     * @return array
584
     */
585 15
    protected function explode($value)
586
    {
587
        // as suggested by @onassar
588 15
        return preg_split('//u', $value);
589
    }
590
}
591