Passed
Pull Request — master (#62)
by
unknown
05:01
created

AbstractDiff   C

Complexity

Total Complexity 68

Size/Duplication

Total Lines 577
Duplicated Lines 0 %

Coupling/Cohesion

Components 2
Dependencies 4

Test Coverage

Coverage 61.49%

Importance

Changes 0
Metric Value
dl 0
loc 577
ccs 107
cts 174
cp 0.6149
rs 5.6756
c 0
b 0
f 0
wmc 68
lcom 2
cbo 4

38 Methods

Rating   Name   Duplication   Size   Complexity  
build() 0 1 ?
A prepare() 0 7 1
A hasDiffCache() 0 4 1
A getConfig() 0 4 1
A setConfig() 0 6 1
A getOldHtml() 0 4 1
A getNewHtml() 0 4 1
A getDifference() 0 4 1
A clearContent() 0 4 1
A setHTMLPurifierConfig() 0 4 1
A getOpeningTag() 0 4 1
A getClosingTag() 0 4 1
A purifyHtml() 0 13 3
A splitInputsToWords() 0 5 1
A setOldWords() 0 5 1
A setNewWords() 0 5 1
A isPartOfWord() 0 4 1
A isStartOfTag() 0 4 1
A isEndOfTag() 0 4 1
A isWhiteSpace() 0 4 1
A explode() 0 5 1
A __construct() 0 18 3
A initPurifier() 0 17 3
A getDiffCache() 0 14 3
A getMatchThreshold() 0 4 1
A setMatchThreshold() 0 6 1
A setSpecialCaseChars() 0 4 1
A getSpecialCaseChars() 0 4 1
A addSpecialCaseChar() 0 4 1
A removeSpecialCaseChar() 0 4 1
A setSpecialCaseTags() 0 4 1
A addSpecialCaseTag() 0 4 1
A removeSpecialCaseTag() 0 4 1
A getSpecialCaseTags() 0 4 1
A setGroupDiffs() 0 6 1
A isGroupDiffs() 0 4 1
A getStringBetween() 0 14 3
C convertHtmlToListOfWords() 0 76 22

How to fix   Complexity   

Complex Class

Complex classes like AbstractDiff often do a lot of different things. To break such a class down, we need to identify a cohesive component within that class. A common approach to finding such a component is to look for fields/methods that share the same prefixes or suffixes. You can also have a look at the cohesion graph to spot any unconnected or weakly connected components.

Once you have determined the fields that belong together, you can apply the Extract Class refactoring. If the component makes sense as a sub-class, Extract Subclass is also a candidate, and is often faster.

While breaking up the class, it is a good idea to analyze how other classes use AbstractDiff, and based on these observations, apply Extract Interface, too.

1
<?php
2
3
namespace Caxy\HtmlDiff;
4
5
/**
6
 * Class AbstractDiff.
7
 */
8
abstract class AbstractDiff
9
{
10
    /**
11
     * @var array
12
     *
13
     * @deprecated since 0.1.0
14
     */
15
    public static $defaultSpecialCaseTags = array('strong', 'b', 'i', 'big', 'small', 'u', 'sub', 'sup', 'strike', 's', 'p');
0 ignored issues
show
Coding Style introduced by
This line exceeds maximum limit of 120 characters; contains 125 characters

Overly long lines are hard to read on any screen. Most code styles therefore impose a maximum limit on the number of characters in a line.

Loading history...
16
17
    /**
18
     * @var array
19
     *
20
     * @deprecated since 0.1.0
21
     */
22
    public static $defaultSpecialCaseChars = array('.', ',', '(', ')', '\'');
23
24
    /**
25
     * @var bool
26
     *
27
     * @deprecated since 0.1.0
28
     */
29
    public static $defaultGroupDiffs = true;
30
31
    /**
32
     * @var HtmlDiffConfig
33
     */
34
    protected $config;
35
36
    /**
37
     * @var string
38
     */
39
    protected $content;
40
41
    /**
42
     * @var string
43
     */
44
    protected $oldText;
45
46
    /**
47
     * @var string
48
     */
49
    protected $newText;
50
51
    /**
52
     * @var array
53
     */
54
    protected $oldWords = array();
55
56
    /**
57
     * @var array
58
     */
59
    protected $newWords = array();
60
61
    /**
62
     * @var DiffCache[]
63
     */
64
    protected $diffCaches = array();
65
66
    /**
67
     * @var \HTMLPurifier
68
     */
69
    protected $purifier;
70
71
    /**
72
     * @var \HTMLPurifier_Config|null
73
     */
74
    protected $purifierConfig = null;
75
76
    /**
77
     * @see array_slice_cached();
78
     * @var bool
79
     */
80
    protected $resetCache = false;
81
82
    /**
83
     * AbstractDiff constructor.
84
     *
85
     * @param string     $oldText
86
     * @param string     $newText
87
     * @param string     $encoding
88
     * @param null|array $specialCaseTags
89
     * @param null|bool  $groupDiffs
90
     */
91 15
    public function __construct($oldText, $newText, $encoding = 'UTF-8', $specialCaseTags = null, $groupDiffs = null)
92
    {
93 15
        mb_substitute_character(0x20);
94
95 15
        $this->setConfig(HtmlDiffConfig::create()->setEncoding($encoding));
96
97 15
        if ($specialCaseTags !== null) {
98 14
            $this->config->setSpecialCaseTags($specialCaseTags);
99
        }
100
101 15
        if ($groupDiffs !== null) {
102
            $this->config->setGroupDiffs($groupDiffs);
103
        }
104
105 15
        $this->oldText = $oldText;
106 15
        $this->newText = $newText;
107 15
        $this->content = '';
108 15
    }
109
110
    /**
111
     * @return bool|string
112
     */
113
    abstract public function build();
114
115
    /**
     * Builds the HTMLPurifier instance stored in $this->purifier.
     *
     * The config previously supplied through setHTMLPurifierConfig() is used
     * when there is one; otherwise HTMLPurifier's default config is created.
     *
     * @param null|string $defaultPurifierSerializerCache value for the
     *                                                    Cache.SerializerPath directive;
     *                                                    null leaves the directive untouched
     */
    public function initPurifier($defaultPurifierSerializerCache = null)
    {
        $purifierSettings = null === $this->purifierConfig
            ? \HTMLPurifier_Config::createDefault()
            : $this->purifierConfig;

        // Cache.SerializerPath defaults to null, which places the cache inside
        // the vendor HTMLPurifier library under DefinitionCache/Serializer.
        if (null !== $defaultPurifierSerializerCache) {
            $purifierSettings->set('Cache.SerializerPath', $defaultPurifierSerializerCache);
        }

        $this->purifier = new \HTMLPurifier($purifierSettings);
    }
137
138
    /**
139
     * Prepare (purify) the HTML
140
     *
141
     * @return void
142
     */
143 15
    protected function prepare()
144
    {
145 15
        $this->initPurifier($this->config->getPurifierCacheLocation());
146
147 15
        $this->oldText = $this->purifyHtml($this->oldText);
0 ignored issues
show
Documentation Bug introduced by
It seems like $this->purifyHtml($this->oldText) can also be of type false. However, the property $oldText is declared as type string. Maybe add an additional type check?

Our type inference engine has found a suspicious assignment of a value to a property. This check raises an issue when a value that can be of a mixed type is assigned to a property that is type-hinted more strictly.

For example, imagine you have a variable $accountId that can either hold an Id object or false (if there is no account id yet). Your code now assigns that value to the id property of an instance of the Account class. This class holds a proper account, so the id value must no longer be false.

Either this assignment is in error or a type check should be added for that assignment.

class Id
{
    public $id;

    public function __construct($id)
    {
        $this->id = $id;
    }

}

class Account
{
    /** @var  Id $id */
    public $id;
}

$account_id = false;

if (starsAreRight()) {
    $account_id = new Id(42);
}

$account = new Account();
if ($account instanceof Id)
{
    $account->id = $account_id;
}
Loading history...
148 15
        $this->newText = $this->purifyHtml($this->newText);
0 ignored issues
show
Documentation Bug introduced by
It seems like $this->purifyHtml($this->newText) can also be of type false. However, the property $newText is declared as type string. Maybe add an additional type check?

Our type inference engine has found a suspicious assignment of a value to a property. This check raises an issue when a value that can be of a mixed type is assigned to a property that is type-hinted more strictly.

For example, imagine you have a variable $accountId that can either hold an Id object or false (if there is no account id yet). Your code now assigns that value to the id property of an instance of the Account class. This class holds a proper account, so the id value must no longer be false.

Either this assignment is in error or a type check should be added for that assignment.

class Id
{
    public $id;

    public function __construct($id)
    {
        $this->id = $id;
    }

}

class Account
{
    /** @var  Id $id */
    public $id;
}

$account_id = false;

if (starsAreRight()) {
    $account_id = new Id(42);
}

$account = new Account();
if ($account instanceof Id)
{
    $account->id = $account_id;
}
Loading history...
149 15
    }
150
151
    /**
     * Returns the DiffCache that wraps the currently configured cache provider.
     *
     * Instances are memoised in $this->diffCaches, keyed by the provider's
     * spl_object_hash(), so a given provider object gets a single DiffCache.
     *
     * @return DiffCache|null null when no cache provider is configured
     */
    protected function getDiffCache()
    {
        if (!$this->hasDiffCache()) {
            return null;
        }

        // Read the provider once so the value that is null-checked is the very
        // one that gets hashed and handed to the DiffCache constructor.
        $cacheProvider = $this->getConfig()->getCacheProvider();
        if (null === $cacheProvider) {
            return null;
        }

        $hash = spl_object_hash($cacheProvider);

        if (!array_key_exists($hash, $this->diffCaches)) {
            $this->diffCaches[$hash] = new DiffCache($cacheProvider);
        }

        return $this->diffCaches[$hash];
    }
168
169
    /**
170
     * @return bool
171
     */
172 15
    protected function hasDiffCache()
173
    {
174 15
        return null !== $this->getConfig()->getCacheProvider();
175
    }
176
177
    /**
178
     * @return HtmlDiffConfig
179
     */
180 15
    public function getConfig()
181
    {
182 15
        return $this->config;
183
    }
184
185
    /**
186
     * @param HtmlDiffConfig $config
187
     *
188
     * @return AbstractDiff
189
     */
190 15
    public function setConfig(HtmlDiffConfig $config)
191
    {
192 15
        $this->config = $config;
193
194 15
        return $this;
195
    }
196
197
    /**
198
     * @return int
199
     *
200
     * @deprecated since 0.1.0
201
     */
202
    public function getMatchThreshold()
203
    {
204
        return $this->config->getMatchThreshold();
205
    }
206
207
    /**
208
     * @param int $matchThreshold
209
     *
210
     * @return AbstractDiff
211
     *
212
     * @deprecated since 0.1.0
213
     */
214
    public function setMatchThreshold($matchThreshold)
215
    {
216
        $this->config->setMatchThreshold($matchThreshold);
217
218
        return $this;
219
    }
220
221
    /**
222
     * @param array $chars
223
     *
224
     * @deprecated since 0.1.0
225
     */
226
    public function setSpecialCaseChars(array $chars)
227
    {
228
        $this->config->setSpecialCaseChars($chars);
229
    }
230
231
    /**
232
     * @return array|null
233
     *
234
     * @deprecated since 0.1.0
235
     */
236
    public function getSpecialCaseChars()
237
    {
238
        return $this->config->getSpecialCaseChars();
239
    }
240
241
    /**
242
     * @param string $char
243
     *
244
     * @deprecated since 0.1.0
245
     */
246
    public function addSpecialCaseChar($char)
247
    {
248
        $this->config->addSpecialCaseChar($char);
249
    }
250
251
    /**
252
     * @param string $char
253
     *
254
     * @deprecated since 0.1.0
255
     */
256
    public function removeSpecialCaseChar($char)
257
    {
258
        $this->config->removeSpecialCaseChar($char);
259
    }
260
261
    /**
     * Replaces the list of special-case tags held by the config object.
     *
     * @param array $tags
     *
     * @deprecated since 0.1.0
     */
    public function setSpecialCaseTags(array $tags = array())
    {
        // This used to forward to setSpecialCaseChars(), which overwrote the
        // special-case character list with tag names and left the tags unchanged.
        $this->config->setSpecialCaseTags($tags);
    }
270
271
    /**
272
     * @param string $tag
273
     *
274
     * @deprecated since 0.1.0
275
     */
276
    public function addSpecialCaseTag($tag)
277
    {
278
        $this->config->addSpecialCaseTag($tag);
279
    }
280
281
    /**
282
     * @param string $tag
283
     *
284
     * @deprecated since 0.1.0
285
     */
286
    public function removeSpecialCaseTag($tag)
287
    {
288
        $this->config->removeSpecialCaseTag($tag);
289
    }
290
291
    /**
292
     * @return array|null
293
     *
294
     * @deprecated since 0.1.0
295
     */
296
    public function getSpecialCaseTags()
297
    {
298
        return $this->config->getSpecialCaseTags();
299
    }
300
301
    /**
302
     * @return string
303
     */
304
    public function getOldHtml()
305
    {
306
        return $this->oldText;
307
    }
308
309
    /**
310
     * @return string
311
     */
312
    public function getNewHtml()
313
    {
314
        return $this->newText;
315
    }
316
317
    /**
318
     * @return string
319
     */
320
    public function getDifference()
321
    {
322
        return $this->content;
323
    }
324
325
    /**
     * Clears the diff content.
     *
     * The buffer is reset to an empty string, the same value the constructor
     * assigns, so $content keeps its declared string type and getDifference()
     * keeps returning a string.
     *
     * @return void
     */
    public function clearContent()
    {
        $this->content = '';
    }
334
335
    /**
336
     * @param bool $boolean
337
     *
338
     * @return $this
339
     *
340
     * @deprecated since 0.1.0
341
     */
342
    public function setGroupDiffs($boolean)
343
    {
344
        $this->config->setGroupDiffs($boolean);
345
346
        return $this;
347
    }
348
349
    /**
350
     * @return bool
351
     *
352
     * @deprecated since 0.1.0
353
     */
354 15
    public function isGroupDiffs()
355
    {
356 15
        return $this->config->isGroupDiffs();
357
    }
358
359
    /**
360
     * @param \HTMLPurifier_Config $config
361
     */
362 2
    public function setHTMLPurifierConfig(\HTMLPurifier_Config $config)
363
    {
364 2
        $this->purifierConfig = $config;
365 2
    }
366
367
    /**
368
     * @param string $tag
369
     *
370
     * @return string
371
     */
372
    protected function getOpeningTag($tag)
373
    {
374
        return '/<'.$tag.'[^>]*/i';
375
    }
376
377
    /**
378
     * @param string $tag
379
     *
380
     * @return string
381
     */
382
    protected function getClosingTag($tag)
383
    {
384
        return '</'.$tag.'>';
385
    }
386
387
    /**
388
     * @param string $str
389
     * @param string $start
390
     * @param string $end
391
     *
392
     * @return string
393
     */
394
    protected function getStringBetween($str, $start, $end)
395
    {
396
        $expStr = explode($start, $str, 2);
397
        if (count($expStr) > 1) {
398
            $expStr = explode($end, $expStr[ 1 ]);
399
            if (count($expStr) > 1) {
400
                array_pop($expStr);
401
402
                return implode($end, $expStr);
403
            }
404
        }
405
406
        return '';
407
    }
408
409
    /**
     * Runs the given HTML through the HTMLPurifier instance.
     *
     * Requires $this->purifier to be set, which initPurifier() does.
     *
     * @param string $html
     *
     * @return string
     */
    protected function purifyHtml($html)
    {
        // A Tidy-based branch used to sit here behind `class_exists('Tidy') && false`.
        // It could never execute, and it called getStringBetween() without the
        // required $end argument, so it has been removed.
        return $this->purifier->purify($html);
    }
427
428 15
    protected function splitInputsToWords()
429
    {
430 15
        $this->setOldWords($this->convertHtmlToListOfWords($this->explode($this->oldText)));
431 15
        $this->setNewWords($this->convertHtmlToListOfWords($this->explode($this->newText)));
432 15
    }
433
434
    /**
435
     * @param array $oldWords
436
     */
437 15
    protected function setOldWords(array $oldWords)
438
    {
439 15
        $this->resetCache = true;
440 15
        $this->oldWords   = $oldWords;
441 15
    }
442
443
    /**
444
     * @param array $newWords
445
     */
446 15
    protected function setNewWords(array $newWords)
447
    {
448 15
        $this->resetCache = true;
449 15
        $this->newWords   = $newWords;
450 15
    }
451
452
    /**
453
     * @param string $text
454
     *
455
     * @return bool
456
     */
457 15
    protected function isPartOfWord($text)
458
    {
459 15
        return ctype_alnum(str_replace($this->config->getSpecialCaseChars(), '', $text));
460
    }
461
462
    /**
463
     * @param array $characterString
464
     *
465
     * @return array
466
     */
467 15
    protected function convertHtmlToListOfWords($characterString)
468
    {
469 15
        $mode = 'character';
470 15
        $current_word = '';
471 15
        $words = array();
472 15
        foreach ($characterString as $i => $character) {
473
            switch ($mode) {
474 15
                case 'character':
475 15
                if ($this->isStartOfTag($character)) {
476 14
                    if ($current_word != '') {
477 13
                        $words[] = $current_word;
478
                    }
479
480 14
                    $current_word = '<';
481 14
                    $mode = 'tag';
482 15
                } elseif (preg_match("/\s/", $character)) {
483 13
                    if ($current_word !== '') {
484 13
                        $words[] = $current_word;
485
                    }
486 13
                    $current_word = preg_replace('/\s+/S', ' ', $character);
487 13
                    $mode = 'whitespace';
488
                } else {
489
                    if (
490 15
                        (ctype_alnum($character) && (strlen($current_word) == 0 || $this->isPartOfWord($current_word))) ||
0 ignored issues
show
Coding Style introduced by
This line exceeds maximum limit of 120 characters; contains 122 characters

Overly long lines are hard to read on any screen. Most code styles therefore impose a maximum limit on the number of characters in a line.

Loading history...
491 15
                        (in_array($character, $this->config->getSpecialCaseChars()) && isset($characterString[$i + 1]) && $this->isPartOfWord($characterString[$i + 1]))
0 ignored issues
show
Coding Style introduced by
This line exceeds maximum limit of 120 characters; contains 168 characters

Overly long lines are hard to read on any screen. Most code styles therefore impose a maximum limit on the number of characters in a line.

Loading history...
492
                    ) {
493 15
                        $current_word .= $character;
494
                    } else {
495 15
                        $words[] = $current_word;
496 15
                        $current_word = $character;
497
                    }
498
                }
499 15
                break;
500 15
                case 'tag' :
0 ignored issues
show
Coding Style introduced by
There must be no space before the colon in a CASE statement

As per the PSR-2 coding standard, there must not be a space in front of the colon in case statements.

switch ($selector) {
    case "A": //right
        doSomething();
        break;
    case "B" : //wrong
        doSomethingElse();
        break;
}

To learn more about the PSR-2 coding standard, please refer to the PHP-Fig.

Loading history...
501 15
                if ($this->isEndOfTag($character)) {
502 15
                    $current_word .= '>';
503 15
                    $words[] = $current_word;
504 15
                    $current_word = '';
505
506 15
                    if (!preg_match('[^\s]', $character)) {
507 15
                        $mode = 'whitespace';
508
                    } else {
509 15
                        $mode = 'character';
510
                    }
511
                } else {
512 15
                    $current_word .= $character;
513
                }
514 15
                break;
515 15
                case 'whitespace':
516 15
                if ($this->isStartOfTag($character)) {
517 13
                    if ($current_word !== '') {
518 13
                        $words[] = $current_word;
519
                    }
520 13
                    $current_word = '<';
521 13
                    $mode = 'tag';
522 15
                } elseif (preg_match("/\s/", $character)) {
523 11
                    $current_word .= $character;
524 11
                    $current_word = preg_replace('/\s+/S', ' ', $current_word);
525
                } else {
526 15
                    if ($current_word != '') {
527 13
                        $words[] = $current_word;
528
                    }
529 15
                    $current_word = $character;
530 15
                    $mode = 'character';
531
                }
532 15
                break;
533
                default:
534 15
                break;
535
            }
536
        }
537 15
        if ($current_word != '') {
538
            $words[] = $current_word;
539
        }
540
541 15
        return $words;
542
    }
543
544
    /**
545
     * @param string $val
546
     *
547
     * @return bool
548
     */
549 15
    protected function isStartOfTag($val)
550
    {
551 15
        return $val == '<';
552
    }
553
554
    /**
555
     * @param string $val
556
     *
557
     * @return bool
558
     */
559 15
    protected function isEndOfTag($val)
560
    {
561 15
        return $val == '>';
562
    }
563
564
    /**
     * Tells whether the value consists solely of whitespace characters.
     *
     * An empty string also yields true, since it contains no non-whitespace
     * character.
     *
     * @param string $value
     *
     * @return bool
     */
    protected function isWhiteSpace($value)
    {
        // The pattern needs explicit delimiters: written as '[^\s]', PCRE takes
        // the brackets as the delimiters and matches ^\s ("starts with
        // whitespace"), which inverted the result for ordinary input.
        return !preg_match('/[^\s]/', $value);
    }
573
574
    /**
575
     * @param string $value
576
     *
577
     * @return array
578
     */
579 15
    protected function explode($value)
580
    {
581
        // as suggested by @onassar
582 15
        return preg_split('//u', $value);
583
    }
584
}
585