Passed
Pull Request — master (#54)
by
unknown
03:56
created

AbstractDiff::setNewWords()   A

Complexity

Conditions 1
Paths 1

Size

Total Lines 5
Code Lines 3

Duplication

Lines 0
Ratio 0 %

Code Coverage

Tests 4
CRAP Score 1

Importance

Changes 1
Bugs 0 Features 1
Metric Value
cc 1
eloc 3
c 1
b 0
f 1
nc 1
nop 1
dl 0
loc 5
ccs 4
cts 4
cp 1
crap 1
rs 9.4285
1
<?php
2
3
namespace Caxy\HtmlDiff;
4
5
/**
6
 * Class AbstractDiff.
7
 */
8
abstract class AbstractDiff
9
{
10
    /**
11
     * @var array
12
     *
13
     * @deprecated since 0.1.0
14
     */
15
    public static $defaultSpecialCaseTags = array('strong', 'b', 'i', 'big', 'small', 'u', 'sub', 'sup', 'strike', 's', 'p');
0 ignored issues
show
Coding Style introduced by
This line exceeds maximum limit of 120 characters; contains 125 characters

Overly long lines are hard to read on any screen. Most code styles therefor impose a maximum limit on the number of characters in a line.

Loading history...
16
17
    /**
18
     * @var array
19
     *
20
     * @deprecated since 0.1.0
21
     */
22
    public static $defaultSpecialCaseChars = array('.', ',', '(', ')', '\'');
23
24
    /**
25
     * @var bool
26
     *
27
     * @deprecated since 0.1.0
28
     */
29
    public static $defaultGroupDiffs = true;
30
31
    /**
32
     * @var HtmlDiffConfig
33
     */
34
    protected $config;
35
36
    /**
37
     * @var string
38
     */
39
    protected $content;
40
41
    /**
42
     * @var string
43
     */
44
    protected $oldText;
45
46
    /**
47
     * @var string
48
     */
49
    protected $newText;
50
51
    /**
52
     * @var array
53
     */
54
    protected $oldWords = array();
55
56
    /**
57
     * @var array
58
     */
59
    protected $newWords = array();
60
61
    /**
62
     * @var DiffCache[]
63
     */
64
    protected $diffCaches = array();
65
66
    /**
67
     * @var \HTMLPurifier
68
     */
69
    protected $purifier;
70
71
    /**
72
     * @var \HTMLPurifier_Config|null
73
     */
74
    protected $purifierConfig = null;
75
76
    /**
77
     * @see array_slice_cached();
78
     * @var bool
79
     */
80
    protected $resetCache = false;
81
82
    /**
83
     * AbstractDiff constructor.
84
     *
85
     * @param string     $oldText
86
     * @param string     $newText
87
     * @param string     $encoding
88
     * @param null|array $specialCaseTags
89
     * @param null|bool  $groupDiffs
90
     */
91 14
    public function __construct($oldText, $newText, $encoding = 'UTF-8', $specialCaseTags = null, $groupDiffs = null)
92
    {
93 14
        mb_substitute_character(0x20);
94
95 14
        $this->setConfig(HtmlDiffConfig::create()->setEncoding($encoding));
96
97 14
        if ($specialCaseTags !== null) {
98 13
            $this->config->setSpecialCaseTags($specialCaseTags);
99 13
        }
100
101 14
        if ($groupDiffs !== null) {
102
            $this->config->setGroupDiffs($groupDiffs);
103
        }
104
105 14
        $this->oldText = $oldText;
106 14
        $this->newText = $newText;
107 14
        $this->content = '';
108 14
    }
109
110
    /**
111
     * @return bool|string
112
     */
113
    abstract public function build();
114
115
    /**
116
     * Initializes HTMLPurifier with cache location.
117
     *
118
     * @param null|string $defaultPurifierSerializerCache
119
     */
120 14
    public function initPurifier($defaultPurifierSerializerCache = null)
121
    {
122 14
        if (null !== $this->purifierConfig) {
123 2
            $HTMLPurifierConfig  = $this->purifierConfig;
124 2
        } else {
125 14
            $HTMLPurifierConfig = \HTMLPurifier_Config::createDefault();
126
        }
127
128
        // Cache.SerializerPath defaults to Null and sets
129
        // the location to inside the vendor HTMLPurifier library
130
        // under the DefinitionCache/Serializer folder.
131 14
        if (!is_null($defaultPurifierSerializerCache)) {
132 2
            $HTMLPurifierConfig->set('Cache.SerializerPath', $defaultPurifierSerializerCache);
133 2
        }
134
135 14
        $this->purifier = new \HTMLPurifier($HTMLPurifierConfig);
136 14
    }
137
138
    /**
139
     * Prepare (purify) the HTML
140
     *
141
     * @return void
142
     */
143 14
    protected function prepare()
144
    {
145 14
        $this->initPurifier($this->config->getPurifierCacheLocation());
146
147 14
        $this->oldText = $this->purifyHtml($this->oldText);
0 ignored issues
show
Documentation Bug introduced by
It seems like $this->purifyHtml($this->oldText) can also be of type false. However, the property $oldText is declared as type string. Maybe add an additional type check?

Our type inference engine has found a suspicous assignment of a value to a property. This check raises an issue when a value that can be of a mixed type is assigned to a property that is type hinted more strictly.

For example, imagine you have a variable $accountId that can either hold an Id object or false (if there is no account id yet). Your code now assigns that value to the id property of an instance of the Account class. This class holds a proper account, so the id value must no longer be false.

Either this assignment is in error or a type check should be added for that assignment.

class Id
{
    public $id;

    public function __construct($id)
    {
        $this->id = $id;
    }

}

class Account
{
    /** @var  Id $id */
    public $id;
}

$account_id = false;

if (starsAreRight()) {
    $account_id = new Id(42);
}

$account = new Account();
if ($account instanceof Id)
{
    $account->id = $account_id;
}
Loading history...
148 14
        $this->newText = $this->purifyHtml($this->newText);
0 ignored issues
show
Documentation Bug introduced by
It seems like $this->purifyHtml($this->newText) can also be of type false. However, the property $newText is declared as type string. Maybe add an additional type check?

Our type inference engine has found a suspicous assignment of a value to a property. This check raises an issue when a value that can be of a mixed type is assigned to a property that is type hinted more strictly.

For example, imagine you have a variable $accountId that can either hold an Id object or false (if there is no account id yet). Your code now assigns that value to the id property of an instance of the Account class. This class holds a proper account, so the id value must no longer be false.

Either this assignment is in error or a type check should be added for that assignment.

class Id
{
    public $id;

    public function __construct($id)
    {
        $this->id = $id;
    }

}

class Account
{
    /** @var  Id $id */
    public $id;
}

$account_id = false;

if (starsAreRight()) {
    $account_id = new Id(42);
}

$account = new Account();
if ($account instanceof Id)
{
    $account->id = $account_id;
}
Loading history...
149 14
    }
150
151
    /**
152
     * @return DiffCache|null
153
     */
154 1
    protected function getDiffCache()
155 1
    {
156
        if (!$this->hasDiffCache()) {
157
            return null;
158 1
        }
159
160
        $hash = spl_object_hash($this->getConfig()->getCacheProvider());
161
162
        if (!array_key_exists($hash, $this->diffCaches)) {
163
            $this->diffCaches[$hash] = new DiffCache($this->getConfig()->getCacheProvider());
0 ignored issues
show
Bug introduced by
It seems like $this->getConfig()->getCacheProvider() can be null; however, __construct() does not accept null, maybe add an additional type check?

Unless you are absolutely sure that the expression can never be null because of other conditions, we strongly recommend to add an additional type check to your code:

/** @return stdClass|null */
function mayReturnNull() { }

function doesNotAcceptNull(stdClass $x) { }

// With potential error.
function withoutCheck() {
    $x = mayReturnNull();
    doesNotAcceptNull($x); // Potential error here.
}

// Safe - Alternative 1
function withCheck1() {
    $x = mayReturnNull();
    if ( ! $x instanceof stdClass) {
        throw new \LogicException('$x must be defined.');
    }
    doesNotAcceptNull($x);
}

// Safe - Alternative 2
function withCheck2() {
    $x = mayReturnNull();
    if ($x instanceof stdClass) {
        doesNotAcceptNull($x);
    }
}
Loading history...
164
        }
165
166
        return $this->diffCaches[$hash];
167
    }
168
169
    /**
170
     * @return bool
171
     */
172 14
    protected function hasDiffCache()
173
    {
174 14
        return null !== $this->getConfig()->getCacheProvider();
175
    }
176
177
    /**
178
     * @return HtmlDiffConfig
179
     */
180 14
    public function getConfig()
181
    {
182 14
        return $this->config;
183
    }
184
185
    /**
186
     * @param HtmlDiffConfig $config
187
     *
188
     * @return AbstractDiff
189
     */
190 14
    public function setConfig(HtmlDiffConfig $config)
191
    {
192 14
        $this->config = $config;
193
194 14
        return $this;
195
    }
196
197
    /**
198
     * @return int
199
     *
200
     * @deprecated since 0.1.0
201
     */
202
    public function getMatchThreshold()
203
    {
204
        return $this->config->getMatchThreshold();
205
    }
206
207
    /**
208
     * @param int $matchThreshold
209
     *
210
     * @return AbstractDiff
211
     *
212
     * @deprecated since 0.1.0
213
     */
214
    public function setMatchThreshold($matchThreshold)
215
    {
216
        $this->config->setMatchThreshold($matchThreshold);
217
218
        return $this;
219
    }
220
221
    /**
222
     * @param array $chars
223
     *
224
     * @deprecated since 0.1.0
225
     */
226
    public function setSpecialCaseChars(array $chars)
227
    {
228
        $this->config->setSpecialCaseChars($chars);
229
    }
230
231
    /**
232
     * @return array|null
233
     *
234
     * @deprecated since 0.1.0
235
     */
236
    public function getSpecialCaseChars()
237
    {
238
        return $this->config->getSpecialCaseChars();
239
    }
240
241
    /**
242
     * @param string $char
243
     *
244
     * @deprecated since 0.1.0
245
     */
246
    public function addSpecialCaseChar($char)
247
    {
248
        $this->config->addSpecialCaseChar($char);
249
    }
250
251
    /**
252
     * @param string $char
253
     *
254
     * @deprecated since 0.1.0
255
     */
256
    public function removeSpecialCaseChar($char)
257
    {
258
        $this->config->removeSpecialCaseChar($char);
259
    }
260
261
    /**
262
     * @param array $tags
263
     *
264
     * @deprecated since 0.1.0
265
     */
266
    public function setSpecialCaseTags(array $tags = array())
267
    {
268
        $this->config->setSpecialCaseChars($tags);
269
    }
270
271
    /**
272
     * @param string $tag
273
     *
274
     * @deprecated since 0.1.0
275
     */
276
    public function addSpecialCaseTag($tag)
277
    {
278
        $this->config->addSpecialCaseTag($tag);
279
    }
280
281
    /**
282
     * @param string $tag
283
     *
284
     * @deprecated since 0.1.0
285
     */
286
    public function removeSpecialCaseTag($tag)
287
    {
288
        $this->config->removeSpecialCaseTag($tag);
289
    }
290
291
    /**
292
     * @return array|null
293
     *
294
     * @deprecated since 0.1.0
295
     */
296
    public function getSpecialCaseTags()
297
    {
298
        return $this->config->getSpecialCaseTags();
299
    }
300
301
    /**
302
     * @return string
303
     */
304
    public function getOldHtml()
305
    {
306
        return $this->oldText;
307
    }
308
309
    /**
310
     * @return string
311
     */
312
    public function getNewHtml()
313
    {
314
        return $this->newText;
315
    }
316
317
    /**
318
     * @return string
319
     */
320
    public function getDifference()
321
    {
322
        return $this->content;
323
    }
324
325
    /**
326
     * Clears the diff content.
327
     *
328
     * @return void
329
     */
330
    public function clearContent()
331
    {
332
        $this->content = null;
333
    }
334
335
    /**
336
     * @param bool $boolean
337
     *
338
     * @return $this
339
     *
340
     * @deprecated since 0.1.0
341
     */
342
    public function setGroupDiffs($boolean)
343
    {
344
        $this->config->setGroupDiffs($boolean);
345
346
        return $this;
347
    }
348
349
    /**
350
     * @return bool
351
     *
352
     * @deprecated since 0.1.0
353
     */
354
    public function isGroupDiffs()
355
    {
356
        return $this->config->isGroupDiffs();
357
    }
358
359
    /**
360
     * @param \HTMLPurifier_Config $config
361
     */
362 2
    public function setHTMLPurifierConfig(\HTMLPurifier_Config $config)
363
    {
364 2
        $this->purifierConfig = $config;
365 2
    }
366
367
    /**
368
     * @param string $tag
369
     *
370
     * @return string
371
     */
372
    protected function getOpeningTag($tag)
373
    {
374
        return '/<'.$tag.'[^>]*/i';
375
    }
376
377
    /**
378
     * @param string $tag
379
     *
380
     * @return string
381
     */
382
    protected function getClosingTag($tag)
383
    {
384
        return '</'.$tag.'>';
385
    }
386
387
    /**
388
     * @param string $str
389
     * @param string $start
390
     * @param string $end
391
     *
392
     * @return string
393
     */
394
    protected function getStringBetween($str, $start, $end)
395
    {
396
        $expStr = explode($start, $str, 2);
397
        if (count($expStr) > 1) {
398
            $expStr = explode($end, $expStr[ 1 ]);
399
            if (count($expStr) > 1) {
400
                array_pop($expStr);
401
402
                return implode($end, $expStr);
403
            }
404
        }
405
406
        return '';
407
    }
408
409
    /**
410
     * @param string $html
411
     *
412
     * @return string
0 ignored issues
show
Documentation introduced by
Should the return type not be string|false|null?

This check compares the return type specified in the @return annotation of a function or method doc comment with the types returned by the function and raises an issue if they mismatch.

Loading history...
413
     */
414 14
    protected function purifyHtml($html)
415
    {
416 14
        if (class_exists('Tidy') && false) {
417
            $config = array('output-xhtml' => true, 'indent' => false);
418
            $tidy = new tidy();
419
            $tidy->parseString($html, $config, 'utf8');
420
            $html = (string) $tidy;
421
422
            return $this->getStringBetween($html, '<body>');
0 ignored issues
show
Bug introduced by
The call to getStringBetween() misses a required argument $end.

This check looks for function calls that miss required arguments.

Loading history...
423
        }
424
425 14
        return $this->purifier->purify($html);
426
    }
427
428 14
    protected function splitInputsToWords()
429
    {
430 14
        $this->setOldWords($this->convertHtmlToListOfWords($this->explode($this->oldText)));
431 14
        $this->setNewWords($this->convertHtmlToListOfWords($this->explode($this->newText)));
432 14
    }
433
434
    /**
435
     * @param array $oldWords
436
     */
437 14
    protected function setOldWords(array $oldWords)
438
    {
439 14
        $this->resetCache = true;
440 14
        $this->oldWords   = $oldWords;
441 14
    }
442
443
    /**
444
     * @param array $newWords
445
     */
446 14
    protected function setNewWords(array $newWords)
447
    {
448 14
        $this->resetCache = true;
449 14
        $this->newWords   = $newWords;
450 14
    }
451
452
    /**
453
     * @param string $text
454
     *
455
     * @return bool
456
     */
457 14
    protected function isPartOfWord($text)
458
    {
459 14
        return ctype_alnum(str_replace($this->config->getSpecialCaseChars(), '', $text));
460
    }
461
462
    /**
463
     * @param array $characterString
464
     *
465
     * @return array
466
     */
467 14
    protected function convertHtmlToListOfWords($characterString)
468
    {
469 14
        $mode = 'character';
470 14
        $current_word = '';
471 14
        $words = array();
472 14
        foreach ($characterString as $i => $character) {
473
            switch ($mode) {
474 14
                case 'character':
475 14
                if ($this->isStartOfTag($character)) {
476 13
                    if ($current_word != '') {
477 12
                        $words[] = $current_word;
478 12
                    }
479
480 13
                    $current_word = '<';
481 13
                    $mode = 'tag';
482 14
                } elseif (preg_match("/\s/", $character)) {
483 12
                    if ($current_word !== '') {
484 12
                        $words[] = $current_word;
485 12
                    }
486 12
                    $current_word = preg_replace('/\s+/S', ' ', $character);
487 12
                    $mode = 'whitespace';
488 12
                } else {
489
                    if (
490 14
                        (ctype_alnum($character) && (strlen($current_word) == 0 || $this->isPartOfWord($current_word))) ||
0 ignored issues
show
Coding Style introduced by
This line exceeds maximum limit of 120 characters; contains 122 characters

Overly long lines are hard to read on any screen. Most code styles therefor impose a maximum limit on the number of characters in a line.

Loading history...
491 14
                        (in_array($character, $this->config->getSpecialCaseChars()) && isset($characterString[$i + 1]) && $this->isPartOfWord($characterString[$i + 1]))
0 ignored issues
show
Coding Style introduced by
This line exceeds maximum limit of 120 characters; contains 168 characters

Overly long lines are hard to read on any screen. Most code styles therefor impose a maximum limit on the number of characters in a line.

Loading history...
492 14
                    ) {
493 14
                        $current_word .= $character;
494 14
                    } else {
495 14
                        $words[] = $current_word;
496 14
                        $current_word = $character;
497
                    }
498
                }
499 14
                break;
500 14
                case 'tag' :
0 ignored issues
show
Coding Style introduced by
There must be no space before the colon in a CASE statement

As per the PSR-2 coding standard, there must not be a space in front of the colon in case statements.

switch ($selector) {
    case "A": //right
        doSomething();
        break;
    case "B" : //wrong
        doSomethingElse();
        break;
}

To learn more about the PSR-2 coding standard, please refer to the PHP-Fig.

Loading history...
501 14
                if ($this->isEndOfTag($character)) {
502 14
                    $current_word .= '>';
503 14
                    $words[] = $current_word;
504 14
                    $current_word = '';
505
506 14
                    if (!preg_match('[^\s]', $character)) {
507 14
                        $mode = 'whitespace';
508 14
                    } else {
509
                        $mode = 'character';
510
                    }
511 14
                } else {
512 14
                    $current_word .= $character;
513
                }
514 14
                break;
515 14
                case 'whitespace':
516 14
                if ($this->isStartOfTag($character)) {
517 12
                    if ($current_word !== '') {
518 12
                        $words[] = $current_word;
519 12
                    }
520 12
                    $current_word = '<';
521 12
                    $mode = 'tag';
522 14
                } elseif (preg_match("/\s/", $character)) {
523 10
                    $current_word .= $character;
524 10
                    $current_word = preg_replace('/\s+/S', ' ', $current_word);
525 10
                } else {
526 14
                    if ($current_word != '') {
527 12
                        $words[] = $current_word;
528 12
                    }
529 14
                    $current_word = $character;
530 14
                    $mode = 'character';
531
                }
532 14
                break;
533
                default:
534
                break;
535
            }
536 14
        }
537 14
        if ($current_word != '') {
538
            $words[] = $current_word;
539
        }
540
541 14
        return $words;
542
    }
543
544
    /**
545
     * @param string $val
546
     *
547
     * @return bool
548
     */
549 14
    protected function isStartOfTag($val)
550
    {
551 14
        return $val == '<';
552
    }
553
554
    /**
555
     * @param string $val
556
     *
557
     * @return bool
558
     */
559 14
    protected function isEndOfTag($val)
560
    {
561 14
        return $val == '>';
562
    }
563
564
    /**
565
     * @param string $value
566
     *
567
     * @return bool
568
     */
569
    protected function isWhiteSpace($value)
570
    {
571
        return !preg_match('[^\s]', $value);
572
    }
573
574
    /**
575
     * @param string $value
576
     *
577
     * @return array
578
     */
579 14
    protected function explode($value)
580
    {
581
        // as suggested by @onassar
582 14
        return preg_split('//u', $value);
583
    }
584
}
585