Passed
Push — master ( e9b438...e7628e )
by Sven
02:04 queued 11s
created

AbstractDiff::setNewWords()   A

Complexity

Conditions 1
Paths 1

Size

Total Lines 5

Duplication

Lines 0
Ratio 0 %

Code Coverage

Tests 4
CRAP Score 1

Importance

Changes 0
Metric Value
cc 1
nc 1
nop 1
dl 0
loc 5
rs 10
c 0
b 0
f 0
ccs 4
cts 4
cp 1
crap 1
1
<?php
2
3
namespace Caxy\HtmlDiff;
4
5
use Caxy\HtmlDiff\Util\MbStringUtil;
6
7
/**
 * Class AbstractDiff.
 *
 * Shared base for the diff builders in this package. It stores the two input
 * documents, the active HtmlDiffConfig, the optional HTMLPurifier instance and
 * the word lists derived from the inputs; concrete subclasses implement build().
 */
abstract class AbstractDiff
{
    /**
     * @var array
     *
     * @deprecated since 0.1.0
     */
    public static $defaultSpecialCaseTags = array('strong', 'b', 'i', 'big', 'small', 'u', 'sub', 'sup', 'strike', 's', 'p');

    /**
     * @var array
     *
     * @deprecated since 0.1.0
     */
    public static $defaultSpecialCaseChars = array('.', ',', '(', ')', '\'');

    /**
     * @var bool
     *
     * @deprecated since 0.1.0
     */
    public static $defaultGroupDiffs = true;

    /**
     * @var HtmlDiffConfig
     */
    protected $config;

    /**
     * Generated diff output; null after clearContent() has been called.
     *
     * @var string|null
     */
    protected $content;

    /**
     * @var string
     */
    protected $oldText;

    /**
     * @var string
     */
    protected $newText;

    /**
     * @var array
     */
    protected $oldWords = array();

    /**
     * @var array
     */
    protected $newWords = array();

    /**
     * DiffCache instances keyed by the spl_object_hash() of their cache provider.
     *
     * @var DiffCache[]
     */
    protected $diffCaches = array();

    /**
     * @var \HTMLPurifier|null
     */
    protected $purifier;

    /**
     * @var \HTMLPurifier_Config|null
     */
    protected $purifierConfig = null;

    /**
     * Set to true whenever the old or new word list is replaced.
     *
     * @see array_slice_cached();
     * @var bool
     */
    protected $resetCache = false;

    /**
     * @var MbStringUtil
     */
    protected $stringUtil;

    /**
     * AbstractDiff constructor.
     *
     * Creates a fresh HtmlDiffConfig with the given encoding; the two optional
     * arguments override the config defaults only when they are not null.
     *
     * @param string     $oldText
     * @param string     $newText
     * @param string     $encoding
     * @param null|array $specialCaseTags
     * @param null|bool  $groupDiffs
     */
    public function __construct($oldText, $newText, $encoding = 'UTF-8', $specialCaseTags = null, $groupDiffs = null)
    {
        $this->stringUtil = new MbStringUtil($oldText, $newText);

        $this->setConfig(HtmlDiffConfig::create()->setEncoding($encoding));

        if ($specialCaseTags !== null) {
            $this->config->setSpecialCaseTags($specialCaseTags);
        }

        if ($groupDiffs !== null) {
            $this->config->setGroupDiffs($groupDiffs);
        }

        $this->oldText = $oldText;
        $this->newText = $newText;
        $this->content = '';
    }

    /**
     * @return bool|string
     */
    abstract public function build();

    /**
     * Initializes HTMLPurifier with cache location.
     *
     * Uses the config injected through setHTMLPurifierConfig() when there is one,
     * otherwise HTMLPurifier's default config. Note that the injected config
     * object itself is modified by the set() calls below.
     *
     * @param null|string $defaultPurifierSerializerCache
     */
    public function initPurifier($defaultPurifierSerializerCache = null)
    {
        if (null !== $this->purifierConfig) {
            $HTMLPurifierConfig = $this->purifierConfig;
        } else {
            $HTMLPurifierConfig = \HTMLPurifier_Config::createDefault();
        }

        // Cache.SerializerPath defaults to Null and sets
        // the location to inside the vendor HTMLPurifier library
        // under the DefinitionCache/Serializer folder.
        if (!is_null($defaultPurifierSerializerCache)) {
            $HTMLPurifierConfig->set('Cache.SerializerPath', $defaultPurifierSerializerCache);
        }

        // Cache.SerializerPermissions defaults to 0744.
        // This setting allows the cache files to be deleted by any user, as they are typically
        // created by the web/php user (www-user, php-fpm, etc.)
        $HTMLPurifierConfig->set('Cache.SerializerPermissions', 0777);

        $this->purifier = new \HTMLPurifier($HTMLPurifierConfig);
    }

    /**
     * Prepare (purify) the HTML
     *
     * Does nothing when the purifier is disabled in the config; otherwise both
     * input documents are replaced by their purified versions.
     *
     * @return void
     */
    protected function prepare()
    {
        if (false === $this->config->isPurifierEnabled()) {
            return;
        }

        $this->initPurifier($this->config->getPurifierCacheLocation());

        // NOTE(review): static analysis flagged that purifyHtml() might yield a
        // non-string; $oldText/$newText are documented as string - confirm
        // against HTMLPurifier::purify().
        $this->oldText = $this->purifyHtml($this->oldText);
        $this->newText = $this->purifyHtml($this->newText);
    }

    /**
     * Returns the DiffCache bound to the configured cache provider, creating it
     * on first use, or null when no cache provider is configured.
     *
     * @return DiffCache|null
     */
    protected function getDiffCache()
    {
        if (!$this->hasDiffCache()) {
            return null;
        }

        // Fetched once so the hash and the DiffCache refer to the same object.
        $cacheProvider = $this->getConfig()->getCacheProvider();
        $hash = spl_object_hash($cacheProvider);

        if (!array_key_exists($hash, $this->diffCaches)) {
            $this->diffCaches[$hash] = new DiffCache($cacheProvider);
        }

        return $this->diffCaches[$hash];
    }

    /**
     * Tells whether the config carries a cache provider.
     *
     * @return bool
     */
    protected function hasDiffCache()
    {
        return null !== $this->getConfig()->getCacheProvider();
    }

    /**
     * @return HtmlDiffConfig
     */
    public function getConfig()
    {
        return $this->config;
    }

    /**
     * @param HtmlDiffConfig $config
     *
     * @return AbstractDiff
     */
    public function setConfig(HtmlDiffConfig $config)
    {
        $this->config = $config;

        return $this;
    }

    /**
     * @return int
     *
     * @deprecated since 0.1.0
     */
    public function getMatchThreshold()
    {
        return $this->config->getMatchThreshold();
    }

    /**
     * @param int $matchThreshold
     *
     * @return AbstractDiff
     *
     * @deprecated since 0.1.0
     */
    public function setMatchThreshold($matchThreshold)
    {
        $this->config->setMatchThreshold($matchThreshold);

        return $this;
    }

    /**
     * @param array $chars
     *
     * @deprecated since 0.1.0
     */
    public function setSpecialCaseChars(array $chars)
    {
        $this->config->setSpecialCaseChars($chars);
    }

    /**
     * @return array|null
     *
     * @deprecated since 0.1.0
     */
    public function getSpecialCaseChars()
    {
        return $this->config->getSpecialCaseChars();
    }

    /**
     * @param string $char
     *
     * @deprecated since 0.1.0
     */
    public function addSpecialCaseChar($char)
    {
        $this->config->addSpecialCaseChar($char);
    }

    /**
     * @param string $char
     *
     * @deprecated since 0.1.0
     */
    public function removeSpecialCaseChar($char)
    {
        $this->config->removeSpecialCaseChar($char);
    }

    /**
     * Replaces the special-case tags on the config.
     *
     * @param array $tags
     *
     * @deprecated since 0.1.0
     */
    public function setSpecialCaseTags(array $tags = array())
    {
        // Previously forwarded to setSpecialCaseChars(), which overwrote the
        // special-case characters and left the tags untouched.
        $this->config->setSpecialCaseTags($tags);
    }

    /**
     * @param string $tag
     *
     * @deprecated since 0.1.0
     */
    public function addSpecialCaseTag($tag)
    {
        $this->config->addSpecialCaseTag($tag);
    }

    /**
     * @param string $tag
     *
     * @deprecated since 0.1.0
     */
    public function removeSpecialCaseTag($tag)
    {
        $this->config->removeSpecialCaseTag($tag);
    }

    /**
     * @return array|null
     *
     * @deprecated since 0.1.0
     */
    public function getSpecialCaseTags()
    {
        return $this->config->getSpecialCaseTags();
    }

    /**
     * @return string
     */
    public function getOldHtml()
    {
        return $this->oldText;
    }

    /**
     * @return string
     */
    public function getNewHtml()
    {
        return $this->newText;
    }

    /**
     * Returns the stored diff content (null after clearContent()).
     *
     * @return string|null
     */
    public function getDifference()
    {
        return $this->content;
    }

    /**
     * Clears the diff content.
     *
     * The content is reset to null, not to the empty string the constructor
     * starts with.
     *
     * @return void
     */
    public function clearContent()
    {
        $this->content = null;
    }

    /**
     * @param bool $boolean
     *
     * @return $this
     *
     * @deprecated since 0.1.0
     */
    public function setGroupDiffs($boolean)
    {
        $this->config->setGroupDiffs($boolean);

        return $this;
    }

    /**
     * @return bool
     *
     * @deprecated since 0.1.0
     */
    public function isGroupDiffs()
    {
        return $this->config->isGroupDiffs();
    }

    /**
     * Stores the HTMLPurifier config that initPurifier() will use instead of
     * HTMLPurifier's default one.
     *
     * @param \HTMLPurifier_Config $config
     */
    public function setHTMLPurifierConfig(\HTMLPurifier_Config $config)
    {
        $this->purifierConfig = $config;
    }

    /**
     * Builds a case-insensitive regular expression matching the start of an
     * opening tag (everything up to, but not including, the closing '>').
     *
     * The tag name is inserted verbatim, without preg_quote().
     *
     * @param string $tag
     *
     * @return string
     */
    protected function getOpeningTag($tag)
    {
        return '/<'.$tag.'[^>]*/i';
    }

    /**
     * Builds the literal closing tag for the given tag name.
     *
     * @param string $tag
     *
     * @return string
     */
    protected function getClosingTag($tag)
    {
        return '</'.$tag.'>';
    }

    /**
     * Runs the HTML through the purifier, or returns it unchanged when no
     * purifier has been initialised.
     *
     * @param string $html
     *
     * @return string
     */
    protected function purifyHtml($html)
    {
        if (null === $this->purifier) {
            return $html;
        }

        return $this->purifier->purify($html);
    }

    /**
     * Splits both input documents into characters and then into word lists,
     * storing the results through setOldWords() / setNewWords().
     *
     * @return void
     */
    protected function splitInputsToWords()
    {
        $this->setOldWords($this->convertHtmlToListOfWords($this->explode($this->oldText)));
        $this->setNewWords($this->convertHtmlToListOfWords($this->explode($this->newText)));
    }

    /**
     * @param array $oldWords
     */
    protected function setOldWords(array $oldWords)
    {
        $this->resetCache = true;
        $this->oldWords   = $oldWords;
    }

    /**
     * @param array $newWords
     */
    protected function setNewWords(array $newWords)
    {
        $this->resetCache = true;
        $this->newWords   = $newWords;
    }

    /**
     * Tells whether the text is alphanumeric once the configured special-case
     * characters have been stripped from it.
     *
     * @param string $text
     *
     * @return bool
     */
    protected function isPartOfWord($text)
    {
        return $this->ctypeAlphanumUnicode(str_replace($this->config->getSpecialCaseChars(), '', $text));
    }

    /**
     * Groups a list of single characters into "words": complete tags, runs of
     * whitespace and runs of word characters.
     *
     * A small state machine with three modes ('character', 'tag', 'whitespace').
     * Unless the config asks to keep new lines, whitespace runs are collapsed to
     * a single space.
     *
     * @param array $characterString
     *
     * @return array
     */
    protected function convertHtmlToListOfWords($characterString)
    {
        $mode = 'character';
        $current_word = '';
        $words = array();
        $keepNewLines = $this->getConfig()->isKeepNewLines();

        foreach ($characterString as $i => $character) {
            switch ($mode) {
                case 'character':
                    if ($this->isStartOfTag($character)) {
                        if ($current_word != '') {
                            $words[] = $current_word;
                        }

                        $current_word = '<';
                        $mode = 'tag';
                    } elseif (preg_match("/\s/u", $character)) {
                        if ($current_word !== '') {
                            $words[] = $current_word;
                        }
                        $current_word = $keepNewLines ? $character : preg_replace('/\s+/Su', ' ', $character);
                        $mode = 'whitespace';
                    } else {
                        // The character extends the current word when it is
                        // alphanumeric and the word so far is empty or word-like,
                        // or when it is a special-case character directly
                        // followed by a word character.
                        if (
                            (($this->ctypeAlphanumUnicode($character) === true) && ($this->stringUtil->strlen($current_word) === 0 || $this->isPartOfWord($current_word))) ||
                            (in_array($character, $this->config->getSpecialCaseChars()) && isset($characterString[$i + 1]) && $this->isPartOfWord($characterString[$i + 1]))
                        ) {
                            $current_word .= $character;
                        } else {
                            // NOTE(review): unlike the other branches this one
                            // does not skip an empty $current_word - confirm
                            // whether that is intended.
                            $words[] = $current_word;
                            $current_word = $character;
                        }
                    }
                    break;
                case 'tag':
                    if ($this->isEndOfTag($character)) {
                        $current_word .= '>';
                        $words[] = $current_word;
                        $current_word = '';

                        // The original check here used the pattern '[^\s]u',
                        // which PCRE reads as ^\s (bracket delimiters) and which
                        // never matches '>', so the mode was always 'whitespace'.
                        // That effective behaviour is kept on purpose: in
                        // 'whitespace' mode a following non-word character does
                        // not push an empty word.
                        $mode = 'whitespace';
                    } else {
                        $current_word .= $character;
                    }
                    break;
                case 'whitespace':
                    if ($this->isStartOfTag($character)) {
                        if ($current_word !== '') {
                            $words[] = $current_word;
                        }
                        $current_word = '<';
                        $mode = 'tag';
                    } elseif (preg_match("/\s/u", $character)) {
                        $current_word .= $character;
                        if (!$keepNewLines) {
                            $current_word = preg_replace('/\s+/Su', ' ', $current_word);
                        }
                    } else {
                        if ($current_word != '') {
                            $words[] = $current_word;
                        }
                        $current_word = $character;
                        $mode = 'character';
                    }
                    break;
                default:
                    break;
            }
        }

        if ($current_word != '') {
            $words[] = $current_word;
        }

        return $words;
    }

    /**
     * @param string $val
     *
     * @return bool
     */
    protected function isStartOfTag($val)
    {
        return $val === '<';
    }

    /**
     * @param string $val
     *
     * @return bool
     */
    protected function isEndOfTag($val)
    {
        return $val === '>';
    }

    /**
     * Tells whether the value contains no non-whitespace character (an empty
     * string therefore counts as whitespace).
     *
     * @param string $value
     *
     * @return bool
     */
    protected function isWhiteSpace($value)
    {
        // Explicit '/' delimiters: the former pattern '[^\s]u' was parsed with
        // '[' and ']' as delimiters, i.e. as ^\s, which inverted the result.
        return !preg_match('/[^\s]/u', $value);
    }

    /**
     * Splits a UTF-8 string into a list of its characters.
     *
     * @param string $value
     *
     * @return array Empty when the string is empty or cannot be split
     *               (preg_split() failure, e.g. invalid UTF-8).
     */
    protected function explode($value)
    {
        // as suggested by @onassar
        $characters = preg_split('//u', $value, -1, PREG_SPLIT_NO_EMPTY);

        // preg_split() returns false on failure; honour the array contract.
        return false === $characters ? array() : $characters;
    }

    /**
     * Tells whether the string is non-empty and made only of ASCII letters,
     * digits and Unicode letters.
     *
     * @param string $str
     *
     * @return bool
     */
    protected function ctypeAlphanumUnicode($str)
    {
        return preg_match("/^[a-zA-Z0-9\pL]+$/u", $str) === 1;
    }
}
585