Test Failed
Pull Request — master (#96)
by Sven
03:32
created

AbstractDiff::removeSpecialCaseChar()   A

Complexity

Conditions 1
Paths 1

Size

Total Lines 4

Duplication

Lines 0
Ratio 0 %

Code Coverage

Tests 0
CRAP Score 2

Importance

Changes 0
Metric Value
cc 1
nc 1
nop 1
dl 0
loc 4
ccs 0
cts 2
cp 0
crap 2
rs 10
c 0
b 0
f 0
1
<?php
2
3
namespace Caxy\HtmlDiff;
4
5
use Caxy\HtmlDiff\Util\MbStringUtil;
6
use HTMLPurifier;
7
use HTMLPurifier_Config;
8
9
/**
10
 * Class AbstractDiff.
11
 */
12
abstract class AbstractDiff
13
{
14
    /**
15
     * @var array
16
     *
17
     * @deprecated since 0.1.0
18
     */
19
    public static $defaultSpecialCaseTags = array('strong', 'b', 'i', 'big', 'small', 'u', 'sub', 'sup', 'strike', 's', 'p');
20
21
    /**
22
     * @var array
23
     *
24
     * @deprecated since 0.1.0
25
     */
26
    public static $defaultSpecialCaseChars = array('.', ',', '(', ')', '\'');
27
28
    /**
29
     * @var bool
30
     *
31
     * @deprecated since 0.1.0
32
     */
33
    public static $defaultGroupDiffs = true;
34
35
    /**
36
     * @var HtmlDiffConfig
37
     */
38
    protected $config;
39
40
    /**
41
     * @var string
42
     */
43
    protected $content;
44
45
    /**
46
     * @var string
47
     */
48
    protected $oldText;
49
50
    /**
51
     * @var string
52
     */
53
    protected $newText;
54
55
    /**
56
     * @var array
57
     */
58
    protected $oldWords = array();
59
60
    /**
61
     * @var array
62
     */
63
    protected $newWords = array();
64
65
    /**
66
     * @var DiffCache[]
67
     */
68
    protected $diffCaches = array();
69
70
    /**
71
     * @var HTMLPurifier
72
     */
73
    protected $purifier;
74
75
    /**
76
     * @var HTMLPurifier_Config|null
77
     */
78
    protected $purifierConfig = null;
79
80
    /**
81
     * @see array_slice_cached();
82
     * @var bool
83
     */
84
    protected $resetCache = false;
85
86
    /**
87
     * @var MbStringUtil
88
     */
89
    protected $stringUtil;
90
91
    /**
     * AbstractDiff constructor.
     *
     * Creates the multibyte string helper, installs a fresh config carrying the requested
     * encoding, applies the optional legacy overrides and stores both input texts.
     *
     * @param string     $oldText
     * @param string     $newText
     * @param string     $encoding
     * @param null|array $specialCaseTags Forwarded to the config only when not null.
     * @param null|bool  $groupDiffs      Forwarded to the config only when not null.
     */
    public function __construct($oldText, $newText, $encoding = 'UTF-8', $specialCaseTags = null, $groupDiffs = null)
    {
        $this->stringUtil = new MbStringUtil($oldText, $newText);

        $initialConfig = HtmlDiffConfig::create()->setEncoding($encoding);
        $this->setConfig($initialConfig);

        if (null !== $specialCaseTags) {
            $this->config->setSpecialCaseTags($specialCaseTags);
        }

        if (null !== $groupDiffs) {
            $this->config->setGroupDiffs($groupDiffs);
        }

        $this->oldText = $oldText;
        $this->newText = $newText;
        $this->content = '';
    }
118
119
    /**
     * Runs the diff; the concrete algorithm is supplied by each subclass.
     *
     * @return bool|string
     */
122
    abstract public function build();
123
124
    /**
     * Initializes HTMLPurifier with cache location.
     *
     * Uses the config injected through setHTMLPurifierConfig() when there is one, otherwise
     * HTMLPurifier's default config. Note that the selected config object is modified in place.
     *
     * @param null|string $defaultPurifierSerializerCache Value for Cache.SerializerPath; when null
     *                                                    the option is left untouched.
     */
    public function initPurifier($defaultPurifierSerializerCache = null)
    {
        $purifierSettings = (null === $this->purifierConfig)
            ? HTMLPurifier_Config::createDefault()
            : $this->purifierConfig;

        // Cache.SerializerPath defaults to Null, which places the cache inside the vendor
        // HTMLPurifier library under the DefinitionCache/Serializer folder.
        if (null !== $defaultPurifierSerializerCache) {
            $purifierSettings->set('Cache.SerializerPath', $defaultPurifierSerializerCache);
        }

        // Cache.SerializerPermissions defaults to 0744.
        // 0777 allows the cache files to be deleted by any user, as they are typically
        // created by the web/php user (www-user, php-fpm, etc.)
        $purifierSettings->set('Cache.SerializerPermissions', 0777);

        $this->purifier = new HTMLPurifier($purifierSettings);
    }
151
152
    /**
     * Prepare (purify) the HTML.
     *
     * Initializes the purifier with the cache location taken from the config and replaces
     * both input texts with their purified versions.
     *
     * @return void
     */
    protected function prepare()
    {
        $this->initPurifier($this->config->getPurifierCacheLocation());

        // Static analysis reports that purifyHtml() may yield a non-string value; the cast keeps
        // $oldText / $newText in line with their declared "string" type.
        $this->oldText = (string) $this->purifyHtml($this->oldText);
        $this->newText = (string) $this->purifyHtml($this->newText);
    }
164
165
    /**
     * Returns the DiffCache bound to the currently configured cache provider.
     *
     * One DiffCache is created lazily per provider instance and memoized in $diffCaches,
     * keyed by the provider's spl_object_hash().
     *
     * @return DiffCache|null Null when no cache provider is configured.
     */
    protected function getDiffCache()
    {
        if (!$this->hasDiffCache()) {
            return null;
        }

        // Fetch the provider once so the null check, the hash and the DiffCache constructor
        // all operate on the same object.
        $cacheProvider = $this->getConfig()->getCacheProvider();
        if (null === $cacheProvider) {
            // Guards against a hasDiffCache() result that disagrees with the config.
            return null;
        }

        $hash = spl_object_hash($cacheProvider);

        if (!array_key_exists($hash, $this->diffCaches)) {
            $this->diffCaches[$hash] = new DiffCache($cacheProvider);
        }

        return $this->diffCaches[$hash];
    }
182
183
    /**
     * Tells whether the config currently holds a cache provider.
     *
     * @return bool
     */
    protected function hasDiffCache()
    {
        $cacheProvider = $this->getConfig()->getCacheProvider();

        return $cacheProvider !== null;
    }
190
191
    /**
     * Returns the configuration object this diff operates with.
     *
     * @return HtmlDiffConfig
     */
    public function getConfig()
    {
        return $this->config;
    }
198
199
    /**
     * Replaces the configuration object used by this diff.
     *
     * @param HtmlDiffConfig $config
     *
     * @return AbstractDiff The current instance, for chaining.
     */
    public function setConfig(HtmlDiffConfig $config)
    {
        $this->config = $config;

        return $this;
    }
210
211
    /**
     * Legacy accessor that reads the match threshold from the config.
     *
     * @return int
     *
     * @deprecated since 0.1.0
     */
    public function getMatchThreshold()
    {
        $threshold = $this->config->getMatchThreshold();

        return $threshold;
    }
220
221
    /**
     * Legacy mutator that stores the match threshold on the config.
     *
     * @param int $matchThreshold
     *
     * @return AbstractDiff The current instance, for chaining.
     *
     * @deprecated since 0.1.0
     */
    public function setMatchThreshold($matchThreshold)
    {
        $this->config->setMatchThreshold($matchThreshold);

        return $this;
    }
234
235
    /**
     * Legacy mutator that replaces the config's special-case characters.
     *
     * @param array $chars
     *
     * @deprecated since 0.1.0
     */
    public function setSpecialCaseChars(array $chars)
    {
        $this->config->setSpecialCaseChars($chars);
    }
244
245
    /**
     * Legacy accessor that reads the special-case characters from the config.
     *
     * @return array|null
     *
     * @deprecated since 0.1.0
     */
    public function getSpecialCaseChars()
    {
        $chars = $this->config->getSpecialCaseChars();

        return $chars;
    }
254
255
    /**
     * Legacy mutator that adds one special-case character to the config.
     *
     * @param string $char
     *
     * @deprecated since 0.1.0
     */
    public function addSpecialCaseChar($char)
    {
        $this->config->addSpecialCaseChar($char);
    }
264
265
    /**
     * Legacy mutator that removes one special-case character from the config.
     *
     * @param string $char
     *
     * @deprecated since 0.1.0
     */
    public function removeSpecialCaseChar($char)
    {
        $this->config->removeSpecialCaseChar($char);
    }
274
275
    /**
     * Legacy mutator that replaces the config's special-case tags.
     *
     * This previously forwarded to setSpecialCaseChars(), which overwrote the special-case
     * characters with tag names and left the tags unchanged. It now forwards to the config's
     * setSpecialCaseTags(), the same method the constructor uses.
     *
     * @param array $tags
     *
     * @deprecated since 0.1.0
     */
    public function setSpecialCaseTags(array $tags = array())
    {
        $this->config->setSpecialCaseTags($tags);
    }
284
285
    /**
     * Legacy mutator that adds one special-case tag to the config.
     *
     * @param string $tag
     *
     * @deprecated since 0.1.0
     */
    public function addSpecialCaseTag($tag)
    {
        $this->config->addSpecialCaseTag($tag);
    }
294
295
    /**
     * Legacy mutator that removes one special-case tag from the config.
     *
     * @param string $tag
     *
     * @deprecated since 0.1.0
     */
    public function removeSpecialCaseTag($tag)
    {
        $this->config->removeSpecialCaseTag($tag);
    }
304
305
    /**
     * Legacy accessor that reads the special-case tags from the config.
     *
     * @return array|null
     *
     * @deprecated since 0.1.0
     */
    public function getSpecialCaseTags()
    {
        $tags = $this->config->getSpecialCaseTags();

        return $tags;
    }
314
315
    /**
     * Returns the old text as currently stored (the purified version once prepare() has run).
     *
     * @return string
     */
    public function getOldHtml()
    {
        return $this->oldText;
    }
322
323
    /**
     * Returns the new text as currently stored (the purified version once prepare() has run).
     *
     * @return string
     */
    public function getNewHtml()
    {
        return $this->newText;
    }
330
331
    /**
     * Returns the diff content accumulated so far.
     *
     * The value is an empty string right after construction and null after clearContent().
     *
     * @return string
     */
    public function getDifference()
    {
        return $this->content;
    }
338
339
    /**
     * Clears the diff content.
     *
     * The content is reset to null (not to an empty string), so getDifference() returns
     * null until new content is produced.
     *
     * @return void
     */
    public function clearContent()
    {
        $this->content = null;
    }
348
349
    /**
     * Legacy mutator that stores the "group diffs" flag on the config.
     *
     * @param bool $boolean
     *
     * @return $this
     *
     * @deprecated since 0.1.0
     */
    public function setGroupDiffs($boolean)
    {
        $this->config->setGroupDiffs($boolean);

        return $this;
    }
362
363
    /**
     * Legacy accessor that reads the "group diffs" flag from the config.
     *
     * @return bool
     *
     * @deprecated since 0.1.0
     */
    public function isGroupDiffs()
    {
        $groupDiffs = $this->config->isGroupDiffs();

        return $groupDiffs;
    }
372
373
    /**
     * Injects a custom HTMLPurifier config; initPurifier() uses it instead of the default one.
     *
     * @param HTMLPurifier_Config $config
     */
    public function setHTMLPurifierConfig(HTMLPurifier_Config $config)
    {
        $this->purifierConfig = $config;
    }
380
381
    /**
     * Runs the given HTML through the purifier created by initPurifier().
     *
     * The result is cast to string so the documented return type holds even if the
     * purifier were to yield false or null, as static analysis suggests it might.
     *
     * @param string $html
     *
     * @return string
     */
    protected function purifyHtml($html)
    {
        return (string) $this->purifier->purify($html);
    }
390
391
    /**
     * Splits the old and new text into characters, groups the characters into words and
     * stores the resulting lists via setOldWords() / setNewWords().
     *
     * @return void
     */
    protected function splitInputsToWords()
    {
        $oldCharacters = $this->explode($this->oldText);
        $this->setOldWords($this->convertHtmlToListOfWords($oldCharacters));

        $newCharacters = $this->explode($this->newText);
        $this->setNewWords($this->convertHtmlToListOfWords($newCharacters));
    }
396
397
    /**
     * Stores the word list of the old text and raises the $resetCache flag.
     *
     * @param array $oldWords
     */
    protected function setOldWords(array $oldWords)
    {
        $this->oldWords = $oldWords;
        $this->resetCache = true;
    }
405
406 16
    /**
     * Stores the word list of the new text and raises the $resetCache flag.
     *
     * @param array $newWords
     */
    protected function setNewWords(array $newWords)
    {
        $this->newWords = $newWords;
        $this->resetCache = true;
    }
414
415
    /**
     * Tells whether the text is purely alphanumeric once every configured special-case
     * character has been stripped from it.
     *
     * @param string $text
     *
     * @return bool
     */
    protected function isPartOfWord($text)
    {
        $withoutSpecialChars = str_replace($this->config->getSpecialCaseChars(), '', $text);

        return $this->ctypeAlphanumUnicode($withoutSpecialChars);
    }
424
425
    /**
     * Groups a list of single characters into "words": HTML tags, whitespace runs and text tokens.
     *
     * The input is walked once with a three-state machine:
     *  - "character":  accumulating a text token,
     *  - "tag":        accumulating everything from "<" up to and including ">",
     *  - "whitespace": accumulating consecutive whitespace.
     *
     * Unless the config reports isKeepNewLines(), each whitespace run is collapsed to one space.
     *
     * @param array $characterString List of characters, e.g. as produced by explode().
     *
     * @return array The words in their original order.
     */
    protected function convertHtmlToListOfWords($characterString)
    {
        $mode = 'character';
        $current_word = '';
        $words = array();
        $keepNewLines = $this->getConfig()->isKeepNewLines();
        foreach ($characterString as $i => $character) {
            switch ($mode) {
                case 'character':
                    if ($this->isStartOfTag($character)) {
                        if ($current_word != '') {
                            $words[] = $current_word;
                        }

                        $current_word = '<';
                        $mode = 'tag';
                    } elseif (preg_match("/\s/u", $character)) {
                        if ($current_word !== '') {
                            $words[] = $current_word;
                        }
                        $current_word = $keepNewLines ? $character : preg_replace('/\s+/Su', ' ', $character);
                        $mode = 'whitespace';
                    } else {
                        // Extend the current token when the character is alphanumeric and the
                        // token so far is empty or word-like, or when it is a special-case
                        // character directly followed by a word-like character.
                        if (
                            (($this->ctypeAlphanumUnicode($character) === true) && ($this->stringUtil->strlen($current_word) === 0 || $this->isPartOfWord($current_word))) ||
                            (in_array($character, $this->config->getSpecialCaseChars()) && isset($characterString[$i + 1]) && $this->isPartOfWord($characterString[$i + 1]))
                        ) {
                            $current_word .= $character;
                        } else {
                            // NOTE(review): the current word is pushed even when it is empty.
                            $words[] = $current_word;
                            $current_word = $character;
                        }
                    }
                    break;
                case 'tag':
                    if ($this->isEndOfTag($character)) {
                        $current_word .= '>';
                        $words[] = $current_word;
                        $current_word = '';

                        // The bracket-delimited pattern below is effectively /^\s/u. With the
                        // default isEndOfTag() the character here is ">", so the condition holds
                        // and the machine continues in "whitespace" mode after a tag.
                        if (!preg_match('[^\s]u', $character)) {
                            $mode = 'whitespace';
                        } else {
                            $mode = 'character';
                        }
                    } else {
                        $current_word .= $character;
                    }
                    break;
                case 'whitespace':
                    if ($this->isStartOfTag($character)) {
                        if ($current_word !== '') {
                            $words[] = $current_word;
                        }
                        $current_word = '<';
                        $mode = 'tag';
                    } elseif (preg_match("/\s/u", $character)) {
                        $current_word .= $character;
                        if (!$keepNewLines) {
                            $current_word = preg_replace('/\s+/Su', ' ', $current_word);
                        }
                    } else {
                        if ($current_word != '') {
                            $words[] = $current_word;
                        }
                        $current_word = $character;
                        $mode = 'character';
                    }
                    break;
                default:
                    break;
            }
        }

        // Flush whatever token was still being built when the input ended.
        if ($current_word != '') {
            $words[] = $current_word;
        }

        return $words;
    }
507
508 16
    /**
     * Tells whether the value is exactly the tag-opening character "<".
     *
     * @param string $val
     *
     * @return bool
     */
    protected function isStartOfTag($val)
    {
        return '<' === $val;
    }
517
518
    /**
     * Tells whether the value is exactly the tag-closing character ">".
     *
     * @param string $val
     *
     * @return bool
     */
    protected function isEndOfTag($val)
    {
        return '>' === $val;
    }
527
528
    /**
     * Tells whether the value consists solely of whitespace (an empty string counts as whitespace).
     *
     * The previous pattern '[^\s]u' was parsed with "[" and "]" as delimiters, i.e. as the
     * anchored expression /^\s/u, so the method answered "does not start with whitespace",
     * the opposite of what its name promises. The pattern now uses explicit "/" delimiters
     * and looks for any non-whitespace character.
     *
     * @param string $value
     *
     * @return bool
     */
    protected function isWhiteSpace($value)
    {
        return !preg_match('/[^\s]/u', $value);
    }
537
538
    /**
     * Splits a UTF-8 string into a list of its individual characters.
     *
     * @param string $value
     *
     * @return array
     */
    protected function explode($value)
    {
        // as suggested by @onassar: an empty pattern with the "u" modifier splits between
        // characters rather than bytes.
        $characters = preg_split('//u', $value, -1, PREG_SPLIT_NO_EMPTY);

        return $characters;
    }
548
549
    /**
     * Tells whether the string is non-empty and made up only of ASCII letters, digits and
     * Unicode letters.
     *
     * @param string $str
     *
     * @return bool
     */
    protected function ctypeAlphanumUnicode($str)
    {
        $matchCount = preg_match("/^[a-zA-Z0-9\pL]+$/u", $str);

        return 1 === $matchCount;
    }
558
}
559