Test Failed
Pull Request — master (#96)
by Sven
03:26
created

AbstractDiff::ctypeAlphanumUnicode()   A

Complexity

Conditions 1
Paths 1

Size

Total Lines 4

Duplication

Lines 0
Ratio 0 %

Code Coverage

Tests 0
CRAP Score 2

Importance

Changes 0
Metric Value
cc 1
nc 1
nop 1
dl 0
loc 4
ccs 0
cts 0
cp 0
crap 2
rs 10
c 0
b 0
f 0
1
<?php
2
3
namespace Caxy\HtmlDiff;
4
5
use Caxy\HtmlDiff\Util\MbStringUtil;
6
use HTMLPurifier;
7
use HTMLPurifier_Config;
8
9
/**
 * Class AbstractDiff.
 *
 * Shared base for the diff builders. It stores the old and new input text,
 * the HtmlDiffConfig in use, the optional HTMLPurifier integration, and the
 * tokenizer that turns a list of characters into a list of "words" (tags,
 * whitespace runs and text fragments).
 */
abstract class AbstractDiff
{
    /**
     * @var array
     *
     * @deprecated since 0.1.0
     */
    public static $defaultSpecialCaseTags = array('strong', 'b', 'i', 'big', 'small', 'u', 'sub', 'sup', 'strike', 's', 'p');

    /**
     * @var array
     *
     * @deprecated since 0.1.0
     */
    public static $defaultSpecialCaseChars = array('.', ',', '(', ')', '\'');

    /**
     * @var bool
     *
     * @deprecated since 0.1.0
     */
    public static $defaultGroupDiffs = true;

    /**
     * @var HtmlDiffConfig
     */
    protected $config;

    /**
     * The diff output. Initialised to '' by the constructor and reset to
     * null by clearContent().
     *
     * @var string|null
     */
    protected $content;

    /**
     * @var string
     */
    protected $oldText;

    /**
     * @var string
     */
    protected $newText;

    /**
     * @var array
     */
    protected $oldWords = array();

    /**
     * @var array
     */
    protected $newWords = array();

    /**
     * DiffCache instances keyed by spl_object_hash() of the cache provider
     * they wrap (see getDiffCache()).
     *
     * @var DiffCache[]
     */
    protected $diffCaches = array();

    /**
     * Set by initPurifier(); null until then.
     *
     * @var HTMLPurifier|null
     */
    protected $purifier;

    /**
     * Optional caller-supplied purifier configuration
     * (see setHTMLPurifierConfig()).
     *
     * @var HTMLPurifier_Config|null
     */
    protected $purifierConfig = null;

    /**
     * Raised to true whenever the old or new word list is replaced.
     *
     * @see array_slice_cached();
     * @var bool
     */
    protected $resetCache = false;

    /**
     * @var MbStringUtil
     */
    protected $stringUtil;

    /**
     * AbstractDiff constructor.
     *
     * Creates a fresh HtmlDiffConfig with the given encoding; the two
     * optional arguments override the matching config values only when
     * they are not null.
     *
     * @param string     $oldText
     * @param string     $newText
     * @param string     $encoding
     * @param null|array $specialCaseTags
     * @param null|bool  $groupDiffs
     */
    public function __construct($oldText, $newText, $encoding = 'UTF-8', $specialCaseTags = null, $groupDiffs = null)
    {
        $this->stringUtil = new MbStringUtil($oldText, $newText);

        $this->setConfig(HtmlDiffConfig::create()->setEncoding($encoding));

        if ($specialCaseTags !== null) {
            $this->config->setSpecialCaseTags($specialCaseTags);
        }

        if ($groupDiffs !== null) {
            $this->config->setGroupDiffs($groupDiffs);
        }

        $this->oldText = $oldText;
        $this->newText = $newText;
        $this->content = '';
    }

    /**
     * @return bool|string
     */
    abstract public function build();

    /**
     * Initializes HTMLPurifier with cache location.
     *
     * Uses the configuration injected through setHTMLPurifierConfig() when
     * there is one, otherwise HTMLPurifier's default configuration.
     *
     * @param null|string $defaultPurifierSerializerCache
     */
    public function initPurifier($defaultPurifierSerializerCache = null)
    {
        if (null !== $this->purifierConfig) {
            $HTMLPurifierConfig = $this->purifierConfig;
        } else {
            $HTMLPurifierConfig = HTMLPurifier_Config::createDefault();
        }

        // Cache.SerializerPath defaults to Null and sets
        // the location to inside the vendor HTMLPurifier library
        // under the DefinitionCache/Serializer folder.
        if (!is_null($defaultPurifierSerializerCache)) {
            $HTMLPurifierConfig->set('Cache.SerializerPath', $defaultPurifierSerializerCache);
        }

        // Cache.SerializerPermissions defaults to 0744.
        // This setting allows the cache files to be deleted by any user, as they are typically
        // created by the web/php user (www-user, php-fpm, etc.)
        $HTMLPurifierConfig->set('Cache.SerializerPermissions', 0777);

        $this->purifier = new HTMLPurifier($HTMLPurifierConfig);
    }

    /**
     * Prepare (purify) the HTML
     *
     * Does nothing when the purifier is disabled in the config; otherwise
     * initialises the purifier and runs both input texts through it.
     *
     * @return void
     */
    protected function prepare()
    {
        if (false === $this->config->isPurifierEnabled()) {
            return;
        }

        $this->initPurifier($this->config->getPurifierCacheLocation());

        $this->oldText = $this->purifyHtml($this->oldText);
        $this->newText = $this->purifyHtml($this->newText);
    }

    /**
     * Returns the DiffCache bound to the configured cache provider, creating
     * and memoising it on first use. Returns null when no provider is set.
     *
     * @return DiffCache|null
     */
    protected function getDiffCache()
    {
        if (!$this->hasDiffCache()) {
            return null;
        }

        // Fetch the provider once so the hash and the DiffCache are always
        // derived from the same object.
        $cacheProvider = $this->getConfig()->getCacheProvider();
        $hash = spl_object_hash($cacheProvider);

        if (!array_key_exists($hash, $this->diffCaches)) {
            $this->diffCaches[$hash] = new DiffCache($cacheProvider);
        }

        return $this->diffCaches[$hash];
    }

    /**
     * @return bool True when the config exposes a cache provider.
     */
    protected function hasDiffCache()
    {
        return null !== $this->getConfig()->getCacheProvider();
    }

    /**
     * @return HtmlDiffConfig
     */
    public function getConfig()
    {
        return $this->config;
    }

    /**
     * @param HtmlDiffConfig $config
     *
     * @return AbstractDiff
     */
    public function setConfig(HtmlDiffConfig $config)
    {
        $this->config = $config;

        return $this;
    }

    /**
     * @return int
     *
     * @deprecated since 0.1.0
     */
    public function getMatchThreshold()
    {
        return $this->config->getMatchThreshold();
    }

    /**
     * @param int $matchThreshold
     *
     * @return AbstractDiff
     *
     * @deprecated since 0.1.0
     */
    public function setMatchThreshold($matchThreshold)
    {
        $this->config->setMatchThreshold($matchThreshold);

        return $this;
    }

    /**
     * @param array $chars
     *
     * @deprecated since 0.1.0
     */
    public function setSpecialCaseChars(array $chars)
    {
        $this->config->setSpecialCaseChars($chars);
    }

    /**
     * @return array|null
     *
     * @deprecated since 0.1.0
     */
    public function getSpecialCaseChars()
    {
        return $this->config->getSpecialCaseChars();
    }

    /**
     * @param string $char
     *
     * @deprecated since 0.1.0
     */
    public function addSpecialCaseChar($char)
    {
        $this->config->addSpecialCaseChar($char);
    }

    /**
     * @param string $char
     *
     * @deprecated since 0.1.0
     */
    public function removeSpecialCaseChar($char)
    {
        $this->config->removeSpecialCaseChar($char);
    }

    /**
     * Replaces the special-case tags on the config.
     *
     * @param array $tags
     *
     * @deprecated since 0.1.0
     */
    public function setSpecialCaseTags(array $tags = array())
    {
        // Previously this delegated to setSpecialCaseChars(), which clobbered
        // the special-case characters and left the tags untouched.
        $this->config->setSpecialCaseTags($tags);
    }

    /**
     * @param string $tag
     *
     * @deprecated since 0.1.0
     */
    public function addSpecialCaseTag($tag)
    {
        $this->config->addSpecialCaseTag($tag);
    }

    /**
     * @param string $tag
     *
     * @deprecated since 0.1.0
     */
    public function removeSpecialCaseTag($tag)
    {
        $this->config->removeSpecialCaseTag($tag);
    }

    /**
     * @return array|null
     *
     * @deprecated since 0.1.0
     */
    public function getSpecialCaseTags()
    {
        return $this->config->getSpecialCaseTags();
    }

    /**
     * @return string
     */
    public function getOldHtml()
    {
        return $this->oldText;
    }

    /**
     * @return string
     */
    public function getNewHtml()
    {
        return $this->newText;
    }

    /**
     * @return string|null The diff content; null after clearContent().
     */
    public function getDifference()
    {
        return $this->content;
    }

    /**
     * Clears the diff content.
     *
     * @return void
     */
    public function clearContent()
    {
        $this->content = null;
    }

    /**
     * @param bool $boolean
     *
     * @return $this
     *
     * @deprecated since 0.1.0
     */
    public function setGroupDiffs($boolean)
    {
        $this->config->setGroupDiffs($boolean);

        return $this;
    }

    /**
     * @return bool
     *
     * @deprecated since 0.1.0
     */
    public function isGroupDiffs()
    {
        return $this->config->isGroupDiffs();
    }

    /**
     * Supplies the HTMLPurifier configuration that initPurifier() will use
     * instead of the library default.
     *
     * @param HTMLPurifier_Config $config
     */
    public function setHTMLPurifierConfig(HTMLPurifier_Config $config)
    {
        $this->purifierConfig = $config;
    }

    /**
     * Runs the given HTML through the purifier.
     *
     * @param string $html
     *
     * @return string The input unchanged when no purifier has been initialised,
     *                otherwise whatever HTMLPurifier::purify() returns.
     */
    protected function purifyHtml($html)
    {
        if (null === $this->purifier) {
            return $html;
        }

        return $this->purifier->purify($html);
    }

    /**
     * Tokenizes both input texts and stores the results as the old and new
     * word lists.
     *
     * @return void
     */
    protected function splitInputsToWords()
    {
        $this->setOldWords($this->convertHtmlToListOfWords($this->explode($this->oldText)));
        $this->setNewWords($this->convertHtmlToListOfWords($this->explode($this->newText)));
    }

    /**
     * @param array $oldWords
     */
    protected function setOldWords(array $oldWords)
    {
        $this->resetCache = true;
        $this->oldWords   = $oldWords;
    }

    /**
     * @param array $newWords
     */
    protected function setNewWords(array $newWords)
    {
        $this->resetCache = true;
        $this->newWords   = $newWords;
    }

    /**
     * Tells whether the text is purely alphanumeric once the configured
     * special-case characters have been stripped from it.
     *
     * @param string $text
     *
     * @return bool
     */
    protected function isPartOfWord($text)
    {
        return $this->ctypeAlphanumUnicode(str_replace($this->config->getSpecialCaseChars(), '', $text));
    }

    /**
     * Groups a list of single characters into words.
     *
     * A small state machine with three modes ('character', 'tag',
     * 'whitespace') walks the characters; each complete tag, whitespace run
     * and text fragment becomes one entry of the result. Unless the config
     * asks to keep new lines, whitespace runs are collapsed to a single space.
     *
     * @param array $characterString List of characters, as produced by explode()
     *
     * @return array
     */
    protected function convertHtmlToListOfWords($characterString)
    {
        $mode = 'character';
        $current_word = '';
        $words = array();
        $keepNewLines = $this->getConfig()->isKeepNewLines();
        foreach ($characterString as $i => $character) {
            switch ($mode) {
                case 'character':
                    if ($this->isStartOfTag($character)) {
                        if ($current_word != '') {
                            $words[] = $current_word;
                        }

                        $current_word = '<';
                        $mode = 'tag';
                    } elseif (preg_match("/\s/u", $character)) {
                        if ($current_word !== '') {
                            $words[] = $current_word;
                        }
                        $current_word = $keepNewLines ? $character : preg_replace('/\s+/Su', ' ', $character);
                        $mode = 'whitespace';
                    } else {
                        // Extend the current word when the character is
                        // alphanumeric and the word so far is empty or word-like,
                        // or when it is a special-case character directly
                        // followed by a word character; otherwise start a new word.
                        if (
                            (($this->ctypeAlphanumUnicode($character) === true) && ($this->stringUtil->strlen($current_word) === 0 || $this->isPartOfWord($current_word))) ||
                            (in_array($character, $this->config->getSpecialCaseChars()) && isset($characterString[$i + 1]) && $this->isPartOfWord($characterString[$i + 1]))
                        ) {
                            $current_word .= $character;
                        } else {
                            $words[] = $current_word;
                            $current_word = $character;
                        }
                    }
                    break;
                case 'tag':
                    if ($this->isEndOfTag($character)) {
                        $current_word .= '>';
                        $words[] = $current_word;
                        $current_word = '';

                        // NOTE(review): '[' and ']' act as the PCRE delimiters
                        // here, so the effective pattern is /^\s/u, not the
                        // character class it resembles. It is intentionally left
                        // untouched: altering it would change which mode follows
                        // a closing '>' and therefore the produced word list.
                        if (!preg_match('[^\s]u', $character)) {
                            $mode = 'whitespace';
                        } else {
                            $mode = 'character';
                        }
                    } else {
                        $current_word .= $character;
                    }
                    break;
                case 'whitespace':
                    if ($this->isStartOfTag($character)) {
                        if ($current_word !== '') {
                            $words[] = $current_word;
                        }
                        $current_word = '<';
                        $mode = 'tag';
                    } elseif (preg_match("/\s/u", $character)) {
                        $current_word .= $character;
                        if (!$keepNewLines) {
                            $current_word = preg_replace('/\s+/Su', ' ', $current_word);
                        }
                    } else {
                        if ($current_word != '') {
                            $words[] = $current_word;
                        }
                        $current_word = $character;
                        $mode = 'character';
                    }
                    break;
                default:
                    break;
            }
        }
        // Flush whatever is still being accumulated when the input ends.
        if ($current_word != '') {
            $words[] = $current_word;
        }

        return $words;
    }

    /**
     * @param string $val
     *
     * @return bool
     */
    protected function isStartOfTag($val)
    {
        return $val === '<';
    }

    /**
     * @param string $val
     *
     * @return bool
     */
    protected function isEndOfTag($val)
    {
        return $val === '>';
    }

    /**
     * Tells whether the value contains nothing but whitespace (an empty
     * string counts as whitespace).
     *
     * @param string $value
     *
     * @return bool
     */
    protected function isWhiteSpace($value)
    {
        // The pattern needs explicit delimiters: written as '[^\s]u' the
        // brackets are taken as delimiters and the regex becomes /^\s/u,
        // which inverted the result for ordinary input.
        return !preg_match('/[^\s]/u', $value);
    }

    /**
     * Splits a UTF-8 string into a list of its characters.
     *
     * @param string $value
     *
     * @return array
     */
    protected function explode($value)
    {
        // as suggested by @onassar
        return preg_split('//u', $value, -1, PREG_SPLIT_NO_EMPTY);
    }

    /**
     * Tells whether the string consists solely of ASCII letters/digits and
     * Unicode letters (\pL). An empty string yields false.
     *
     * @param string $str
     *
     * @return bool
     */
    protected function ctypeAlphanumUnicode($str)
    {
        return preg_match("/^[a-zA-Z0-9\pL]+$/u", $str) === 1;
    }
}
567