Passed
Push — master ( 9b95a1...8430aa )
by Josh
01:05
created

AbstractDiff::initPurifier()   A

Complexity

Conditions 2
Paths 2

Size

Total Lines 11
Code Lines 5

Duplication

Lines 0
Ratio 0 %

Code Coverage

Tests 5
CRAP Score 2.0932

Importance

Changes 1
Bugs 1 Features 0
Metric Value
cc 2
eloc 5
c 1
b 1
f 0
nc 2
nop 1
dl 0
loc 11
ccs 5
cts 7
cp 0.7143
crap 2.0932
rs 9.4285
1
<?php
2
3
namespace Caxy\HtmlDiff;
4
5
/**
6
 * Class AbstractDiff.
7
 */
8
abstract class AbstractDiff
9
{
10
    /**
11
     * @var array
12
     *
13
     * @deprecated since 0.1.0
14
     */
15
    public static $defaultSpecialCaseTags = array('strong', 'b', 'i', 'big', 'small', 'u', 'sub', 'sup', 'strike', 's', 'p');
0 ignored issues
show
Coding Style introduced by
This line exceeds maximum limit of 120 characters; contains 125 characters

Overly long lines are hard to read on any screen. Most code styles therefore impose a maximum limit on the number of characters in a line.

Loading history...
16
    /**
17
     * @var array
18
     *
19
     * @deprecated since 0.1.0
20
     */
21
    public static $defaultSpecialCaseChars = array('.', ',', '(', ')', '\'');
22
    /**
23
     * @var bool
24
     *
25
     * @deprecated since 0.1.0
26
     */
27
    public static $defaultGroupDiffs = true;
28
29
    /**
30
     * @var HtmlDiffConfig
31
     */
32
    protected $config;
33
34
    /**
35
     * @var string
36
     */
37
    protected $content;
38
    /**
39
     * @var string
40
     */
41
    protected $oldText;
42
    /**
43
     * @var string
44
     */
45
    protected $newText;
46
    /**
47
     * @var array
48
     */
49
    protected $oldWords = array();
50
    /**
51
     * @var array
52
     */
53
    protected $newWords = array();
54
55
    /**
56
     * @var DiffCache[]
57
     */
58
    protected $diffCaches = array();
59
60
    /**
61
     * @var \HTMLPurifier
62
     */
63
    protected $purifier;
64
65
    /**
66
     * AbstractDiff constructor.
67
     *
68
     * @param string     $oldText
69
     * @param string     $newText
70
     * @param string     $encoding
71
     * @param null|array $specialCaseTags
72
     * @param null|bool  $groupDiffs
73
     */
74 12
    public function __construct($oldText, $newText, $encoding = 'UTF-8', $specialCaseTags = null, $groupDiffs = null)
75
    {
76 12
        mb_substitute_character(0x20);
77
78 12
        $this->setConfig(HtmlDiffConfig::create()->setEncoding($encoding));
79
80 12
        if ($specialCaseTags !== null) {
81 12
            $this->config->setSpecialCaseTags($specialCaseTags);
82 12
        }
83
84 12
        if ($groupDiffs !== null) {
85
            $this->config->setGroupDiffs($groupDiffs);
86
        }
87
88 12
        $this->oldText = $this->purifyHtml($oldText);
0 ignored issues
show
Documentation Bug introduced by
It seems like $this->purifyHtml($oldText) can also be of type false. However, the property $oldText is declared as type string. Maybe add an additional type check?

Our type inference engine has found a suspicious assignment of a value to a property. This check raises an issue when a value that can be of a mixed type is assigned to a property that is type hinted more strictly.

For example, imagine you have a variable $accountId that can either hold an Id object or false (if there is no account id yet). Your code now assigns that value to the id property of an instance of the Account class. This class holds a proper account, so the id value must no longer be false.

Either this assignment is in error or a type check should be added for that assignment.

class Id
{
    public $id;

    public function __construct($id)
    {
        $this->id = $id;
    }

}

class Account
{
    /** @var  Id $id */
    public $id;
}

$account_id = false;

if (starsAreRight()) {
    $account_id = new Id(42);
}

$account = new Account();
if ($account instanceof Id)
{
    $account->id = $account_id;
}
Loading history...
89 12
        $this->newText = $this->purifyHtml($newText);
0 ignored issues
show
Documentation Bug introduced by
It seems like $this->purifyHtml($newText) can also be of type false. However, the property $newText is declared as type string. Maybe add an additional type check?

Our type inference engine has found a suspicious assignment of a value to a property. This check raises an issue when a value that can be of a mixed type is assigned to a property that is type hinted more strictly.

For example, imagine you have a variable $accountId that can either hold an Id object or false (if there is no account id yet). Your code now assigns that value to the id property of an instance of the Account class. This class holds a proper account, so the id value must no longer be false.

Either this assignment is in error or a type check should be added for that assignment.

class Id
{
    public $id;

    public function __construct($id)
    {
        $this->id = $id;
    }

}

class Account
{
    /** @var  Id $id */
    public $id;
}

$account_id = false;

if (starsAreRight()) {
    $account_id = new Id(42);
}

$account = new Account();
if ($account instanceof Id)
{
    $account->id = $account_id;
}
Loading history...
90 12
        $this->content = '';
91
92 12
    }
93
94
    /**
95
     * @return bool|string
96
     */
97
    abstract public function build();
98
99
    /**
     * Builds the HTMLPurifier instance stored in $this->purifier.
     *
     * Starts from HTMLPurifier's default configuration and, when a cache
     * location is supplied, writes it to the "Cache.SerializerPath" directive
     * before the purifier is constructed.
     *
     * @param null|string $defaultPurifierSerializerCache Serializer cache path, or null to leave the directive untouched
     */
    public function initPurifier($defaultPurifierSerializerCache = null)
    {
        $purifierSettings = \HTMLPurifier_Config::createDefault();

        // Cache.SerializerPath defaults to null, which places the cache inside
        // the vendor HTMLPurifier library under DefinitionCache/Serializer, so
        // the directive is only set when a location was actually given.
        if (null !== $defaultPurifierSerializerCache) {
            $purifierSettings->set('Cache.SerializerPath', $defaultPurifierSerializerCache);
        }

        $this->purifier = new \HTMLPurifier($purifierSettings);
    }
115
116
    /**
     * Returns the DiffCache that wraps the configured cache provider.
     *
     * One DiffCache is kept per provider object in $this->diffCaches, keyed by
     * the provider's spl_object_hash(), and is created on first request.
     *
     * @return DiffCache|null Null when no diff cache is available
     */
    protected function getDiffCache()
    {
        if (!$this->hasDiffCache()) {
            return null;
        }

        // Read the provider once so the null check, the hash and the DiffCache
        // constructor all work on the same value, and so null can never be
        // handed to spl_object_hash() or DiffCache.
        $cacheProvider = $this->getConfig()->getCacheProvider();
        if (null === $cacheProvider) {
            return null;
        }

        $hash = spl_object_hash($cacheProvider);

        if (!array_key_exists($hash, $this->diffCaches)) {
            $this->diffCaches[$hash] = new DiffCache($cacheProvider);
        }

        return $this->diffCaches[$hash];
    }
133
134
    /**
     * Tells whether the current config carries a cache provider.
     *
     * @return bool True when getCacheProvider() on the config is not null
     */
    protected function hasDiffCache()
    {
        $cacheProvider = $this->getConfig()->getCacheProvider();

        return $cacheProvider !== null;
    }
141
142
    /**
     * Returns the configuration object currently held by this diff.
     *
     * @return HtmlDiffConfig
     */
    public function getConfig()
    {
        return $this->config;
    }
149
150
    /**
     * Replaces the configuration and rebuilds the purifier from it.
     *
     * The purifier is re-initialised with the cache location reported by the
     * new config's getPurifierCacheLocation().
     *
     * @param HtmlDiffConfig $config
     *
     * @return AbstractDiff This instance, for chaining
     */
    public function setConfig(HtmlDiffConfig $config)
    {
        $this->config = $config;

        $purifierCacheLocation = $this->config->getPurifierCacheLocation();
        $this->initPurifier($purifierCacheLocation);

        return $this;
    }
162
163
    /**
     * Reads the match threshold from the config object.
     *
     * @return int
     *
     * @deprecated since 0.1.0
     */
    public function getMatchThreshold()
    {
        $matchThreshold = $this->config->getMatchThreshold();

        return $matchThreshold;
    }
172
173
    /**
     * Forwards the match threshold to the config object.
     *
     * @param int $matchThreshold
     *
     * @return AbstractDiff This instance, for chaining
     *
     * @deprecated since 0.1.0
     */
    public function setMatchThreshold($matchThreshold)
    {
        $settings = $this->config;
        $settings->setMatchThreshold($matchThreshold);

        return $this;
    }
186
187
    /**
     * Forwards the list of special-case characters to the config object.
     *
     * @param array $chars
     *
     * @deprecated since 0.1.0
     */
    public function setSpecialCaseChars(array $chars)
    {
        $settings = $this->config;
        $settings->setSpecialCaseChars($chars);
    }
196
197
    /**
     * Reads the special-case characters from the config object.
     *
     * @return array|null
     *
     * @deprecated since 0.1.0
     */
    public function getSpecialCaseChars()
    {
        $specialCaseChars = $this->config->getSpecialCaseChars();

        return $specialCaseChars;
    }
206
207
    /**
     * Asks the config object to add one special-case character.
     *
     * @param string $char
     *
     * @deprecated since 0.1.0
     */
    public function addSpecialCaseChar($char)
    {
        $settings = $this->config;
        $settings->addSpecialCaseChar($char);
    }
216
217
    /**
     * Asks the config object to remove one special-case character.
     *
     * @param string $char
     *
     * @deprecated since 0.1.0
     */
    public function removeSpecialCaseChar($char)
    {
        $settings = $this->config;
        $settings->removeSpecialCaseChar($char);
    }
226
227
    /**
     * Forwards the list of special-case tags to the config object.
     *
     * @param array $tags Tag names; defaults to an empty list
     *
     * @deprecated since 0.1.0
     */
    public function setSpecialCaseTags(array $tags = array())
    {
        // This must target the tag list. The previous code called
        // setSpecialCaseChars(), which overwrote the special-case characters
        // with tag names and left the tags themselves unchanged.
        $this->config->setSpecialCaseTags($tags);
    }
236
237
    /**
     * Asks the config object to add one special-case tag.
     *
     * @param string $tag
     *
     * @deprecated since 0.1.0
     */
    public function addSpecialCaseTag($tag)
    {
        $settings = $this->config;
        $settings->addSpecialCaseTag($tag);
    }
246
247
    /**
     * Asks the config object to remove one special-case tag.
     *
     * @param string $tag
     *
     * @deprecated since 0.1.0
     */
    public function removeSpecialCaseTag($tag)
    {
        $settings = $this->config;
        $settings->removeSpecialCaseTag($tag);
    }
256
257
    /**
     * Reads the special-case tags from the config object.
     *
     * @return array|null
     *
     * @deprecated since 0.1.0
     */
    public function getSpecialCaseTags()
    {
        $specialCaseTags = $this->config->getSpecialCaseTags();

        return $specialCaseTags;
    }
266
267
    /**
     * Returns the "old" HTML as stored in $this->oldText.
     *
     * @return string
     */
    public function getOldHtml()
    {
        $oldHtml = $this->oldText;

        return $oldHtml;
    }
274
275
    /**
     * Returns the "new" HTML as stored in $this->newText.
     *
     * @return string
     */
    public function getNewHtml()
    {
        $newHtml = $this->newText;

        return $newHtml;
    }
282
283
    /**
     * Returns the diff output currently held in $this->content.
     *
     * @return string
     */
    public function getDifference()
    {
        $difference = $this->content;

        return $difference;
    }
290
291
    /**
     * Discards the diff output by resetting $this->content to null.
     *
     * @return void
     */
    public function clearContent()
    {
        // Note: the property is reset to null, not to an empty string.
        $this->content = null;
    }
300
301
    /**
     * Forwards the group-diffs flag to the config object.
     *
     * @param bool $boolean
     *
     * @return $this
     *
     * @deprecated since 0.1.0
     */
    public function setGroupDiffs($boolean)
    {
        $settings = $this->config;
        $settings->setGroupDiffs($boolean);

        return $this;
    }
314
315
    /**
     * Reads the group-diffs flag from the config object.
     *
     * @return bool
     *
     * @deprecated since 0.1.0
     */
    public function isGroupDiffs()
    {
        $groupDiffs = $this->config->isGroupDiffs();

        return $groupDiffs;
    }
324
325
    /**
     * Builds a case-insensitive regular expression for an opening tag.
     *
     * The pattern is "<" followed by the tag name and any run of characters
     * other than ">". The tag name is inserted verbatim, without escaping.
     *
     * @param string $tag Tag name, e.g. "strong"
     *
     * @return string Regular expression including delimiters and the "i" modifier
     */
    protected function getOpeningTag($tag)
    {
        return sprintf('/<%s[^>]*/i', $tag);
    }
334
335
    /**
     * Builds the literal closing tag for a tag name.
     *
     * @param string $tag Tag name, e.g. "strong"
     *
     * @return string The text "</" + tag name + ">"
     */
    protected function getClosingTag($tag)
    {
        return sprintf('</%s>', $tag);
    }
344
345
    /**
     * Extracts the text between the first $start and the last $end after it.
     *
     * @param string $str   Text to search
     * @param string $start Opening marker
     * @param string $end   Closing marker
     *
     * @return string The enclosed text, or '' when either marker is missing
     */
    protected function getStringBetween($str, $start, $end)
    {
        // Split once on the first opening marker; a single piece means it
        // never occurred.
        $aroundStart = explode($start, $str, 2);
        if (count($aroundStart) < 2) {
            return '';
        }

        // Split the remainder on every closing marker; a single piece means
        // there is no closing marker after the opening one.
        $segments = explode($end, $aroundStart[1]);
        if (count($segments) < 2) {
            return '';
        }

        // Drop whatever follows the last closing marker and glue the rest back
        // together, so earlier closing markers stay part of the result.
        array_pop($segments);

        return implode($end, $segments);
    }
366
367
    /**
     * Sanitizes an HTML fragment with the HTMLPurifier instance in $this->purifier.
     *
     * An earlier Tidy-based branch was removed from this method. It was
     * guarded by "&& false" and therefore never executed, and it called
     * getStringBetween() without its required $end argument.
     *
     * @param string $html
     *
     * @return string The value returned by \HTMLPurifier::purify()
     */
    protected function purifyHtml($html)
    {
        return $this->purifier->purify($html);
    }
385
386 12
    /**
     * Fills $this->oldWords and $this->newWords from the stored texts.
     *
     * Each text is first broken up by explode() and the result is then passed
     * through convertHtmlToListOfWords(); the old text is processed first.
     */
    protected function splitInputsToWords()
    {
        $oldCharacters = $this->explode($this->oldText);
        $this->oldWords = $this->convertHtmlToListOfWords($oldCharacters);

        $newCharacters = $this->explode($this->newText);
        $this->newWords = $this->convertHtmlToListOfWords($newCharacters);
    }
391
392
    /**
     * Checks whether the text is alphanumeric once special-case characters are removed.
     *
     * @param string $text
     *
     * @return bool Result of ctype_alnum() on the stripped text
     */
    protected function isPartOfWord($text)
    {
        $specialCaseChars = $this->config->getSpecialCaseChars();
        $stripped = str_replace($specialCaseChars, '', $text);

        return ctype_alnum($stripped);
    }
401
402
    /**
403
     * @param array $characterString
404
     *
405
     * @return array
406
     */
407 12
    protected function convertHtmlToListOfWords($characterString)
408
    {
409 12
        $mode = 'character';
410 12
        $current_word = '';
411 12
        $words = array();
412 12
        foreach ($characterString as $i => $character) {
413
            switch ($mode) {
414 12
                case 'character':
415 12
                if ($this->isStartOfTag($character)) {
416 11
                    if ($current_word != '') {
417 10
                        $words[] = $current_word;
418 10
                    }
419
420 11
                    $current_word = '<';
421 11
                    $mode = 'tag';
422 12
                } elseif (preg_match("/\s/", $character)) {
423 12
                    if ($current_word !== '') {
424 12
                        $words[] = $current_word;
425 12
                    }
426 12
                    $current_word = preg_replace('/\s+/S', ' ', $character);
427 12
                    $mode = 'whitespace';
428 12
                } else {
429
                    if (
430 12
                        (ctype_alnum($character) && (strlen($current_word) == 0 || $this->isPartOfWord($current_word))) ||
0 ignored issues
show
Coding Style introduced by
This line exceeds maximum limit of 120 characters; contains 122 characters

Overly long lines are hard to read on any screen. Most code styles therefore impose a maximum limit on the number of characters in a line.

Loading history...
431 12
                        (in_array($character, $this->config->getSpecialCaseChars()) && isset($characterString[$i + 1]) && $this->isPartOfWord($characterString[$i + 1]))
0 ignored issues
show
Coding Style introduced by
This line exceeds maximum limit of 120 characters; contains 168 characters

Overly long lines are hard to read on any screen. Most code styles therefore impose a maximum limit on the number of characters in a line.

Loading history...
432 12
                    ) {
433 12
                        $current_word .= $character;
434 12
                    } else {
435 12
                        $words[] = $current_word;
436 12
                        $current_word = $character;
437
                    }
438
                }
439 12
                break;
440 12
                case 'tag' :
0 ignored issues
show
Coding Style introduced by
There must be no space before the colon in a CASE statement

As per the PSR-2 coding standard, there must not be a space in front of the colon in case statements.

switch ($selector) {
    case "A": //right
        doSomething();
        break;
    case "B" : //wrong
        doSomethingElse();
        break;
}

To learn more about the PSR-2 coding standard, please refer to the PHP-Fig.

Loading history...
441 12
                if ($this->isEndOfTag($character)) {
442 12
                    $current_word .= '>';
443 12
                    $words[] = $current_word;
444 12
                    $current_word = '';
445
446 12
                    if (!preg_match('[^\s]', $character)) {
447 12
                        $mode = 'whitespace';
448 12
                    } else {
449
                        $mode = 'character';
450
                    }
451 12
                } else {
452 12
                    $current_word .= $character;
453
                }
454 12
                break;
455 12
                case 'whitespace':
456 12
                if ($this->isStartOfTag($character)) {
457 12
                    if ($current_word !== '') {
458 12
                        $words[] = $current_word;
459 12
                    }
460 12
                    $current_word = '<';
461 12
                    $mode = 'tag';
462 12
                } elseif (preg_match("/\s/", $character)) {
463 10
                    $current_word .= $character;
464 10
                    $current_word = preg_replace('/\s+/S', ' ', $current_word);
465 10
                } else {
466 12
                    if ($current_word != '') {
467 12
                        $words[] = $current_word;
468 12
                    }
469 12
                    $current_word = $character;
470 12
                    $mode = 'character';
471
                }
472 12
                break;
473
                default:
474
                break;
475
            }
476 12
        }
477 12
        if ($current_word != '') {
478
            $words[] = $current_word;
479
        }
480
481 12
        return $words;
482
    }
483
484
    /**
     * Tells whether the value is the tag-opening character "<".
     *
     * @param string $val
     *
     * @return bool
     */
    protected function isStartOfTag($val)
    {
        // Strict comparison: with "==" a non-string such as boolean true
        // compares equal to '<'.
        return $val === '<';
    }
493
494
    /**
     * Tells whether the value is the tag-closing character ">".
     *
     * @param string $val
     *
     * @return bool
     */
    protected function isEndOfTag($val)
    {
        // Strict comparison: with "==" a non-string such as boolean true
        // compares equal to '>'.
        return $val === '>';
    }
503
504
    /**
     * Tells whether the value contains no non-whitespace character.
     *
     * An empty string therefore counts as whitespace.
     *
     * @param string $value
     *
     * @return bool
     */
    protected function isWhiteSpace($value)
    {
        // The pattern needs explicit delimiters. Written as '[^\s]', PCRE
        // takes "[" and "]" as the delimiters and the pattern becomes "^\s",
        // which inverted the result for any value starting with whitespace.
        return !preg_match('/[^\s]/', $value);
    }
513
514
    /**
     * Splits a string into its individual UTF-8 characters.
     *
     * No preg_split() flags are passed, so the empty pieces found before the
     * first and after the last character are part of the returned list.
     *
     * @param string $value
     *
     * @return array Result of preg_split() with an empty pattern in UTF-8 mode
     */
    protected function explode($value)
    {
        // An empty pattern with the "u" modifier matches between every pair of
        // UTF-8 characters (as suggested by @onassar).
        $characters = preg_split('//u', $value);

        return $characters;
    }
524
}
525