Passed
Pull Request — master (#89)
by
unknown
04:35
created

AbstractDiff::purifyHtml()   A

Complexity

Conditions 2
Paths 2

Size

Total Lines 7
Code Lines 3

Duplication

Lines 0
Ratio 0 %

Code Coverage

Tests 3
CRAP Score 2.0625

Importance

Changes 1
Bugs 0 Features 0
Metric Value
cc 2
eloc 3
c 1
b 0
f 0
nc 2
nop 1
dl 0
loc 7
ccs 3
cts 4
cp 0.75
crap 2.0625
rs 10
1
<?php
2
3
namespace Caxy\HtmlDiff;
4
5
use Caxy\HtmlDiff\Util\MbStringUtil;
6
7
/**
 * Class AbstractDiff.
 *
 * Shared base for the diff builders: stores the two input documents and the
 * HtmlDiffConfig, optionally purifies the inputs with HTMLPurifier, and
 * tokenizes HTML into a flat list of "words" (complete tags, whitespace runs
 * and text tokens). Concrete subclasses implement build().
 */
abstract class AbstractDiff
{
    /**
     * @var array
     *
     * @deprecated since 0.1.0
     */
    public static $defaultSpecialCaseTags = array('strong', 'b', 'i', 'big', 'small', 'u', 'sub', 'sup', 'strike', 's', 'p');

    /**
     * @var array
     *
     * @deprecated since 0.1.0
     */
    public static $defaultSpecialCaseChars = array('.', ',', '(', ')', '\'');

    /**
     * @var bool
     *
     * @deprecated since 0.1.0
     */
    public static $defaultGroupDiffs = true;

    /**
     * Configuration every deprecated proxy method below delegates to.
     *
     * @var HtmlDiffConfig
     */
    protected $config;

    /**
     * Diff output; '' after construction, null after clearContent().
     *
     * @var string
     */
    protected $content;

    /**
     * @var string
     */
    protected $oldText;

    /**
     * @var string
     */
    protected $newText;

    /**
     * @var array
     */
    protected $oldWords = array();

    /**
     * @var array
     */
    protected $newWords = array();

    /**
     * DiffCache instances keyed by the spl_object_hash() of their cache provider.
     *
     * @var DiffCache[]
     */
    protected $diffCaches = array();

    /**
     * Only set once initPurifier() has run.
     *
     * @var \HTMLPurifier
     */
    protected $purifier;

    /**
     * Caller-supplied purifier configuration; null means "use the HTMLPurifier default".
     *
     * @var \HTMLPurifier_Config|null
     */
    protected $purifierConfig = null;

    /**
     * Raised to true whenever the old or new word list is replaced.
     *
     * @see array_slice_cached();
     * @var bool
     */
    protected $resetCache = false;

    /**
     * @var MbStringUtil
     */
    protected $stringUtil;

    /**
     * AbstractDiff constructor.
     *
     * Creates a fresh HtmlDiffConfig with the given encoding; the two optional
     * arguments only override the config when they are not null.
     *
     * @param string     $oldText
     * @param string     $newText
     * @param string     $encoding
     * @param null|array $specialCaseTags
     * @param null|bool  $groupDiffs
     */
    public function __construct($oldText, $newText, $encoding = 'UTF-8', $specialCaseTags = null, $groupDiffs = null)
    {
        $this->stringUtil = new MbStringUtil($oldText, $newText);

        $this->setConfig(HtmlDiffConfig::create()->setEncoding($encoding));

        if ($specialCaseTags !== null) {
            $this->config->setSpecialCaseTags($specialCaseTags);
        }

        if ($groupDiffs !== null) {
            $this->config->setGroupDiffs($groupDiffs);
        }

        $this->oldText = $oldText;
        $this->newText = $newText;
        $this->content = '';
    }

    /**
     * Builds the diff between the old and the new text.
     *
     * @return bool|string
     */
    abstract public function build();

    /**
     * Initializes HTMLPurifier with cache location.
     *
     * Uses the configuration passed to setHTMLPurifierConfig() when there is
     * one, otherwise HTMLPurifier's default configuration. Note that the
     * configuration object is modified in place.
     *
     * @param null|string $defaultPurifierSerializerCache
     */
    public function initPurifier($defaultPurifierSerializerCache = null)
    {
        if (null !== $this->purifierConfig) {
            $HTMLPurifierConfig = $this->purifierConfig;
        } else {
            $HTMLPurifierConfig = \HTMLPurifier_Config::createDefault();
        }

        // Cache.SerializerPath defaults to Null and sets
        // the location to inside the vendor HTMLPurifier library
        // under the DefinitionCache/Serializer folder.
        if (null !== $defaultPurifierSerializerCache) {
            $HTMLPurifierConfig->set('Cache.SerializerPath', $defaultPurifierSerializerCache);
        }

        // Deliberately permissive: this setting allows the cache files to be
        // deleted by any user, as they are typically created by the web/php
        // user (www-user, php-fpm, etc.)
        $HTMLPurifierConfig->set('Cache.SerializerPermissions', 0777);

        $this->purifier = new \HTMLPurifier($HTMLPurifierConfig);
    }

    /**
     * Prepare (purify) the HTML
     *
     * Does nothing when the purifier is disabled in the config; otherwise both
     * input texts are replaced by their purified versions.
     *
     * @return void
     */
    protected function prepare()
    {
        if (!$this->config->isPurifierEnabled()) {
            return;
        }

        $this->initPurifier($this->config->getPurifierCacheLocation());

        $this->oldText = $this->purifyHtml($this->oldText);
        $this->newText = $this->purifyHtml($this->newText);
    }

    /**
     * Returns the DiffCache wrapping the configured cache provider, creating
     * it on first use; null when no provider is configured.
     *
     * @return DiffCache|null
     */
    protected function getDiffCache()
    {
        if (!$this->hasDiffCache()) {
            return null;
        }

        $provider = $this->getConfig()->getCacheProvider();
        $hash = spl_object_hash($provider);

        if (!array_key_exists($hash, $this->diffCaches)) {
            $this->diffCaches[$hash] = new DiffCache($provider);
        }

        return $this->diffCaches[$hash];
    }

    /**
     * @return bool true when the config has a cache provider
     */
    protected function hasDiffCache()
    {
        return null !== $this->getConfig()->getCacheProvider();
    }

    /**
     * @return HtmlDiffConfig
     */
    public function getConfig()
    {
        return $this->config;
    }

    /**
     * @param HtmlDiffConfig $config
     *
     * @return AbstractDiff
     */
    public function setConfig(HtmlDiffConfig $config)
    {
        $this->config = $config;

        return $this;
    }

    /**
     * @return int
     *
     * @deprecated since 0.1.0
     */
    public function getMatchThreshold()
    {
        return $this->config->getMatchThreshold();
    }

    /**
     * @param int $matchThreshold
     *
     * @return AbstractDiff
     *
     * @deprecated since 0.1.0
     */
    public function setMatchThreshold($matchThreshold)
    {
        $this->config->setMatchThreshold($matchThreshold);

        return $this;
    }

    /**
     * @param array $chars
     *
     * @deprecated since 0.1.0
     */
    public function setSpecialCaseChars(array $chars)
    {
        $this->config->setSpecialCaseChars($chars);
    }

    /**
     * @return array|null
     *
     * @deprecated since 0.1.0
     */
    public function getSpecialCaseChars()
    {
        return $this->config->getSpecialCaseChars();
    }

    /**
     * @param string $char
     *
     * @deprecated since 0.1.0
     */
    public function addSpecialCaseChar($char)
    {
        $this->config->addSpecialCaseChar($char);
    }

    /**
     * @param string $char
     *
     * @deprecated since 0.1.0
     */
    public function removeSpecialCaseChar($char)
    {
        $this->config->removeSpecialCaseChar($char);
    }

    /**
     * Replaces the special-case tags on the config.
     *
     * @param array $tags
     *
     * @deprecated since 0.1.0
     */
    public function setSpecialCaseTags(array $tags = array())
    {
        // Must target the tag list; this previously overwrote the special-case
        // characters instead and left the tags untouched.
        $this->config->setSpecialCaseTags($tags);
    }

    /**
     * @param string $tag
     *
     * @deprecated since 0.1.0
     */
    public function addSpecialCaseTag($tag)
    {
        $this->config->addSpecialCaseTag($tag);
    }

    /**
     * @param string $tag
     *
     * @deprecated since 0.1.0
     */
    public function removeSpecialCaseTag($tag)
    {
        $this->config->removeSpecialCaseTag($tag);
    }

    /**
     * @return array|null
     *
     * @deprecated since 0.1.0
     */
    public function getSpecialCaseTags()
    {
        return $this->config->getSpecialCaseTags();
    }

    /**
     * @return string the old text (purified once prepare() has run)
     */
    public function getOldHtml()
    {
        return $this->oldText;
    }

    /**
     * @return string the new text (purified once prepare() has run)
     */
    public function getNewHtml()
    {
        return $this->newText;
    }

    /**
     * @return string the current diff content
     */
    public function getDifference()
    {
        return $this->content;
    }

    /**
     * Clears the diff content.
     *
     * The content is reset to null (not ''), so getDifference() returns null
     * until new content is assigned.
     *
     * @return void
     */
    public function clearContent()
    {
        $this->content = null;
    }

    /**
     * @param bool $boolean
     *
     * @return $this
     *
     * @deprecated since 0.1.0
     */
    public function setGroupDiffs($boolean)
    {
        $this->config->setGroupDiffs($boolean);

        return $this;
    }

    /**
     * @return bool
     *
     * @deprecated since 0.1.0
     */
    public function isGroupDiffs()
    {
        return $this->config->isGroupDiffs();
    }

    /**
     * Sets the configuration used by the next initPurifier() call.
     *
     * @param \HTMLPurifier_Config $config
     */
    public function setHTMLPurifierConfig(\HTMLPurifier_Config $config)
    {
        $this->purifierConfig = $config;
    }

    /**
     * Builds a case-insensitive PCRE pattern matching "<$tag" followed by any
     * characters other than '>'. $tag is inserted into the pattern unescaped.
     *
     * @param string $tag
     *
     * @return string
     */
    protected function getOpeningTag($tag)
    {
        return '/<'.$tag.'[^>]*/i';
    }

    /**
     * Returns the literal closing tag (not a pattern) for $tag.
     *
     * @param string $tag
     *
     * @return string
     */
    protected function getClosingTag($tag)
    {
        return '</'.$tag.'>';
    }

    /**
     * Runs $html through the purifier; returns it unchanged when no purifier
     * has been initialized.
     *
     * @param string $html
     *
     * @return string
     */
    protected function purifyHtml($html)
    {
        if (!$this->purifier) {
            return $html;
        }

        return $this->purifier->purify($html);
    }

    /**
     * Tokenizes both input texts and stores the results as the old/new word lists.
     *
     * @return void
     */
    protected function splitInputsToWords()
    {
        $this->setOldWords($this->convertHtmlToListOfWords($this->explode($this->oldText)));
        $this->setNewWords($this->convertHtmlToListOfWords($this->explode($this->newText)));
    }

    /**
     * @param array $oldWords
     */
    protected function setOldWords(array $oldWords)
    {
        $this->resetCache = true;
        $this->oldWords = $oldWords;
    }

    /**
     * @param array $newWords
     */
    protected function setNewWords(array $newWords)
    {
        $this->resetCache = true;
        $this->newWords = $newWords;
    }

    /**
     * Tells whether $text is alphanumeric once the configured special-case
     * characters have been stripped from it.
     *
     * @param string $text
     *
     * @return bool
     */
    protected function isPartOfWord($text)
    {
        return $this->ctypeAlphanumUnicode(str_replace($this->config->getSpecialCaseChars(), '', $text));
    }

    /**
     * Groups a list of single characters into words.
     *
     * A small state machine with three modes ('character', 'tag',
     * 'whitespace') walks the characters and emits complete tags ("<...>"),
     * whitespace runs (collapsed to a single space unless the config keeps
     * new lines) and text tokens. Empty tokens are never emitted.
     *
     * @param array $characterString list of single characters, as returned by explode()
     *
     * @return array
     */
    protected function convertHtmlToListOfWords($characterString)
    {
        $mode = 'character';
        $current_word = '';
        $words = array();
        $keepNewLines = $this->getConfig()->isKeepNewLines();

        foreach ($characterString as $i => $character) {
            switch ($mode) {
                case 'character':
                    if ($this->isStartOfTag($character)) {
                        if ($current_word !== '') {
                            $words[] = $current_word;
                        }

                        $current_word = '<';
                        $mode = 'tag';
                    } elseif (preg_match('/\s/u', $character)) {
                        if ($current_word !== '') {
                            $words[] = $current_word;
                        }

                        $current_word = $keepNewLines ? $character : preg_replace('/\s+/Su', ' ', $character);
                        $mode = 'whitespace';
                    } else {
                        // An alphanumeric character extends the current word when that
                        // word is empty or is itself (part of) a word.
                        $extendsWord = ($this->ctypeAlphanumUnicode($character) === true)
                            && ($this->stringUtil->strlen($current_word) === 0 || $this->isPartOfWord($current_word));

                        // A special-case character (e.g. '.') stays attached when the
                        // character right after it is part of a word.
                        $joinsNextWord = !$extendsWord
                            && in_array($character, $this->config->getSpecialCaseChars())
                            && isset($characterString[$i + 1])
                            && $this->isPartOfWord($characterString[$i + 1]);

                        if ($extendsWord || $joinsNextWord) {
                            $current_word .= $character;
                        } else {
                            // Guarded like every other branch, so that input starting
                            // with punctuation does not yield an empty leading word.
                            if ($current_word !== '') {
                                $words[] = $current_word;
                            }

                            $current_word = $character;
                        }
                    }
                    break;
                case 'tag':
                    if ($this->isEndOfTag($character)) {
                        $current_word .= '>';
                        $words[] = $current_word;
                        $current_word = '';

                        // NOTE: '[^\s]u' is parsed with '[' and ']' as delimiters, i.e.
                        // as the pattern ^\s. It never matches '>', so with the default
                        // isEndOfTag() the whitespace branch is always taken. Kept as is:
                        // continuing in whitespace mode with an empty word is what keeps
                        // the following text from producing an empty token.
                        if (!preg_match('[^\s]u', $character)) {
                            $mode = 'whitespace';
                        } else {
                            $mode = 'character';
                        }
                    } else {
                        $current_word .= $character;
                    }
                    break;
                case 'whitespace':
                    if ($this->isStartOfTag($character)) {
                        if ($current_word !== '') {
                            $words[] = $current_word;
                        }

                        $current_word = '<';
                        $mode = 'tag';
                    } elseif (preg_match('/\s/u', $character)) {
                        $current_word .= $character;

                        if (!$keepNewLines) {
                            $current_word = preg_replace('/\s+/Su', ' ', $current_word);
                        }
                    } else {
                        if ($current_word !== '') {
                            $words[] = $current_word;
                        }

                        $current_word = $character;
                        $mode = 'character';
                    }
                    break;
                default:
                    break;
            }
        }

        if ($current_word !== '') {
            $words[] = $current_word;
        }

        return $words;
    }

    /**
     * @param string $val
     *
     * @return bool
     */
    protected function isStartOfTag($val)
    {
        return $val === '<';
    }

    /**
     * @param string $val
     *
     * @return bool
     */
    protected function isEndOfTag($val)
    {
        return $val === '>';
    }

    /**
     * Tells whether $value consists of whitespace only (true for '').
     *
     * @param string $value
     *
     * @return bool
     */
    protected function isWhiteSpace($value)
    {
        // Proper '/' delimiters: the former '[^\s]u' was read as the pattern
        // ^\s, which reported every string not starting with whitespace
        // (e.g. 'abc') as whitespace.
        return !preg_match('/[^\s]/u', $value);
    }

    /**
     * Splits a UTF-8 string into a list of its characters.
     *
     * @param string $value
     *
     * @return array
     */
    protected function explode($value)
    {
        // as suggested by @onassar
        return preg_split('//u', $value, -1, PREG_SPLIT_NO_EMPTY);
    }

    /**
     * Tells whether $str is non-empty and made only of ASCII letters, digits
     * and Unicode letters.
     *
     * @param string $str
     *
     * @return bool
     */
    protected function ctypeAlphanumUnicode($str)
    {
        return preg_match('/^[a-zA-Z0-9\pL]+$/u', $str) === 1;
    }
}
585