Passed
Push — master ( 6f39bc...2ba271 )
by Josh
02:32
created

AbstractDiff::getStringBetween()   A

Complexity

Conditions 3
Paths 3

Size

Total Lines 13
Code Lines 7

Duplication

Lines 0
Ratio 0 %

Code Coverage

Tests 0
CRAP Score 12

Importance

Changes 0
Metric Value
cc 3
eloc 7
nc 3
nop 3
dl 0
loc 13
ccs 0
cts 8
cp 0
crap 12
rs 10
c 0
b 0
f 0

1 Method

Rating   Name   Duplication   Size   Complexity  
A AbstractDiff::purifyHtml() 0 3 1
1
<?php
2
3
namespace Caxy\HtmlDiff;
4
5
use Caxy\HtmlDiff\Util\MbStringUtil;
6
7
/**
 * Class AbstractDiff.
 *
 * Shared base for the HTML diff builders: holds the old/new HTML, the diff
 * configuration, the HTMLPurifier instance used to clean the inputs, and the
 * tokenizer that turns HTML into a flat list of "words" (tags, whitespace
 * runs and text words). Concrete subclasses implement build().
 */
abstract class AbstractDiff
{
    /**
     * @var array
     *
     * @deprecated since 0.1.0
     */
    public static $defaultSpecialCaseTags = array('strong', 'b', 'i', 'big', 'small', 'u', 'sub', 'sup', 'strike', 's', 'p');

    /**
     * @var array
     *
     * @deprecated since 0.1.0
     */
    public static $defaultSpecialCaseChars = array('.', ',', '(', ')', '\'');

    /**
     * @var bool
     *
     * @deprecated since 0.1.0
     */
    public static $defaultGroupDiffs = true;

    /**
     * Configuration all the deprecated accessors on this class delegate to.
     *
     * @var HtmlDiffConfig
     */
    protected $config;

    /**
     * The diff output. Starts as an empty string; clearContent() resets it to null.
     *
     * @var string|null
     */
    protected $content;

    /**
     * Old HTML; replaced by its purified form once prepare() has run.
     *
     * @var string
     */
    protected $oldText;

    /**
     * New HTML; replaced by its purified form once prepare() has run.
     *
     * @var string
     */
    protected $newText;

    /**
     * Word list built from $oldText by splitInputsToWords().
     *
     * @var array
     */
    protected $oldWords = array();

    /**
     * Word list built from $newText by splitInputsToWords().
     *
     * @var array
     */
    protected $newWords = array();

    /**
     * DiffCache instances keyed by spl_object_hash() of the cache provider.
     *
     * @var DiffCache[]
     */
    protected $diffCaches = array();

    /**
     * Created by initPurifier(); not available before that call.
     *
     * @var \HTMLPurifier
     */
    protected $purifier;

    /**
     * Optional caller-supplied purifier configuration (see setHTMLPurifierConfig()).
     *
     * @var \HTMLPurifier_Config|null
     */
    protected $purifierConfig = null;

    /**
     * Set to true whenever the old or new word list is replaced.
     *
     * @see array_slice_cached();
     * @var bool
     */
    protected $resetCache = false;

    /**
     * @var MbStringUtil
     */
    protected $stringUtil;

    /**
     * AbstractDiff constructor.
     *
     * Creates a fresh HtmlDiffConfig with the given encoding. The two optional
     * arguments are only applied to that config when they are not null.
     *
     * @param string     $oldText
     * @param string     $newText
     * @param string     $encoding
     * @param null|array $specialCaseTags
     * @param null|bool  $groupDiffs
     */
    public function __construct($oldText, $newText, $encoding = 'UTF-8', $specialCaseTags = null, $groupDiffs = null)
    {
        $this->stringUtil = new MbStringUtil($oldText, $newText);

        $this->setConfig(HtmlDiffConfig::create()->setEncoding($encoding));

        if ($specialCaseTags !== null) {
            $this->config->setSpecialCaseTags($specialCaseTags);
        }

        if ($groupDiffs !== null) {
            $this->config->setGroupDiffs($groupDiffs);
        }

        $this->oldText = $oldText;
        $this->newText = $newText;
        $this->content = '';
    }

    /**
     * Builds the diff; implemented by the concrete diff classes.
     *
     * @return bool|string
     */
    abstract public function build();

    /**
     * Initializes HTMLPurifier with cache location.
     *
     * Uses the configuration passed to setHTMLPurifierConfig() when there is
     * one, otherwise HTMLPurifier's default configuration.
     *
     * @param null|string $defaultPurifierSerializerCache
     */
    public function initPurifier($defaultPurifierSerializerCache = null)
    {
        if (null !== $this->purifierConfig) {
            $HTMLPurifierConfig = $this->purifierConfig;
        } else {
            $HTMLPurifierConfig = \HTMLPurifier_Config::createDefault();
        }

        // Cache.SerializerPath defaults to Null and sets
        // the location to inside the vendor HTMLPurifier library
        // under the DefinitionCache/Serializer folder.
        if (!is_null($defaultPurifierSerializerCache)) {
            $HTMLPurifierConfig->set('Cache.SerializerPath', $defaultPurifierSerializerCache);
        }

        // Cache.SerializerPermissions defaults to 0744.
        // This setting allows the cache files to be deleted by any user, as they are typically
        // created by the web/php user (www-user, php-fpm, etc.)
        // NOTE(review): 0777 makes the cache world-writable; this is applied even to a
        // caller-supplied purifier config. Confirm that is acceptable for shared hosts.
        $HTMLPurifierConfig->set('Cache.SerializerPermissions', 0777);

        $this->purifier = new \HTMLPurifier($HTMLPurifierConfig);
    }

    /**
     * Prepare (purify) the HTML
     *
     * Initializes the purifier with the configured cache location and replaces
     * both input texts with their purified versions.
     *
     * @return void
     */
    protected function prepare()
    {
        $this->initPurifier($this->config->getPurifierCacheLocation());

        $this->oldText = $this->purifyHtml($this->oldText);
        $this->newText = $this->purifyHtml($this->newText);
    }

    /**
     * Returns the DiffCache bound to the configured cache provider, creating
     * and memoizing it on first use. Returns null when no provider is set.
     *
     * @return DiffCache|null
     */
    protected function getDiffCache()
    {
        if (!$this->hasDiffCache()) {
            return null;
        }

        $hash = spl_object_hash($this->getConfig()->getCacheProvider());

        if (!array_key_exists($hash, $this->diffCaches)) {
            $this->diffCaches[$hash] = new DiffCache($this->getConfig()->getCacheProvider());
        }

        return $this->diffCaches[$hash];
    }

    /**
     * Whether the config has a cache provider.
     *
     * @return bool
     */
    protected function hasDiffCache()
    {
        return null !== $this->getConfig()->getCacheProvider();
    }

    /**
     * @return HtmlDiffConfig
     */
    public function getConfig()
    {
        return $this->config;
    }

    /**
     * @param HtmlDiffConfig $config
     *
     * @return AbstractDiff
     */
    public function setConfig(HtmlDiffConfig $config)
    {
        $this->config = $config;

        return $this;
    }

    /**
     * @return int
     *
     * @deprecated since 0.1.0
     */
    public function getMatchThreshold()
    {
        return $this->config->getMatchThreshold();
    }

    /**
     * @param int $matchThreshold
     *
     * @return AbstractDiff
     *
     * @deprecated since 0.1.0
     */
    public function setMatchThreshold($matchThreshold)
    {
        $this->config->setMatchThreshold($matchThreshold);

        return $this;
    }

    /**
     * @param array $chars
     *
     * @deprecated since 0.1.0
     */
    public function setSpecialCaseChars(array $chars)
    {
        $this->config->setSpecialCaseChars($chars);
    }

    /**
     * @return array|null
     *
     * @deprecated since 0.1.0
     */
    public function getSpecialCaseChars()
    {
        return $this->config->getSpecialCaseChars();
    }

    /**
     * @param string $char
     *
     * @deprecated since 0.1.0
     */
    public function addSpecialCaseChar($char)
    {
        $this->config->addSpecialCaseChar($char);
    }

    /**
     * @param string $char
     *
     * @deprecated since 0.1.0
     */
    public function removeSpecialCaseChar($char)
    {
        $this->config->removeSpecialCaseChar($char);
    }

    /**
     * Replaces the special-case tag list on the config.
     *
     * @param array $tags
     *
     * @deprecated since 0.1.0
     */
    public function setSpecialCaseTags(array $tags = array())
    {
        // Must target the tag list; this previously called setSpecialCaseChars(),
        // which overwrote the special-case characters and left the tags untouched.
        $this->config->setSpecialCaseTags($tags);
    }

    /**
     * @param string $tag
     *
     * @deprecated since 0.1.0
     */
    public function addSpecialCaseTag($tag)
    {
        $this->config->addSpecialCaseTag($tag);
    }

    /**
     * @param string $tag
     *
     * @deprecated since 0.1.0
     */
    public function removeSpecialCaseTag($tag)
    {
        $this->config->removeSpecialCaseTag($tag);
    }

    /**
     * @return array|null
     *
     * @deprecated since 0.1.0
     */
    public function getSpecialCaseTags()
    {
        return $this->config->getSpecialCaseTags();
    }

    /**
     * @return string
     */
    public function getOldHtml()
    {
        return $this->oldText;
    }

    /**
     * @return string
     */
    public function getNewHtml()
    {
        return $this->newText;
    }

    /**
     * Returns the current diff content (null after clearContent()).
     *
     * @return string|null
     */
    public function getDifference()
    {
        return $this->content;
    }

    /**
     * Clears the diff content.
     *
     * @return void
     */
    public function clearContent()
    {
        $this->content = null;
    }

    /**
     * @param bool $boolean
     *
     * @return $this
     *
     * @deprecated since 0.1.0
     */
    public function setGroupDiffs($boolean)
    {
        $this->config->setGroupDiffs($boolean);

        return $this;
    }

    /**
     * @return bool
     *
     * @deprecated since 0.1.0
     */
    public function isGroupDiffs()
    {
        return $this->config->isGroupDiffs();
    }

    /**
     * Supplies the HTMLPurifier configuration that initPurifier() will use
     * instead of the library default.
     *
     * @param \HTMLPurifier_Config $config
     */
    public function setHTMLPurifierConfig(\HTMLPurifier_Config $config)
    {
        $this->purifierConfig = $config;
    }

    /**
     * Returns a case-insensitive regular expression (delimiters included)
     * matching "<tag" followed by any characters other than ">".
     *
     * The tag name is inserted into the pattern verbatim.
     *
     * @param string $tag
     *
     * @return string
     */
    protected function getOpeningTag($tag)
    {
        return '/<'.$tag.'[^>]*/i';
    }

    /**
     * Returns the literal closing tag, e.g. "</p>" (a plain string, not a regex).
     *
     * @param string $tag
     *
     * @return string
     */
    protected function getClosingTag($tag)
    {
        return '</'.$tag.'>';
    }

    /**
     * Runs the HTML through the purifier; initPurifier() must have been called first.
     *
     * @param string $html
     *
     * @return string
     */
    protected function purifyHtml($html)
    {
        return $this->purifier->purify($html);
    }

    /**
     * Tokenizes both input texts and stores the results as the old/new word lists.
     *
     * @return void
     */
    protected function splitInputsToWords()
    {
        $this->setOldWords($this->convertHtmlToListOfWords($this->explode($this->oldText)));
        $this->setNewWords($this->convertHtmlToListOfWords($this->explode($this->newText)));
    }

    /**
     * @param array $oldWords
     */
    protected function setOldWords(array $oldWords)
    {
        $this->resetCache = true;
        $this->oldWords   = $oldWords;
    }

    /**
     * @param array $newWords
     */
    protected function setNewWords(array $newWords)
    {
        $this->resetCache = true;
        $this->newWords   = $newWords;
    }

    /**
     * Whether the text is alphanumeric once the configured special-case
     * characters have been stripped from it.
     *
     * @param string $text
     *
     * @return bool
     */
    protected function isPartOfWord($text)
    {
        return $this->ctypeAlphanumUnicode(str_replace($this->config->getSpecialCaseChars(), '', $text));
    }

    /**
     * Groups a list of single characters into "words".
     *
     * A small state machine with three modes:
     *  - "character":  accumulating a text word;
     *  - "tag":        accumulating everything from "<" up to and including ">";
     *  - "whitespace": accumulating a run of whitespace, collapsed to a single
     *                  space unless the config asks to keep new lines.
     *
     * @param array $characterString list of characters, as produced by explode()
     *
     * @return array list of tag, whitespace and text tokens in input order
     */
    protected function convertHtmlToListOfWords($characterString)
    {
        $mode = 'character';
        $current_word = '';
        $words = array();
        $keepNewLines = $this->getConfig()->isKeepNewLines();
        // Loop-invariant: the configured special-case characters do not change while tokenizing.
        $specialCaseChars = $this->config->getSpecialCaseChars();

        foreach ($characterString as $i => $character) {
            switch ($mode) {
                case 'character':
                    if ($this->isStartOfTag($character)) {
                        if ($current_word !== '') {
                            $words[] = $current_word;
                        }

                        $current_word = '<';
                        $mode = 'tag';
                    } elseif (preg_match("/\s/u", $character)) {
                        if ($current_word !== '') {
                            $words[] = $current_word;
                        }
                        $current_word = $keepNewLines ? $character : preg_replace('/\s+/Su', ' ', $character);
                        $mode = 'whitespace';
                    } else {
                        // The character extends the current word when either
                        //  (a) it is alphanumeric and the word so far is empty or word-like, or
                        //  (b) it is a special-case character directly followed by a word character
                        //      (so "3.5" or "don't" stay in one piece).
                        if (
                            (
                                $this->ctypeAlphanumUnicode($character) === true
                                && ($this->stringUtil->strlen($current_word) === 0 || $this->isPartOfWord($current_word))
                            )
                            || (
                                in_array($character, $specialCaseChars, true)
                                && isset($characterString[$i + 1])
                                && $this->isPartOfWord($characterString[$i + 1])
                            )
                        ) {
                            $current_word .= $character;
                        } else {
                            // NOTE(review): the current word is pushed unconditionally here, so an
                            // empty string is emitted when the input starts with a non-word
                            // character. Kept as-is to preserve existing diff output.
                            $words[] = $current_word;
                            $current_word = $character;
                        }
                    }
                    break;
                case 'tag':
                    if ($this->isEndOfTag($character)) {
                        $current_word .= '>';
                        $words[] = $current_word;
                        $current_word = '';

                        // The original pattern '[^\s]u' uses bracket delimiters, i.e. it is /^\s/u.
                        // With isEndOfTag() accepting only ">", this never matches, so the next
                        // mode is "whitespace"; that mode starts a fresh word on the next
                        // non-space character without emitting an empty one.
                        $mode = preg_match('/^\s/u', $character) === 1 ? 'character' : 'whitespace';
                    } else {
                        $current_word .= $character;
                    }
                    break;
                case 'whitespace':
                    if ($this->isStartOfTag($character)) {
                        if ($current_word !== '') {
                            $words[] = $current_word;
                        }
                        $current_word = '<';
                        $mode = 'tag';
                    } elseif (preg_match("/\s/u", $character)) {
                        $current_word .= $character;
                        if (!$keepNewLines) {
                            $current_word = preg_replace('/\s+/Su', ' ', $current_word);
                        }
                    } else {
                        if ($current_word !== '') {
                            $words[] = $current_word;
                        }
                        $current_word = $character;
                        $mode = 'character';
                    }
                    break;
                default:
                    break;
            }
        }

        // Flush whatever was still being accumulated when the input ended.
        if ($current_word !== '') {
            $words[] = $current_word;
        }

        return $words;
    }

    /**
     * @param string $val
     *
     * @return bool
     */
    protected function isStartOfTag($val)
    {
        return $val === '<';
    }

    /**
     * @param string $val
     *
     * @return bool
     */
    protected function isEndOfTag($val)
    {
        return $val === '>';
    }

    /**
     * Whether the value consists solely of whitespace (an empty string counts
     * as whitespace).
     *
     * @param string $value
     *
     * @return bool
     */
    protected function isWhiteSpace($value)
    {
        // Explicit "/" delimiters: the previous pattern '[^\s]u' was parsed with "[" and "]"
        // as delimiters (i.e. /^\s/u), which inverted the result for most inputs.
        return !preg_match('/[^\s]/u', $value);
    }

    /**
     * Splits a UTF-8 string into an array of single characters.
     *
     * @param string $value
     *
     * @return array
     */
    protected function explode($value)
    {
        // as suggested by @onassar
        return preg_split('//u', $value, -1, PREG_SPLIT_NO_EMPTY);
    }

    /**
     * Whether the string is non-empty and made up only of ASCII letters,
     * digits and Unicode letters.
     *
     * @param string $str
     *
     * @return bool
     */
    protected function ctypeAlphanumUnicode($str)
    {
        return preg_match("/^[a-zA-Z0-9\pL]+$/u", $str) === 1;
    }
}
577