Passed
Push — master ( 6f39bc...2ba271 )
by Josh
02:32
created

lib/Caxy/HtmlDiff/AbstractDiff.php (1 issue)

1
<?php
2
3
namespace Caxy\HtmlDiff;
4
5
/**
6
 * Class AbstractDiff.
7
 */
8
abstract class AbstractDiff
{
    /**
     * @var array
     *
     * @deprecated since 0.1.0
     */
    public static $defaultSpecialCaseTags = array('strong', 'b', 'i', 'big', 'small', 'u', 'sub', 'sup', 'strike', 's', 'p');

    /**
     * @var array
     *
     * @deprecated since 0.1.0
     */
    public static $defaultSpecialCaseChars = array('.', ',', '(', ')', '\'');

    /**
     * @var bool
     *
     * @deprecated since 0.1.0
     */
    public static $defaultGroupDiffs = true;

    /**
     * Configuration object every deprecated getter/setter on this class delegates to.
     *
     * @var HtmlDiffConfig
     */
    protected $config;

    /**
     * The generated diff output; starts as an empty string.
     *
     * @var string
     */
    protected $content;

    /**
     * @var string
     */
    protected $oldText;

    /**
     * @var string
     */
    protected $newText;

    /**
     * @var array
     */
    protected $oldWords = array();

    /**
     * @var array
     */
    protected $newWords = array();

    /**
     * DiffCache instances keyed by the spl_object_hash() of their cache provider.
     *
     * @var DiffCache[]
     */
    protected $diffCaches = array();

    /**
     * Set by initPurifier(); null until then.
     *
     * @var \HTMLPurifier
     */
    protected $purifier;

    /**
     * Optional caller-supplied purifier configuration (see setHTMLPurifierConfig()).
     *
     * @var \HTMLPurifier_Config|null
     */
    protected $purifierConfig = null;

    /**
     * Raised whenever the old or new word list is replaced.
     *
     * @see array_slice_cached();
     * @var bool
     */
    protected $resetCache = false;

    /**
     * AbstractDiff constructor.
     *
     * Creates a fresh HtmlDiffConfig with the given encoding and applies the
     * optional overrides to it.
     *
     * @param string     $oldText
     * @param string     $newText
     * @param string     $encoding
     * @param null|array $specialCaseTags
     * @param null|bool  $groupDiffs
     */
    public function __construct($oldText, $newText, $encoding = 'UTF-8', $specialCaseTags = null, $groupDiffs = null)
    {
        // Note: this changes a process-wide mbstring setting (substitute character = space).
        mb_substitute_character(0x20);

        $this->setConfig(HtmlDiffConfig::create()->setEncoding($encoding));

        if ($specialCaseTags !== null) {
            $this->config->setSpecialCaseTags($specialCaseTags);
        }

        if ($groupDiffs !== null) {
            $this->config->setGroupDiffs($groupDiffs);
        }

        $this->oldText = $oldText;
        $this->newText = $newText;
        $this->content = '';
    }

    /**
     * @return bool|string
     */
    abstract public function build();

    /**
     * Initializes HTMLPurifier with cache location.
     *
     * Uses the configuration passed to setHTMLPurifierConfig() when there is
     * one, otherwise HTMLPurifier's default configuration.
     *
     * @param null|string $defaultPurifierSerializerCache
     */
    public function initPurifier($defaultPurifierSerializerCache = null)
    {
        if (null !== $this->purifierConfig) {
            $HTMLPurifierConfig = $this->purifierConfig;
        } else {
            $HTMLPurifierConfig = \HTMLPurifier_Config::createDefault();
        }

        // Cache.SerializerPath defaults to Null and sets
        // the location to inside the vendor HTMLPurifier library
        // under the DefinitionCache/Serializer folder.
        if (null !== $defaultPurifierSerializerCache) {
            $HTMLPurifierConfig->set('Cache.SerializerPath', $defaultPurifierSerializerCache);
        }

        // This setting allows the cache files to be deleted by any user, as they are typically
        // created by the web/php user (www-user, php-fpm, etc.)
        $HTMLPurifierConfig->set('Cache.SerializerPermissions', 0777);

        $this->purifier = new \HTMLPurifier($HTMLPurifierConfig);
    }

    /**
     * Prepare (purify) the HTML
     *
     * (Re)initializes the purifier, then replaces both input texts with their
     * purified versions.
     *
     * @return void
     */
    protected function prepare()
    {
        $this->initPurifier($this->config->getPurifierCacheLocation());

        $this->oldText = $this->purifyHtml($this->oldText);
        $this->newText = $this->purifyHtml($this->newText);
    }

    /**
     * Returns the DiffCache wrapping the configured cache provider, creating
     * it on first use, or null when no provider is configured.
     *
     * @return DiffCache|null
     */
    protected function getDiffCache()
    {
        if (!$this->hasDiffCache()) {
            return null;
        }

        $hash = spl_object_hash($this->getConfig()->getCacheProvider());

        if (!array_key_exists($hash, $this->diffCaches)) {
            $this->diffCaches[$hash] = new DiffCache($this->getConfig()->getCacheProvider());
        }

        return $this->diffCaches[$hash];
    }

    /**
     * @return bool
     */
    protected function hasDiffCache()
    {
        return null !== $this->getConfig()->getCacheProvider();
    }

    /**
     * @return HtmlDiffConfig
     */
    public function getConfig()
    {
        return $this->config;
    }

    /**
     * @param HtmlDiffConfig $config
     *
     * @return AbstractDiff
     */
    public function setConfig(HtmlDiffConfig $config)
    {
        $this->config = $config;

        return $this;
    }

    /**
     * @return int
     *
     * @deprecated since 0.1.0
     */
    public function getMatchThreshold()
    {
        return $this->config->getMatchThreshold();
    }

    /**
     * @param int $matchThreshold
     *
     * @return AbstractDiff
     *
     * @deprecated since 0.1.0
     */
    public function setMatchThreshold($matchThreshold)
    {
        $this->config->setMatchThreshold($matchThreshold);

        return $this;
    }

    /**
     * @param array $chars
     *
     * @deprecated since 0.1.0
     */
    public function setSpecialCaseChars(array $chars)
    {
        $this->config->setSpecialCaseChars($chars);
    }

    /**
     * @return array|null
     *
     * @deprecated since 0.1.0
     */
    public function getSpecialCaseChars()
    {
        return $this->config->getSpecialCaseChars();
    }

    /**
     * @param string $char
     *
     * @deprecated since 0.1.0
     */
    public function addSpecialCaseChar($char)
    {
        $this->config->addSpecialCaseChar($char);
    }

    /**
     * @param string $char
     *
     * @deprecated since 0.1.0
     */
    public function removeSpecialCaseChar($char)
    {
        $this->config->removeSpecialCaseChar($char);
    }

    /**
     * @param array $tags
     *
     * @deprecated since 0.1.0
     */
    public function setSpecialCaseTags(array $tags = array())
    {
        // Forward to the *tags* setter. This previously called
        // setSpecialCaseChars(), which replaced the special-case characters
        // with the tag list and left the tags untouched.
        $this->config->setSpecialCaseTags($tags);
    }

    /**
     * @param string $tag
     *
     * @deprecated since 0.1.0
     */
    public function addSpecialCaseTag($tag)
    {
        $this->config->addSpecialCaseTag($tag);
    }

    /**
     * @param string $tag
     *
     * @deprecated since 0.1.0
     */
    public function removeSpecialCaseTag($tag)
    {
        $this->config->removeSpecialCaseTag($tag);
    }

    /**
     * @return array|null
     *
     * @deprecated since 0.1.0
     */
    public function getSpecialCaseTags()
    {
        return $this->config->getSpecialCaseTags();
    }

    /**
     * @return string
     */
    public function getOldHtml()
    {
        return $this->oldText;
    }

    /**
     * @return string
     */
    public function getNewHtml()
    {
        return $this->newText;
    }

    /**
     * @return string
     */
    public function getDifference()
    {
        return $this->content;
    }

    /**
     * Clears the diff content.
     *
     * Note that the content is reset to null, not to the empty string the
     * constructor starts with.
     *
     * @return void
     */
    public function clearContent()
    {
        $this->content = null;
    }

    /**
     * @param bool $boolean
     *
     * @return $this
     *
     * @deprecated since 0.1.0
     */
    public function setGroupDiffs($boolean)
    {
        $this->config->setGroupDiffs($boolean);

        return $this;
    }

    /**
     * @return bool
     *
     * @deprecated since 0.1.0
     */
    public function isGroupDiffs()
    {
        return $this->config->isGroupDiffs();
    }

    /**
     * Supplies the HTMLPurifier configuration used by the next initPurifier() call.
     *
     * @param \HTMLPurifier_Config $config
     */
    public function setHTMLPurifierConfig(\HTMLPurifier_Config $config)
    {
        $this->purifierConfig = $config;
    }

    /**
     * Builds a case-insensitive regular expression matching the opening tag
     * (up to, but not including, the closing ">"). $tag is inserted verbatim.
     *
     * @param string $tag
     *
     * @return string
     */
    protected function getOpeningTag($tag)
    {
        return '/<'.$tag.'[^>]*/i';
    }

    /**
     * Returns the literal closing tag string for $tag.
     *
     * @param string $tag
     *
     * @return string
     */
    protected function getClosingTag($tag)
    {
        return '</'.$tag.'>';
    }

    /**
     * Returns the part of $str after the first $start and before the last $end,
     * or an empty string when either delimiter is missing.
     *
     * Both delimiters are handed to mb_split() and are therefore interpreted
     * as regular expression patterns.
     *
     * @param string $str
     * @param string $start
     * @param string $end
     *
     * @return string
     */
    protected function getStringBetween($str, $start, $end)
    {
        $expStr = mb_split($start, $str, 2);
        if (count($expStr) > 1) {
            $expStr = mb_split($end, $expStr[1]);
            if (count($expStr) > 1) {
                // Drop whatever follows the last $end, then restore the inner ones.
                array_pop($expStr);

                return implode($end, $expStr);
            }
        }

        return '';
    }

    /**
     * Runs $html through the purifier created by initPurifier().
     *
     * @param string $html
     *
     * @return string
     */
    protected function purifyHtml($html)
    {
        return $this->purifier->purify($html);
    }

    /**
     * Splits both input texts into characters and then into word lists.
     *
     * @return void
     */
    protected function splitInputsToWords()
    {
        $this->setOldWords($this->convertHtmlToListOfWords($this->explode($this->oldText)));
        $this->setNewWords($this->convertHtmlToListOfWords($this->explode($this->newText)));
    }

    /**
     * @param array $oldWords
     */
    protected function setOldWords(array $oldWords)
    {
        $this->resetCache = true;
        $this->oldWords   = $oldWords;
    }

    /**
     * @param array $newWords
     */
    protected function setNewWords(array $newWords)
    {
        $this->resetCache = true;
        $this->newWords   = $newWords;
    }

    /**
     * Tells whether $text is purely alphanumeric once the configured
     * special-case characters have been stripped from it.
     *
     * @param string $text
     *
     * @return bool
     */
    protected function isPartOfWord($text)
    {
        return $this->ctypeAlphanumUnicode(str_replace($this->config->getSpecialCaseChars(), '', $text));
    }

    /**
     * Groups a list of single characters into "words": complete tags
     * ("<...>"), whitespace runs and text tokens.
     *
     * A small state machine with three modes ('character', 'tag',
     * 'whitespace') walks the characters. Unless the config asks to keep new
     * lines, every whitespace run is collapsed to a single space.
     *
     * @param array $characterString
     *
     * @return array
     */
    protected function convertHtmlToListOfWords($characterString)
    {
        $mode = 'character';
        $current_word = '';
        $words = array();
        $keepNewLines = $this->getConfig()->isKeepNewLines();

        foreach ($characterString as $i => $character) {
            switch ($mode) {
                case 'character':
                    if ($this->isStartOfTag($character)) {
                        if ($current_word !== '') {
                            $words[] = $current_word;
                        }

                        $current_word = '<';
                        $mode = 'tag';
                    } elseif (preg_match("/\s/u", $character)) {
                        if ($current_word !== '') {
                            $words[] = $current_word;
                        }
                        $current_word = $keepNewLines ? $character : preg_replace('/\s+/Su', ' ', $character);
                        $mode = 'whitespace';
                    } else {
                        // The character extends the current word when it is
                        // alphanumeric and the word so far is empty or word-like, or
                        // when it is a special-case character directly followed by a
                        // word character (e.g. the "." in "3.14").
                        if (
                            ($this->ctypeAlphanumUnicode($character) && (mb_strlen($current_word) === 0 || $this->isPartOfWord($current_word))) ||
                            (in_array($character, $this->config->getSpecialCaseChars()) && isset($characterString[$i + 1]) && $this->isPartOfWord($characterString[$i + 1]))
                        ) {
                            $current_word .= $character;
                        } else {
                            // Pushed unconditionally, so an empty word can be emitted
                            // here when the current word is still empty.
                            $words[] = $current_word;
                            $current_word = $character;
                        }
                    }
                    break;
                case 'tag':
                    if ($this->isEndOfTag($character)) {
                        $current_word .= '>';
                        $words[] = $current_word;
                        $current_word = '';

                        // A tag is always followed by 'whitespace' mode. The former
                        // check here, preg_match('[^\s]u', $character), was parsed as
                        // the bracket-delimited pattern ^\s and could never match the
                        // ">" that ends a tag, so its 'character' branch was dead.
                        $mode = 'whitespace';
                    } else {
                        $current_word .= $character;
                    }
                    break;
                case 'whitespace':
                    if ($this->isStartOfTag($character)) {
                        if ($current_word !== '') {
                            $words[] = $current_word;
                        }
                        $current_word = '<';
                        $mode = 'tag';
                    } elseif (preg_match("/\s/u", $character)) {
                        $current_word .= $character;
                        if (!$keepNewLines) {
                            $current_word = preg_replace('/\s+/Su', ' ', $current_word);
                        }
                    } else {
                        if ($current_word !== '') {
                            $words[] = $current_word;
                        }
                        $current_word = $character;
                        $mode = 'character';
                    }
                    break;
                default:
                    break;
            }
        }

        if ($current_word !== '') {
            $words[] = $current_word;
        }

        return $words;
    }

    /**
     * @param string $val
     *
     * @return bool
     */
    protected function isStartOfTag($val)
    {
        return $val === '<';
    }

    /**
     * @param string $val
     *
     * @return bool
     */
    protected function isEndOfTag($val)
    {
        return $val === '>';
    }

    /**
     * Tells whether $value consists solely of whitespace (an empty string counts).
     *
     * @param string $value
     *
     * @return bool
     */
    protected function isWhiteSpace($value)
    {
        // The pattern needs explicit delimiters: '[^\s]u' is read by PCRE as the
        // bracket-delimited pattern ^\s, which inverted the result.
        return !preg_match('/[^\s]/u', $value);
    }

    /**
     * Splits a UTF-8 string into an array of single characters.
     *
     * @param string $value
     *
     * @return array
     */
    protected function explode($value)
    {
        // as suggested by @onassar
        return preg_split('//u', $value, -1, PREG_SPLIT_NO_EMPTY);
    }

    /**
     * Tells whether $str is non-empty and made up only of ASCII letters,
     * digits and Unicode letters.
     *
     * @param string $str
     *
     * @return bool
     */
    protected function ctypeAlphanumUnicode($str)
    {
        // preg_match() yields 1, 0 or false; compare so a real bool is returned.
        return preg_match("/^[a-zA-Z0-9\pL]+$/u", $str) === 1;
    }
}
601