Passed
Pull Request — master (#31)
by Josh
03:20
created

AbstractDiff::isPartOfWord()   A

Complexity

Conditions 1
Paths 1

Size

Total Lines 4
Code Lines 2

Duplication

Lines 0
Ratio 0 %

Code Coverage

Tests 2
CRAP Score 1

Importance

Changes 1
Bugs 0 Features 0
Metric Value
c 1
b 0
f 0
dl 0
loc 4
ccs 2
cts 2
cp 1
rs 10
cc 1
eloc 2
nc 1
nop 1
crap 1
1
<?php
2
3
namespace Caxy\HtmlDiff;
4
5
/**
 * Base class for HTML diff implementations.
 *
 * Stores the two HTML inputs, the configurable lists of "special case" tags
 * and characters, and provides the tokenizer that turns an HTML string into a
 * list of words, whitespace runs and tags.
 *
 * @package Caxy\HtmlDiff
 */
abstract class AbstractDiff
{
    /**
     * Tag names used as special case tags when the constructor receives null.
     *
     * @var array
     */
    public static $defaultSpecialCaseTags = array(
        'strong', 'b', 'i', 'big', 'small', 'u', 'sub', 'sup', 'strike', 's', 'p',
    );

    /**
     * Characters installed as special case characters by the constructor.
     *
     * @var array
     */
    public static $defaultSpecialCaseChars = array('.', ',', '(', ')', '\'');

    /**
     * Value used for the group-diffs flag when the constructor receives null.
     *
     * @var bool
     */
    public static $defaultGroupDiffs = true;

    /**
     * Diff output; initialised to an empty string and returned by getDifference().
     *
     * @var string
     */
    protected $content;

    /**
     * @var string
     */
    protected $oldText;

    /**
     * @var string
     */
    protected $newText;

    /**
     * Tokens produced from $oldText by splitInputsToWords().
     *
     * @var array
     */
    protected $oldWords = array();

    /**
     * Tokens produced from $newText by splitInputsToWords().
     *
     * @var array
     */
    protected $newWords = array();

    /**
     * Encoding name passed to the constructor (stored only; not read inside this class).
     *
     * @var string
     */
    protected $encoding;

    /**
     * Regex patterns (one per special case tag) built by getOpeningTag().
     *
     * @var array
     */
    protected $specialCaseOpeningTags = array();

    /**
     * Literal closing tags (one per special case tag) built by getClosingTag().
     *
     * @var array
     */
    protected $specialCaseClosingTags = array();

    /**
     * @var array|null
     */
    protected $specialCaseTags;

    /**
     * @var array|null
     */
    protected $specialCaseChars;

    /**
     * @var bool|null
     */
    protected $groupDiffs;

    /**
     * NOTE(review): presumably a percentage used by subclasses when matching; not read in this class.
     *
     * @var int
     */
    protected $matchThreshold = 80;

    /**
     * AbstractDiff constructor.
     *
     * Both texts are trimmed and passed through purifyHtml() before being stored.
     *
     * @param string     $oldText
     * @param string     $newText
     * @param string     $encoding
     * @param null|array $specialCaseTags null selects static::$defaultSpecialCaseTags
     * @param null|bool  $groupDiffs      null selects static::$defaultGroupDiffs
     */
    public function __construct($oldText, $newText, $encoding = 'UTF-8', $specialCaseTags = null, $groupDiffs = null)
    {
        // Process-wide mbstring setting: invalid characters are substituted with a space (U+0020).
        mb_substitute_character(0x20);

        if ($specialCaseTags === null) {
            $specialCaseTags = static::$defaultSpecialCaseTags;
        }

        if ($groupDiffs === null) {
            $groupDiffs = static::$defaultGroupDiffs;
        }

        $this->oldText = $this->purifyHtml(trim($oldText));
        $this->newText = $this->purifyHtml(trim($newText));
        $this->encoding = $encoding;
        $this->content = '';
        $this->groupDiffs = $groupDiffs;
        $this->setSpecialCaseTags($specialCaseTags);
        $this->setSpecialCaseChars(static::$defaultSpecialCaseChars);
    }

    /**
     * @return int
     */
    public function getMatchThreshold()
    {
        return $this->matchThreshold;
    }

    /**
     * @param int $matchThreshold
     *
     * @return AbstractDiff
     */
    public function setMatchThreshold($matchThreshold)
    {
        $this->matchThreshold = $matchThreshold;

        return $this;
    }

    /**
     * Replaces the whole list of special case characters.
     *
     * @param array $chars
     */
    public function setSpecialCaseChars(array $chars)
    {
        $this->specialCaseChars = $chars;
    }

    /**
     * @return array|null
     */
    public function getSpecialCaseChars()
    {
        return $this->specialCaseChars;
    }

    /**
     * Adds a special case character unless it is already present.
     *
     * @param string $char
     */
    public function addSpecialCaseChar($char)
    {
        if (!in_array($char, $this->specialCaseChars, true)) {
            $this->specialCaseChars[] = $char;
        }
    }

    /**
     * Removes a special case character; does nothing when it is not present.
     *
     * @param string $char
     */
    public function removeSpecialCaseChar($char)
    {
        $key = array_search($char, $this->specialCaseChars, true);
        if ($key !== false) {
            unset($this->specialCaseChars[$key]);
        }
    }

    /**
     * Replaces the list of special case tags and rebuilds the derived
     * opening/closing tag lists from scratch.
     *
     * @param array $tags
     */
    public function setSpecialCaseTags(array $tags = array())
    {
        $this->specialCaseTags = $tags;

        // Drop patterns derived from the previous tag list; otherwise tags that
        // are no longer configured would still be present in the derived lists.
        $this->specialCaseOpeningTags = array();
        $this->specialCaseClosingTags = array();

        foreach ($this->specialCaseTags as $tag) {
            $this->addSpecialCaseTag($tag);
        }
    }

    /**
     * Registers a special case tag together with its opening pattern and
     * closing tag; entries that already exist are not duplicated.
     *
     * @param string $tag
     */
    public function addSpecialCaseTag($tag)
    {
        if (!in_array($tag, $this->specialCaseTags, true)) {
            $this->specialCaseTags[] = $tag;
        }

        $opening = $this->getOpeningTag($tag);
        $closing = $this->getClosingTag($tag);

        if (!in_array($opening, $this->specialCaseOpeningTags, true)) {
            $this->specialCaseOpeningTags[] = $opening;
        }
        if (!in_array($closing, $this->specialCaseClosingTags, true)) {
            $this->specialCaseClosingTags[] = $closing;
        }
    }

    /**
     * Unregisters a special case tag and its derived opening pattern and
     * closing tag; does nothing when the tag is not registered.
     *
     * @param string $tag
     */
    public function removeSpecialCaseTag($tag)
    {
        $key = array_search($tag, $this->specialCaseTags, true);
        if ($key === false) {
            return;
        }

        unset($this->specialCaseTags[$key]);

        $openingKey = array_search($this->getOpeningTag($tag), $this->specialCaseOpeningTags, true);
        if ($openingKey !== false) {
            unset($this->specialCaseOpeningTags[$openingKey]);
        }

        $closingKey = array_search($this->getClosingTag($tag), $this->specialCaseClosingTags, true);
        if ($closingKey !== false) {
            unset($this->specialCaseClosingTags[$closingKey]);
        }
    }

    /**
     * @return array|null
     */
    public function getSpecialCaseTags()
    {
        return $this->specialCaseTags;
    }

    /**
     * @return string the old text as stored by the constructor
     */
    public function getOldHtml()
    {
        return $this->oldText;
    }

    /**
     * @return string the new text as stored by the constructor
     */
    public function getNewHtml()
    {
        return $this->newText;
    }

    /**
     * @return string
     */
    public function getDifference()
    {
        return $this->content;
    }

    /**
     * @param bool $boolean
     *
     * @return $this
     */
    public function setGroupDiffs($boolean)
    {
        $this->groupDiffs = $boolean;

        return $this;
    }

    /**
     * @return bool|null
     */
    public function isGroupDiffs()
    {
        return $this->groupDiffs;
    }

    /**
     * Builds the case-insensitive regex that matches the start of an opening
     * tag for the given tag name (everything up to, but excluding, ">").
     *
     * @param string $tag
     *
     * @return string
     */
    protected function getOpeningTag($tag)
    {
        // Quote the name so a tag containing regex metacharacters cannot break the pattern.
        return "/<".preg_quote($tag, '/')."[^>]*/i";
    }

    /**
     * Builds the literal closing tag for the given tag name.
     *
     * @param string $tag
     *
     * @return string
     */
    protected function getClosingTag($tag)
    {
        return "</".$tag.">";
    }

    /**
     * Returns the text between the first occurrence of $start and the last
     * occurrence of $end that follows it, or '' when either is missing.
     *
     * @param string $str
     * @param string $start
     * @param string $end
     *
     * @return string
     */
    protected function getStringBetween($str, $start, $end)
    {
        $expStr = explode($start, $str, 2);
        if (count($expStr) > 1) {
            $expStr = explode($end, $expStr[1]);
            if (count($expStr) > 1) {
                // Discard whatever follows the last $end, then restore inner $end occurrences.
                array_pop($expStr);

                return implode($end, $expStr);
            }
        }

        return '';
    }

    /**
     * Hook for cleaning up the input HTML; currently returns it unchanged.
     *
     * @param string $html
     *
     * @return string
     */
    protected function purifyHtml($html)
    {
        // The Tidy-based cleanup is disabled by the hard-coded `false`; the
        // branch is kept in working order in case it is switched back on.
        if (class_exists('Tidy') && false) {
            $config = array('output-xhtml' => true, 'indent' => false);
            $tidy = new \tidy();
            $tidy->parseString($html, $config, 'utf8');
            $html = (string) $tidy;

            return $this->getStringBetween($html, '<body>', '</body>');
        }

        return $html;
    }

    /**
     * Tokenizes the old and new texts into $oldWords and $newWords.
     */
    protected function splitInputsToWords()
    {
        $this->oldWords = $this->convertHtmlToListOfWords($this->explode($this->oldText));
        $this->newWords = $this->convertHtmlToListOfWords($this->explode($this->newText));
    }

    /**
     * Tells whether $text is alphanumeric once every special case character
     * has been stripped from it.
     *
     * ctype_alnum() is not Unicode-aware, so multi-byte letters are not
     * considered part of a word.
     *
     * @param string $text
     *
     * @return bool
     */
    protected function isPartOfWord($text)
    {
        return ctype_alnum(str_replace($this->specialCaseChars, '', $text));
    }

    /**
     * Groups a list of single characters into tokens: words, whitespace runs
     * (collapsed to a single space) and complete tags ("<...>").
     *
     * The token boundaries, including empty-string tokens emitted in
     * character mode, are relied upon by the diff and are intentionally kept
     * exactly as they have always been.
     *
     * @param array $characterString list of single characters, as produced by explode()
     *
     * @return array
     */
    protected function convertHtmlToListOfWords($characterString)
    {
        $mode = 'character';
        $current_word = '';
        $words = array();

        foreach ($characterString as $i => $character) {
            switch ($mode) {
                case 'character':
                    if ($this->isStartOfTag($character)) {
                        if ($current_word !== '') {
                            $words[] = $current_word;
                        }
                        $current_word = "<";
                        $mode = 'tag';
                    } elseif (preg_match("/\s/", $character)) {
                        if ($current_word !== '') {
                            $words[] = $current_word;
                        }
                        $current_word = preg_replace('/\s+/S', ' ', $character);
                        $mode = 'whitespace';
                    } else {
                        // An alphanumeric character extends the current word
                        // when that word is empty or itself word-like.
                        $extendsWord = ctype_alnum($character)
                            && ($current_word === '' || $this->isPartOfWord($current_word));

                        // A special case character stays attached to the
                        // current word when the next character is word-like.
                        $joinsNextChar = !$extendsWord
                            && in_array($character, $this->specialCaseChars, true)
                            && isset($characterString[$i + 1])
                            && $this->isPartOfWord($characterString[$i + 1]);

                        if ($extendsWord || $joinsNextChar) {
                            $current_word .= $character;
                        } else {
                            // Emitted even when empty (historical behaviour).
                            $words[] = $current_word;
                            $current_word = $character;
                        }
                    }
                    break;

                case 'tag':
                    if ($this->isEndOfTag($character)) {
                        $current_word .= ">";
                        $words[] = $current_word;
                        $current_word = "";

                        // Historically written as '[^\s]', which PHP parses as
                        // the pattern ^\s with bracket delimiters. Spelled out
                        // here so the resulting mode is unchanged: whitespace
                        // mode unless the character starts with whitespace.
                        if (!preg_match('/^\s/', $character)) {
                            $mode = 'whitespace';
                        } else {
                            $mode = 'character';
                        }
                    } else {
                        $current_word .= $character;
                    }
                    break;

                case 'whitespace':
                    if ($this->isStartOfTag($character)) {
                        if ($current_word !== '') {
                            $words[] = $current_word;
                        }
                        $current_word = "<";
                        $mode = 'tag';
                    } elseif (preg_match("/\s/", $character)) {
                        $current_word .= $character;
                        $current_word = preg_replace('/\s+/S', ' ', $current_word);
                    } else {
                        if ($current_word !== '') {
                            $words[] = $current_word;
                        }
                        $current_word = $character;
                        $mode = 'character';
                    }
                    break;

                default:
                    break;
            }
        }

        if ($current_word !== '') {
            $words[] = $current_word;
        }

        return $words;
    }

    /**
     * @param string $val
     *
     * @return bool true when $val is "<"
     */
    protected function isStartOfTag($val)
    {
        return $val == "<";
    }

    /**
     * @param string $val
     *
     * @return bool true when $val is ">"
     */
    protected function isEndOfTag($val)
    {
        return $val == ">";
    }

    /**
     * Tells whether $value consists solely of whitespace (an empty string counts).
     *
     * @param string $value
     *
     * @return bool
     */
    protected function isWhiteSpace($value)
    {
        // Proper delimiters are required: '[^\s]' is read by PCRE as the
        // pattern ^\s, which inverted the result of this check.
        return !preg_match('/[^\s]/', $value);
    }

    /**
     * Splits a UTF-8 string into its individual characters.
     *
     * Because the empty pattern also matches at both ends of the string, the
     * returned list starts and ends with an empty string.
     *
     * @param string $value
     *
     * @return array
     */
    protected function explode($value)
    {
        // as suggested by @onassar
        return preg_split('//u', $value);
    }
}
463