Passed
Push — master ( 948458...49e3d0 )
by Josh
01:11
created

AbstractDiff::isWhiteSpace()   A

Complexity

Conditions 1
Paths 1

Size

Total Lines 4
Code Lines 2

Duplication

Lines 0
Ratio 0 %

Code Coverage

Tests 0
CRAP Score 2

Importance

Changes 1
Bugs 0 Features 0
Metric Value
c 1
b 0
f 0
dl 0
loc 4
ccs 0
cts 2
cp 0
rs 10
cc 1
eloc 2
nc 1
nop 1
crap 2
1
<?php
2
3
namespace Caxy\HtmlDiff;
4
5
/**
6
 * Class AbstractDiff
7
 * @package Caxy\HtmlDiff
8
 */
9
abstract class AbstractDiff
10
{
11
    /**
12
     * @var array
13
     *
14
     * @deprecated since 0.1.0
15
     */
16
    public static $defaultSpecialCaseTags = array('strong', 'b', 'i', 'big', 'small', 'u', 'sub', 'sup', 'strike', 's', 'p');
0 ignored issues
show
Coding Style introduced by
This line exceeds maximum limit of 120 characters; contains 125 characters

Overly long lines are hard to read on any screen. Most code styles therefore impose a maximum limit on the number of characters in a line.

Loading history...
17
    /**
18
     * @var array
19
     *
20
     * @deprecated since 0.1.0
21
     */
22
    public static $defaultSpecialCaseChars = array('.', ',', '(', ')', '\'');
23
    /**
24
     * @var bool
25
     *
26
     * @deprecated since 0.1.0
27
     */
28
    public static $defaultGroupDiffs = true;
29
30
    /**
31
     * @var HtmlDiffConfig
32
     */
33
    protected $config;
34
35
    /**
36
     * @var string
37
     */
38
    protected $content;
39
    /**
40
     * @var string
41
     */
42
    protected $oldText;
43
    /**
44
     * @var string
45
     */
46
    protected $newText;
47
    /**
48
     * @var array
49
     */
50
    protected $oldWords = array();
51
    /**
52
     * @var array
53
     */
54
    protected $newWords = array();
55
56
    /**
57
     * AbstractDiff constructor.
58
     *
59
     * @param string     $oldText
60
     * @param string     $newText
61
     * @param string     $encoding
62
     * @param null|array $specialCaseTags
63
     * @param null|bool  $groupDiffs
64
     */
65 11
    public function __construct($oldText, $newText, $encoding = 'UTF-8', $specialCaseTags = null, $groupDiffs = null)
    {
        // Use a plain space (0x20) as mbstring's substitute character.
        mb_substitute_character(0x20);

        // The config comes from HtmlDiffConfig::create() with the requested encoding applied.
        $config = HtmlDiffConfig::create()->setEncoding($encoding);
        $this->config = $config;

        // The optional legacy arguments only touch the config when they are explicitly given.
        if (null !== $specialCaseTags) {
            $config->setSpecialCaseTags($specialCaseTags);
        }
        if (null !== $groupDiffs) {
            $config->setGroupDiffs($groupDiffs);
        }

        // Both inputs are trimmed and passed through purifyHtml(); the diff output starts out empty.
        $trimmedOld = trim($oldText);
        $this->oldText = $this->purifyHtml($trimmedOld);
        $trimmedNew = trim($newText);
        $this->newText = $this->purifyHtml($trimmedNew);
        $this->content = '';
    }
83
84
    /**
85
     * @return HtmlDiffConfig
86
     */
87
    public function getConfig()
    {
        // Hand back the configuration object currently attached to this diff.
        $activeConfig = $this->config;

        return $activeConfig;
    }
91
92
    /**
93
     * @param HtmlDiffConfig $config
94
     *
95
     * @return AbstractDiff
96
     */
97 7
    public function setConfig(HtmlDiffConfig $config)
    {
        // Replace the attached configuration; returning the instance keeps the setter chainable.
        $self = $this;
        $self->config = $config;

        return $self;
    }
103
104
    /**
105
     * @return int
106
     *
107
     * @deprecated since 0.1.0
108
     */
109
    public function getMatchThreshold()
    {
        // Deprecated proxy: forwards to getMatchThreshold() on the config object.
        $config = $this->config;

        return $config->getMatchThreshold();
    }
113
114
    /**
115
     * @param int $matchThreshold
116
     *
117
     * @return AbstractDiff
118
     *
119
     * @deprecated since 0.1.0
120
     */
121
    public function setMatchThreshold($matchThreshold)
    {
        // Deprecated proxy: forwards the value to setMatchThreshold() on the config object.
        $config = $this->config;
        $config->setMatchThreshold($matchThreshold);

        // Return the diff itself (not the config) so calls can be chained.
        return $this;
    }
127
128
    /**
129
     * @param array $chars
130
     *
131
     * @deprecated since 0.1.0
132
     */
133
    public function setSpecialCaseChars(array $chars)
    {
        // Deprecated proxy: forwards the list to setSpecialCaseChars() on the config object.
        $config = $this->config;
        $config->setSpecialCaseChars($chars);
    }
137
138
    /**
139
     * @return array|null
140
     *
141
     * @deprecated since 0.1.0
142
     */
143
    public function getSpecialCaseChars()
    {
        // Deprecated proxy: returns whatever getSpecialCaseChars() on the config object yields.
        $config = $this->config;

        return $config->getSpecialCaseChars();
    }
147
148
    /**
149
     * @param string $char
150
     *
151
     * @deprecated since 0.1.0
152
     */
153
    public function addSpecialCaseChar($char)
    {
        // Deprecated proxy: forwards the character to addSpecialCaseChar() on the config object.
        $config = $this->config;
        $config->addSpecialCaseChar($char);
    }
157
158
    /**
159
     * @param string $char
160
     *
161
     * @deprecated since 0.1.0
162
     */
163
    public function removeSpecialCaseChar($char)
    {
        // Deprecated proxy: forwards the character to removeSpecialCaseChar() on the config object.
        $config = $this->config;
        $config->removeSpecialCaseChar($char);
    }
167
168
    /**
169
     * @param array $tags
170
     *
171
     * @deprecated since 0.1.0
172
     */
173
    public function setSpecialCaseTags(array $tags = array())
    {
        // Deprecated proxy for the config object's tag list.
        // The tags must go to setSpecialCaseTags(); the previous code handed them to
        // setSpecialCaseChars(), which overwrote the special-case characters
        // and left the tag list untouched.
        $this->config->setSpecialCaseTags($tags);
    }
177
178
    /**
179
     * @param string $tag
180
     *
181
     * @deprecated since 0.1.0
182
     */
183
    public function addSpecialCaseTag($tag)
    {
        // Deprecated proxy: forwards the tag name to addSpecialCaseTag() on the config object.
        $config = $this->config;
        $config->addSpecialCaseTag($tag);
    }
187
188
    /**
189
     * @param string $tag
190
     *
191
     * @deprecated since 0.1.0
192
     */
193
    public function removeSpecialCaseTag($tag)
    {
        // Deprecated proxy: forwards the tag name to removeSpecialCaseTag() on the config object.
        $config = $this->config;
        $config->removeSpecialCaseTag($tag);
    }
197
198
    /**
199
     * @return array|null
200
     *
201
     * @deprecated since 0.1.0
202
     */
203
    public function getSpecialCaseTags()
    {
        // Deprecated proxy: returns whatever getSpecialCaseTags() on the config object yields.
        $config = $this->config;

        return $config->getSpecialCaseTags();
    }
207
208
    /**
209
     * @return string
210
     */
211
    public function getOldHtml()
    {
        // The stored "old" input, as kept in $oldText.
        $oldHtml = $this->oldText;

        return $oldHtml;
    }
215
216
    /**
217
     * @return string
218
     */
219
    public function getNewHtml()
    {
        // The stored "new" input, as kept in $newText.
        $newHtml = $this->newText;

        return $newHtml;
    }
223
224
    /**
225
     * @return string
226
     */
227
    public function getDifference()
    {
        // Whatever is currently held in $content (the constructor initialises it to '').
        $difference = $this->content;

        return $difference;
    }
231
232
    /**
233
     * @param bool $boolean
234
     *
235
     * @return $this
236
     *
237
     * @deprecated since 0.1.0
238
     */
239
    public function setGroupDiffs($boolean)
    {
        // Deprecated proxy: forwards the flag to setGroupDiffs() on the config object.
        $config = $this->config;
        $config->setGroupDiffs($boolean);

        // Return the diff itself (not the config) so calls can be chained.
        return $this;
    }
245
246
    /**
247
     * @return bool
248
     *
249
     * @deprecated since 0.1.0
250
     */
251
    public function isGroupDiffs()
    {
        // Deprecated proxy: returns whatever isGroupDiffs() on the config object yields.
        $config = $this->config;

        return $config->isGroupDiffs();
    }
255
256
    /**
257
     * @param string $tag
258
     *
259
     * @return string
260
     */
261
    protected function getOpeningTag($tag)
    {
        // Builds the case-insensitive regex "/<TAG[^>]*/i" for the given tag name.
        // The tag name is inserted verbatim (no regex quoting).
        return sprintf('/<%s[^>]*/i', $tag);
    }
265
266
    /**
267
     * @param string $tag
268
     *
269
     * @return string
270
     */
271
    protected function getClosingTag($tag)
    {
        // Plain closing-tag string for the given tag name, e.g. "p" becomes "</p>".
        return sprintf('</%s>', $tag);
    }
275
276
    /**
277
     * @param string $str
278
     * @param string $start
279
     * @param string $end
280
     *
281
     * @return string
282
     */
283
    protected function getStringBetween($str, $start, $end)
    {
        // Everything after the first occurrence of $start; bail out if $start never appears.
        $afterStart = explode($start, $str, 2);
        if (count($afterStart) < 2) {
            return '';
        }

        // Split the remainder on $end; a single piece means $end never appears.
        $segments = explode($end, $afterStart[1]);
        if (count($segments) < 2) {
            return '';
        }

        // Discard what trails the LAST $end and glue the rest back together, so any
        // earlier $end occurrences stay part of the returned text.
        $inner = array_slice($segments, 0, -1);

        return implode($end, $inner);
    }
297
298
    /**
299
     * @param string $html
300
     *
301
     * @return string
302
     */
303 11
    protected function purifyHtml($html)
    {
        // NOTE: the hard-coded `&& false` keeps this Tidy branch switched off, so the
        // input is currently always returned untouched. The branch is kept so that
        // it works if the guard is ever removed.
        if ( class_exists( 'Tidy' ) && false ) {
            $config = array( 'output-xhtml'   => true, 'indent' => false );
            $tidy = new tidy();
            $tidy->parseString( $html, $config, 'utf8' );
            $html = (string) $tidy;

            // getStringBetween() requires both delimiters; the closing one was
            // previously missing, which made this call invalid.
            return $this->getStringBetween( $html, '<body>', '</body>' );
        }

        return $html;
    }
316
317 11
    protected function splitInputsToWords()
    {
        // Old text: split into characters, then group the characters into word tokens.
        $oldCharacters = $this->explode($this->oldText);
        $this->oldWords = $this->convertHtmlToListOfWords($oldCharacters);

        // New text: same two steps.
        $newCharacters = $this->explode($this->newText);
        $this->newWords = $this->convertHtmlToListOfWords($newCharacters);
    }
322
323
    /**
324
     * @param string $text
325
     *
326
     * @return bool
327
     */
328 11
    protected function isPartOfWord($text)
    {
        // Strip every configured special-case character, then require the
        // remainder to be purely alphanumeric (ctype_alnum() is false for '').
        $specialChars = $this->config->getSpecialCaseChars();
        $stripped = str_replace($specialChars, '', $text);

        return ctype_alnum($stripped);
    }
332
333
    /**
334
     * @param array $characterString
335
     *
336
     * @return array
337
     */
338 11
    protected function convertHtmlToListOfWords($characterString)
    {
        // Small state machine over the character list. $state is one of
        // 'character', 'tag' or 'whitespace'; $buffer holds the token being built.
        $state = 'character';
        $buffer = '';
        $tokens = array();

        foreach ($characterString as $index => $char) {
            // --- inside a tag: accumulate until the closing ">" -------------
            if ('tag' === $state) {
                if (!$this->isEndOfTag($char)) {
                    $buffer .= $char;
                    continue;
                }

                $tokens[] = $buffer . ">";
                $buffer = "";
                // Square brackets act as the regex delimiters here, so the pattern
                // tested is ^\s. Kept exactly as is: for a ">" it never matches,
                // which means the machine resumes in whitespace mode after a tag.
                $state = preg_match('[^\s]', $char) ? 'character' : 'whitespace';
                continue;
            }

            // --- a "<" opens a tag from both remaining states ---------------
            if ($this->isStartOfTag($char)) {
                if ($buffer !== '') {
                    $tokens[] = $buffer;
                }
                $buffer = "<";
                $state = 'tag';
                continue;
            }

            $isSpace = preg_match("/\s/", $char);

            // --- whitespace run: collapse it into a single space ------------
            if ('whitespace' === $state) {
                if ($isSpace) {
                    $buffer = preg_replace('/\s+/S', ' ', $buffer . $char);
                    continue;
                }

                if ($buffer != '') {
                    $tokens[] = $buffer;
                }
                $buffer = $char;
                $state = 'character';
                continue;
            }

            // --- regular characters -----------------------------------------
            if ($isSpace) {
                if ($buffer !== '') {
                    $tokens[] = $buffer;
                }
                $buffer = preg_replace('/\s+/S', ' ', $char);
                $state = 'whitespace';
                continue;
            }

            // An alphanumeric character extends the current word when the buffer
            // is empty or is itself still "part of a word".
            $extendsWord = ctype_alnum($char)
                && (strlen($buffer) == 0 || $this->isPartOfWord($buffer));

            // A special-case character (as configured) also stays attached when
            // the character that follows it is part of a word.
            if (!$extendsWord) {
                $extendsWord = in_array($char, $this->config->getSpecialCaseChars())
                    && isset($characterString[$index + 1])
                    && $this->isPartOfWord($characterString[$index + 1]);
            }

            if ($extendsWord) {
                $buffer .= $char;
            } else {
                // Note: the buffer is flushed here even when it is empty.
                $tokens[] = $buffer;
                $buffer = $char;
            }
        }

        // Flush whatever is left once the input is exhausted.
        if ($buffer != '') {
            $tokens[] = $buffer;
        }

        return $tokens;
    }
413
414
    /**
415
     * @param string $val
416
     *
417
     * @return bool
418
     */
419 11
    protected function isStartOfTag($val)
    {
        // True when the given character is the tag opener "<" (loose comparison).
        $opener = "<";

        return $opener == $val;
    }
423
424
    /**
425
     * @param string $val
426
     *
427
     * @return bool
428
     */
429 11
    protected function isEndOfTag($val)
    {
        // True when the given character is the tag closer ">" (loose comparison).
        $closer = ">";

        return $closer == $val;
    }
433
434
    /**
435
     * @param string $value
436
     *
437
     * @return bool
438
     */
439
    protected function isWhiteSpace($value)
    {
        // True when $value contains no non-whitespace character (so '' counts as
        // whitespace too).
        //
        // The pattern needs explicit delimiters: written as '[^\s]', PCRE takes the
        // square brackets as the delimiters and tests ^\s ("starts with
        // whitespace") instead, which made this method return false for " " and
        // true for "a".
        return !preg_match( '/[^\s]/', $value );
    }
443
444
    /**
445
     * @param string $value
446
     *
447
     * @return array
448
     */
449 11
    protected function explode($value)
    {
        // as suggested by @onassar: an empty pattern with the /u modifier splits
        // the string between every UTF-8 character. PREG_SPLIT_NO_EMPTY drops the
        // empty strings preg_split() otherwise emits before the first and after
        // the last character, which would end up as a bogus empty word.
        $characters = preg_split( '//u', $value, -1, PREG_SPLIT_NO_EMPTY );

        // preg_split() returns false when $value is not valid UTF-8. Fall back to
        // a byte-wise split so callers always receive an array they can iterate.
        if ( $characters === false ) {
            return str_split( $value );
        }

        return $characters;
    }
454
}
455