Passed
Pull Request — master (#31)
by Josh
03:43
created

AbstractDiff::getOpeningTag()   A

Complexity

Conditions 1
Paths 1

Size

Total Lines 4
Code Lines 2

Duplication

Lines 0
Ratio 0 %

Code Coverage

Tests 0
CRAP Score 2

Importance

Changes 1
Bugs 0 Features 0
Metric Value
c 1
b 0
f 0
dl 0
loc 4
ccs 0
cts 2
cp 0
rs 10
cc 1
eloc 2
nc 1
nop 1
crap 2
1
<?php
2
3
namespace Caxy\HtmlDiff;
4
5
abstract class AbstractDiff
6
{
7
    const STRATEGY_MATCHING = 'matching';
8
    const STRATEGY_RELATIVE = 'relative';
9
10
    public static $defaultSpecialCaseTags = array('strong', 'b', 'i', 'big', 'small', 'u', 'sub', 'sup', 'strike', 's', 'p');
0 ignored issues
show
Coding Style introduced by
This line exceeds maximum limit of 120 characters; contains 125 characters

Overly long lines are hard to read on any screen. Most code styles therefore impose a maximum limit on the number of characters in a line.

Loading history...
11
    public static $defaultSpecialCaseChars = array('.', ',', '(', ')', '\'');
12
    public static $defaultGroupDiffs = true;
13
14
    protected $content;
15
    protected $oldText;
16
    protected $newText;
17
    protected $oldWords = array();
18
    protected $newWords = array();
19
    protected $encoding;
20
    protected $specialCaseOpeningTags = array();
21
    protected $specialCaseClosingTags = array();
22
    protected $specialCaseTags;
23
    protected $specialCaseChars;
24
    protected $groupDiffs;
25
    protected $matchThreshold = 80;
26
    protected $debug = false;
27
28
    protected $strategy = self::STRATEGY_MATCHING;
29
30 11
    /**
     * Stores both HTML inputs and applies the default configuration.
     *
     * Each input is trimmed and passed through purifyHtml() before it is stored.
     *
     * @param string     $oldText         First ("old") HTML input.
     * @param string     $newText         Second ("new") HTML input.
     * @param string     $encoding        Encoding label; this constructor only stores it.
     * @param array|null $specialCaseTags Tag names to register; null selects static::$defaultSpecialCaseTags.
     * @param bool|null  $groupDiffs      Grouping flag; null selects static::$defaultGroupDiffs.
     */
    public function __construct($oldText, $newText, $encoding = 'UTF-8', $specialCaseTags = null, $groupDiffs = null)
    {
        // null means "not supplied": fall back to the (late statically bound) class defaults.
        $tags = ($specialCaseTags === null) ? static::$defaultSpecialCaseTags : $specialCaseTags;
        $grouping = ($groupDiffs === null) ? static::$defaultGroupDiffs : $groupDiffs;

        $this->oldText = $this->purifyHtml(trim($oldText));
        $this->newText = $this->purifyHtml(trim($newText));

        $this->encoding = $encoding;
        $this->content = '';
        $this->groupDiffs = $grouping;

        $this->setSpecialCaseTags($tags);
        $this->setSpecialCaseChars(static::$defaultSpecialCaseChars);
    }
48
49
    /**
     * Stores the strategy identifier.
     *
     * The value is not validated here; the class defines STRATEGY_MATCHING and
     * STRATEGY_RELATIVE as candidate values.
     *
     * @param string $strategy
     *
     * @return $this
     */
    public function setStrategy($strategy)
    {
        $this->strategy = $strategy;

        return $this;
    }
55
56
    /**
     * Returns the stored strategy identifier (self::STRATEGY_MATCHING unless changed).
     *
     * @return string
     */
    public function getStrategy()
    {
        return $this->strategy;
    }
60
61
    /**
     * Stores the debug flag.
     *
     * @param bool $debug The property defaults to false; the value is not validated here.
     *
     * @return $this
     */
    public function setDebug($debug)
    {
        $this->debug = $debug;

        return $this;
    }
67
68
    /**
     * Returns the stored debug flag (false unless changed through setDebug()).
     *
     * @return bool
     */
    public function getDebug()
    {
        return $this->debug;
    }
72
73
    /**
     * Returns the stored match threshold (80 unless changed through setMatchThreshold()).
     *
     * @return int
     */
    public function getMatchThreshold()
    {
        return $this->matchThreshold;
    }
80
81
    /**
     * Stores the match threshold; the value is not range-checked here.
     *
     * @param int $matchThreshold
     *
     * @return AbstractDiff
     */
    public function setMatchThreshold($matchThreshold)
    {
        $this->matchThreshold = $matchThreshold;

        return $this;
    }
92
93
94
95 11
    /**
     * Replaces the whole list of special-case characters.
     *
     * These characters are stripped by isPartOfWord() before its alphanumeric test and
     * are consulted by convertHtmlToListOfWords() when deciding whether to extend a word.
     *
     * @param array $chars
     */
    public function setSpecialCaseChars(array $chars)
    {
        $this->specialCaseChars = $chars;
    }
99
100
    /**
     * Returns the current list of special-case characters.
     *
     * @return array
     */
    public function getSpecialCaseChars()
    {
        return $this->specialCaseChars;
    }
104
105
    /**
     * Appends a special-case character unless an equal (loosely compared) entry exists.
     *
     * @param string $char
     */
    public function addSpecialCaseChar($char)
    {
        if (in_array($char, $this->specialCaseChars)) {
            // Already registered; nothing to do.
            return;
        }

        $this->specialCaseChars[] = $char;
    }
111
112
    /**
     * Removes the first entry equal (loosely compared) to the given character, if any.
     *
     * The remaining entries keep their original array keys.
     *
     * @param string $char
     */
    public function removeSpecialCaseChar($char)
    {
        $position = array_search($char, $this->specialCaseChars);

        if ($position === false) {
            return;
        }

        unset($this->specialCaseChars[$position]);
    }
119
120 11
    /**
     * Replaces the list of special-case tags and rebuilds the derived lookup lists.
     *
     * The opening-tag patterns and closing-tag strings are derived from the tag list,
     * so they are cleared first; otherwise entries from a previous tag list would
     * survive a second call and no longer correspond to any registered tag.
     *
     * @param array $tags Tag names, e.g. array('strong', 'em').
     */
    public function setSpecialCaseTags(array $tags = array())
    {
        $this->specialCaseTags = $tags;
        $this->specialCaseOpeningTags = array();
        $this->specialCaseClosingTags = array();

        foreach ($this->specialCaseTags as $tag) {
            $this->addSpecialCaseTag($tag);
        }
    }
128
129
    /**
     * Registers a special-case tag together with its opening pattern and closing string.
     *
     * Each of the three lists only receives the value when it is not already present.
     *
     * @param string $tag Tag name without angle brackets.
     */
    public function addSpecialCaseTag($tag)
    {
        $alreadyListed = in_array($tag, $this->specialCaseTags);
        if (!$alreadyListed) {
            $this->specialCaseTags[] = $tag;
        }

        $openingPattern = $this->getOpeningTag($tag);
        if (!in_array($openingPattern, $this->specialCaseOpeningTags)) {
            $this->specialCaseOpeningTags[] = $openingPattern;
        }

        $closingTag = $this->getClosingTag($tag);
        if (!in_array($closingTag, $this->specialCaseClosingTags)) {
            $this->specialCaseClosingTags[] = $closingTag;
        }
    }
145
146
    /**
     * Unregisters a special-case tag and its derived opening pattern / closing string.
     *
     * Nothing happens when the tag is not in the tag list.
     *
     * @param string $tag Tag name without angle brackets.
     */
    public function removeSpecialCaseTag($tag)
    {
        $tagKey = array_search($tag, $this->specialCaseTags);
        if ($tagKey === false) {
            return;
        }
        unset($this->specialCaseTags[$tagKey]);

        $openingKey = array_search($this->getOpeningTag($tag), $this->specialCaseOpeningTags);
        if ($openingKey !== false) {
            unset($this->specialCaseOpeningTags[$openingKey]);
        }

        $closingKey = array_search($this->getClosingTag($tag), $this->specialCaseClosingTags);
        if ($closingKey !== false) {
            unset($this->specialCaseClosingTags[$closingKey]);
        }
    }
162
163
    /**
     * Returns the current list of special-case tag names.
     *
     * @return array
     */
    public function getSpecialCaseTags()
    {
        return $this->specialCaseTags;
    }
167
168
    /**
     * Returns the "old" HTML as stored by the constructor (trimmed, then passed through purifyHtml()).
     *
     * @return string
     */
    public function getOldHtml()
    {
        return $this->oldText;
    }
172
173
    /**
     * Returns the "new" HTML as stored by the constructor (trimmed, then passed through purifyHtml()).
     *
     * @return string
     */
    public function getNewHtml()
    {
        return $this->newText;
    }
177
178
    /**
     * Returns the content buffer; the constructor initialises it to an empty string.
     *
     * @return string
     */
    public function getDifference()
    {
        return $this->content;
    }
182
183
    /**
     * Stores the group-diffs flag.
     *
     * @param bool $boolean The value is stored as given, without casting.
     */
    public function setGroupDiffs($boolean)
    {
        $this->groupDiffs = $boolean;
    }
187
188 11
    /**
     * Returns the stored group-diffs flag.
     *
     * @return bool
     */
    public function isGroupDiffs()
    {
        return $this->groupDiffs;
    }
192
193
    /**
     * Builds the case-insensitive regular expression that recognises an opening tag.
     *
     * The tag name must be followed by whitespace, "/", ">" or the end of the subject.
     * Without that boundary the pattern is a plain prefix match, so "s" would also
     * match "<span", "b" would match "<br" and "i" would match "<img".
     *
     * @param string $tag Tag name without angle brackets.
     *
     * @return string PCRE pattern including delimiters and the "i" modifier.
     */
    protected function getOpeningTag($tag)
    {
        return '/<'.$tag.'(?=[\s\/>]|$)[^>]*/i';
    }
197
198
    /**
     * Builds the literal closing tag for a tag name (a plain string, not a pattern).
     *
     * @param string $tag Tag name without angle brackets.
     *
     * @return string
     */
    protected function getClosingTag($tag)
    {
        return "</{$tag}>";
    }
202
203
    /**
     * Returns the text between the first $start and the last $end that follows it.
     *
     * @param string $str   Subject string.
     * @param string $start Opening delimiter.
     * @param string $end   Closing delimiter.
     *
     * @return string The enclosed text, or '' when either delimiter is missing.
     */
    protected function getStringBetween($str, $start, $end)
    {
        $afterStart = explode($start, $str, 2);
        if (count($afterStart) < 2) {
            return '';
        }

        $pieces = explode($end, $afterStart[1]);
        if (count($pieces) < 2) {
            return '';
        }

        // Drop whatever follows the last $end, then glue the rest back together.
        array_pop($pieces);

        return implode($end, $pieces);
    }
217
218 11
    /**
     * Hook for cleaning up the HTML inputs; currently returns the input unchanged.
     *
     * The Tidy-based clean-up below is deliberately switched off by the hard-coded
     * "&& false". It is kept in a working state so that it can be re-enabled:
     * getStringBetween() now receives its required closing delimiter, and the class is
     * referenced as \tidy because an unqualified class name would be resolved inside
     * this file's namespace.
     *
     * @param string     $html HTML to clean.
     * @param array|null $tags Unused; kept for signature compatibility.
     *
     * @return string
     */
    protected function purifyHtml($html, $tags = null)
    {
        if (class_exists('Tidy') && false) {
            $config = array('output-xhtml' => true, 'indent' => false);
            $tidy = new \tidy();
            $tidy->parseString($html, $config, 'utf8');
            $html = (string) $tidy;

            return $this->getStringBetween($html, '<body>', '</body>');
        }

        return $html;
    }
231
232 11
    /**
     * Tokenises both stored inputs and fills $oldWords / $newWords.
     *
     * Each text is first split into characters by explode() and then grouped into
     * words by convertHtmlToListOfWords().
     */
    protected function splitInputsToWords()
    {
        $oldCharacters = $this->explode($this->oldText);
        $this->oldWords = $this->convertHtmlToListOfWords($oldCharacters);

        $newCharacters = $this->explode($this->newText);
        $this->newWords = $this->convertHtmlToListOfWords($newCharacters);
    }
237
238 11
    /**
     * Tells whether the text is purely alphanumeric once special-case characters are removed.
     *
     * @param string $text
     *
     * @return bool False when nothing is left after the removal (ctype_alnum('') is false).
     */
    protected function isPartOfWord($text)
    {
        $withoutSpecialChars = str_replace($this->specialCaseChars, '', $text);

        return ctype_alnum($withoutSpecialChars);
    }
242
243 11
    /**
     * Groups a list of single characters into "words".
     *
     * A small state machine with three modes is used:
     *  - character:  alphanumeric characters accumulate into the current word; a
     *                special-case character is attached when the next character is part
     *                of a word; any other character closes the current word and starts
     *                a new one.
     *  - tag:        everything from "<" up to and including ">" becomes one word.
     *  - whitespace: a run of whitespace is collapsed to a single space and becomes one word.
     *
     * @param array $characterString List of characters with consecutive integer keys.
     *
     * @return array List of words.
     */
    protected function convertHtmlToListOfWords($characterString)
    {
        $mode = 'character';
        $current_word = '';
        $words = array();

        foreach ($characterString as $i => $character) {
            switch ($mode) {
                case 'character':
                    if ($this->isStartOfTag($character)) {
                        if ($current_word !== '') {
                            $words[] = $current_word;
                        }
                        $current_word = '<';
                        $mode = 'tag';
                    } elseif (preg_match('/\s/', $character)) {
                        if ($current_word !== '') {
                            $words[] = $current_word;
                        }
                        $current_word = preg_replace('/\s+/S', ' ', $character);
                        $mode = 'whitespace';
                    } else {
                        // An alphanumeric character extends the word being built.
                        $extendsWord = ctype_alnum($character)
                            && ($current_word === '' || $this->isPartOfWord($current_word));
                        // A special-case character sticks to the word when a word character follows.
                        $joinsNextWord = !$extendsWord
                            && in_array($character, $this->specialCaseChars)
                            && isset($characterString[$i + 1])
                            && $this->isPartOfWord($characterString[$i + 1]);

                        if ($extendsWord || $joinsNextWord) {
                            $current_word .= $character;
                        } else {
                            // Pushed unconditionally (even when empty), as before.
                            $words[] = $current_word;
                            $current_word = $character;
                        }
                    }
                    break;
                case 'tag':
                    if ($this->isEndOfTag($character)) {
                        $current_word .= '>';
                        $words[] = $current_word;
                        $current_word = '';
                        // The previous check here, preg_match('[^\s]', $character), used "[" and "]"
                        // as delimiters and therefore never matched ">", so this branch always
                        // selected whitespace mode. Whitespace mode with an empty word also avoids
                        // pushing an empty word when a non-alphanumeric character follows the tag.
                        $mode = 'whitespace';
                    } else {
                        $current_word .= $character;
                    }
                    break;
                case 'whitespace':
                    if ($this->isStartOfTag($character)) {
                        if ($current_word !== '') {
                            $words[] = $current_word;
                        }
                        $current_word = '<';
                        $mode = 'tag';
                    } elseif (preg_match('/\s/', $character)) {
                        $current_word .= $character;
                        $current_word = preg_replace('/\s+/S', ' ', $current_word);
                    } else {
                        if ($current_word !== '') {
                            $words[] = $current_word;
                        }
                        $current_word = $character;
                        $mode = 'character';
                    }
                    break;
                default:
                    break;
            }
        }

        // Flush whatever is still being built when the input ends.
        if ($current_word !== '') {
            $words[] = $current_word;
        }

        return $words;
    }
318
319 11
    /**
     * Tells whether the value is the tag-opening character "<" (loose comparison).
     *
     * @param string $val
     *
     * @return bool
     */
    protected function isStartOfTag($val)
    {
        return '<' == $val;
    }
323
324 11
    /**
     * Tells whether the value is the tag-closing character ">" (loose comparison).
     *
     * @param string $val
     *
     * @return bool
     */
    protected function isEndOfTag($val)
    {
        return '>' == $val;
    }
328
329
    /**
     * Tells whether the value consists of whitespace only (an empty string counts as whitespace).
     *
     * The former pattern '[^\s]' was parsed with "[" and "]" as delimiters, i.e. as
     * the expression ^\s, which inverted the result for most inputs (" " gave false,
     * "a" gave true). The pattern is now properly delimited.
     *
     * @param string $value
     *
     * @return bool
     */
    protected function isWhiteSpace($value)
    {
        return !preg_match('/[^\s]/', $value);
    }
333
334 11
    /**
     * Splits a string into its individual UTF-8 characters.
     *
     * PREG_SPLIT_NO_EMPTY drops the empty strings that an empty pattern otherwise
     * produces before the first and after the last character; those would end up as
     * an empty word at the start of every word list. When the input is not valid
     * UTF-8, preg_split() returns false, so an empty list is returned instead to keep
     * callers that iterate over the result working.
     *
     * @param string $value
     *
     * @return array List of characters with consecutive integer keys.
     */
    protected function explode($value)
    {
        // as suggested by @onassar
        $characters = preg_split('//u', $value, -1, PREG_SPLIT_NO_EMPTY);

        return $characters === false ? array() : $characters;
    }
339
}
340