1
|
|
|
<?php |
|
|
|
|
2
|
|
|
|
3
|
|
|
namespace SilverStripe\View\Parsers; |
4
|
|
|
|
5
|
|
|
use InvalidArgumentException; |
6
|
|
|
use SilverStripe\Core\Convert; |
7
|
|
|
use SilverStripe\Core\Injector\Injector; |
8
|
|
|
|
9
|
|
|
require_once 'difflib/difflib.php'; |
10
|
|
|
|
11
|
|
|
/** |
12
|
|
|
* Class representing a 'diff' between two sequences of strings. |
13
|
|
|
*/ |
14
|
|
|
class Diff extends \Diff |
|
|
|
|
15
|
|
|
{ |
16
|
|
|
public static $html_cleaner_class = null; |
17
|
|
|
|
18
|
|
|
/**
 * Tidy up HTML so that malformed markup does not distort a diff.
 *
 * The markup is handed to an HTMLCleaner when one can be obtained; otherwise it
 * is round-tripped through HTMLValue::create()/getContent() as a minimal
 * fallback. Self-closing <ins /> and <del /> elements are stripped from the
 * result afterwards.
 *
 * @param string $content HTML content
 * @param HTMLCleaner $cleaner Optional cleaner instance; when given it is used
 *                             instead of the class named in self::$html_cleaner_class
 * @return mixed|string
 */
public static function cleanHTML($content, $cleaner = null)
{
    // Only resolve a cleaner ourselves when the caller did not supply one
    if (!$cleaner) {
        $configuredClass = self::$html_cleaner_class;
        $cleaner = ($configuredClass && class_exists($configuredClass))
            ? Injector::inst()->create($configuredClass)
            : HTMLCleaner::inst();
    }

    if (!$cleaner) {
        // No cleaner available: fall back to the most basic normalisation
        $content = HTMLValue::create($content)->getContent();
    } else {
        $content = $cleaner->cleanHTML($content);
    }

    // Empty self-closing <ins /> and <del /> tags upset browsers, so drop them
    return preg_replace('/<(ins|del)[^>]*\/>/', '', $content);
}
54
|
|
|
|
55
|
|
|
/**
 * Render the differences between two pieces of markup as HTML, wrapping
 * additions in <ins> and removals in <del>.
 *
 * Works in two passes: the inputs are first diffed chunk by chunk, then every
 * edited tag (together with the content up to its closing tag) is merged into
 * a single chunk and the merged lists are diffed again to produce the output.
 *
 * @param string $from Original content
 * @param string $to Updated content
 * @param bool $escape When true, chunks are passed through Convert::raw2xml() before output
 * @return string
 */
public static function compareHTML($from, $to, $escape = false)
{
    // Pass 1: diff the word/tag chunks of both inputs
    $diff = new Diff(self::getHTMLChunks($from), self::getHTMLChunks($to));

    // Both structures are keyed by side: 1 = "from", 2 = "to"
    $depth = [1 => 0, 2 => 0];    // > 0 while we are inside an edited tag on that side
    $merged = [1 => [], 2 => []]; // re-chunked output per side

    // Collapse edited tags (and their content) into single chunks; otherwise
    // the generated HTML gets crusty
    foreach ($diff->edits as $edit) {
        $trackTags = false;
        $sides = [];
        switch ($edit->type) {
            case 'copy':
                $sides = [1 => $edit->orig, 2 => $edit->orig];
                break;

            case 'change':
                $trackTags = true;
                $sides = [1 => $edit->orig, 2 => $edit->final];
                break;

            case 'add':
                $trackTags = true;
                $sides = [1 => null, 2 => $edit->final];
                break;

            case 'delete':
                $trackTags = true;
                $sides = [1 => $edit->orig, 2 => null];
                break;
        }

        foreach ($sides as $side => $pieces) {
            if (!$pieces) {
                continue;
            }

            foreach ($pieces as $piece) {
                if ($depth[$side]) {
                    // Inside an edited tag: glue this piece onto the chunk being built
                    $lastIndex = count($merged[$side]) - 1;
                    $merged[$side][$lastIndex] .= ' ' . $piece;

                    // NOTE(review): every chunk starting with "<" other than "</" counts as
                    // an opener, so self-closing tags (e.g. <br />) are never balanced by a
                    // closer - confirm whether that is intended.
                    if (substr($piece, 0, 2) == "</") {
                        $depth[$side]--;
                    } elseif (isset($piece[0]) && $piece[0] == "<") {
                        $depth[$side]++;
                    }
                } else {
                    $merged[$side][] = $piece;

                    // An opening tag inside an edit starts tag-building mode
                    if ($trackTags
                        && isset($piece[0])
                        && $piece[0] == "<"
                        && substr($piece, 0, 2) != "</"
                    ) {
                        $depth[$side] = 1;
                    }
                }
            }
        }
    }

    // Pass 2: diff the re-chunked data and turn it into marked-up HTML
    $diff = new Diff($merged[1], $merged[2]);
    $fragments = [];
    foreach ($diff->edits as $edit) {
        if ($escape) {
            $before = Convert::raw2xml($edit->orig);
            $after = Convert::raw2xml($edit->final);
        } else {
            $before = $edit->orig;
            $after = $edit->final;
        }

        switch ($edit->type) {
            case 'copy':
                $fragments[] = " " . implode(" ", $before) . " ";
                break;

            case 'change':
                $fragments[] = " <ins>" . implode(" ", $after) . "</ins> ";
                $fragments[] = " <del>" . implode(" ", $before) . "</del> ";
                break;

            case 'add':
                $fragments[] = " <ins>" . implode(" ", $after) . "</ins> ";
                break;

            case 'delete':
                $fragments[] = " <del>" . implode(" ", $before) . "</del> ";
                break;
        }
    }

    return self::cleanHTML(implode('', $fragments));
}
162
|
|
|
|
163
|
|
|
/**
 * Split content into an ordered list of chunks suitable for diffing.
 *
 * Each whitespace-separated word becomes one chunk, and each tag (everything
 * from a chunk starting with "<" up to the next chunk ending in ">") becomes
 * one chunk with its parts re-joined by single spaces. Because spaces are
 * inserted around "<" and ">" before splitting, content that begins or ends
 * with a tag yields an empty string as its first or last chunk.
 *
 * @param string|bool|array $content If passed as an array, values will be concatenated with a comma.
 *                                   Booleans become "true"/"false"; null is treated as an empty string.
 * @return array
 * @throws InvalidArgumentException If $content is a truthy value that is not a string, array, number or boolean
 */
public static function getHTMLChunks($content)
{
    if ($content && !is_string($content) && !is_array($content) && !is_numeric($content) && !is_bool($content)) {
        throw new InvalidArgumentException('$content parameter needs to be a string or array');
    }

    if (is_bool($content)) {
        // Convert boolean to strings
        $content = $content ? "true" : "false";
    } elseif (is_array($content)) {
        // Convert array to CSV
        $content = implode(',', $content);
    }

    // null passes the guard above (it is falsy); cast explicitly so the string
    // functions below never receive null, which is deprecated since PHP 8.1.
    // Numbers are cast the same way they would otherwise be coerced.
    $content = (string) $content;

    // Treat non-breaking-space entities as ordinary spaces and make sure every
    // tag is surrounded by whitespace so it is split off from adjacent words.
    // NOTE(review): the search token had degraded to a plain space (a no-op
    // replacement); "&nbsp;" is restored here - confirm against upstream.
    $content = str_replace(["&nbsp;", "<", ">"], [" ", " <", "> "], $content);
    $candidateChunks = preg_split("/[\t\r\n ]+/", $content);
    $total = count($candidateChunks);

    $chunks = [];
    for ($i = 0; $i < $total; $i++) {
        $item = $candidateChunks[$i];
        if (isset($item[0]) && $item[0] == "<") {
            // Start of a tag: keep consuming candidates until one ends in ">"
            // (or the input runs out, for an unterminated tag)
            $newChunk = $item;
            // substr() is safe on an empty candidate, whereas indexing
            // $item[strlen($item) - 1] would read offset -1 and raise a warning
            while (substr($item, -1) != ">") {
                if (++$i >= $total) {
                    break;
                }
                $item = $candidateChunks[$i];
                $newChunk .= ' ' . $item;
            }
            $chunks[] = $newChunk;
        } else {
            $chunks[] = $item;
        }
    }

    return $chunks;
}
202
|
|
|
} |
203
|
|
|
|
The PSR-1: Basic Coding Standard recommends that a file should either declare new symbols (classes, functions, constants, or similar) or cause side effects, but not both. Side effects are anything that executes logic, for example printing output, changing ini settings, or writing to a file.
The idea behind this recommendation is that merely auto-loading a class should not change the state of an application. It also promotes a cleaner style of programming and makes your code less prone to errors, because the logic is not spread out all over the place.
To learn more about the PSR-1, please see the PHP-FIG site on the PSR-1.