<?php
/**
 * File containing the {@see Diff} class.
 *
 * @package Diff
 * @see Diff
 */

declare(strict_types=1);

namespace Mistralys\Diff;

use AppUtils\FileHelper;
use AppUtils\FileHelper_Exception;
// The renderers live under the same vendor namespace as this class and the
// Styler ("Mistralys", not "Mistrals").
use Mistralys\Diff\Renderer\HTML;
use Mistralys\Diff\Renderer\HTMLTable;
use Mistralys\Diff\Renderer\PlainText;
use Mistralys\Diff\Styler\Styler;

/**
 * Class used to analyze and render differences between two
 * strings. Directly derived from Kate Morley's Diff implementation.
 *
 * The comparison works either line by line (default) or character
 * by character, see {@see Diff::setCompareCharacters()}. The raw
 * result is available via {@see Diff::toArray()}; the `toString()`,
 * `toHTML()` and `toHTMLTable()` methods delegate to renderer classes.
 *
 * @package Diff
 * @author Kate Morley
 * @author Sebastian Mordziol
 * @link http://iamkate.com/
 * @link http://code.iamkate.com/php/diff-implementation/
 * @license CC0-1.0 http://creativecommons.org/publicdomain/zero/1.0/legalcode
 */
class Diff
{
    public const ERROR_DIFF_ALREADY_DISPOSED = 66901;
    public const ERROR_CANNOT_SPLIT_STRING = 66902;

    /** Status code: the item is present in both strings. */
    public const UNMODIFIED = 0;

    /** Status code: the item is only present in the first string. */
    public const DELETED = 1;

    /** Status code: the item is only present in the second string. */
    public const INSERTED = 2;

    private bool $compareCharacters = false;
    private string $string1;
    private string $string2;
    private bool $disposed = false;

    /**
     * Items (lines or characters) of the first string. Only populated
     * while {@see Diff::toArray()} is running, empty otherwise.
     *
     * @var string[]
     */
    private array $sequence1 = [];

    /**
     * Items (lines or characters) of the second string. Only populated
     * while {@see Diff::toArray()} is running, empty otherwise.
     *
     * @var string[]
     */
    private array $sequence2 = [];

    /**
     * @param string $string1 The original string.
     * @param string $string2 The string to compare the original with.
     */
    public function __construct(string $string1, string $string2)
    {
        $this->string1 = $string1;
        $this->string2 = $string2;
    }

    /**
     * Sets whether to compare single characters. Default is to
     * compare only lines.
     *
     * @param bool $compare
     * @return Diff
     */
    public function setCompareCharacters(bool $compare=true) : Diff
    {
        $this->compareCharacters = $compare;

        return $this;
    }

    /**
     * Creates a diff instance for two strings. No comparison is done
     * yet: the diff is computed when {@see Diff::toArray()} is called
     * on the returned instance.
     *
     * @param string $string1
     * @param string $string2
     * @param bool $compareCharacters Whether to compare single characters (compares lines otherwise)
     * @return Diff
     */
    public static function compareStrings(string $string1, string $string2, bool $compareCharacters = false) : Diff
    {
        $diff = new Diff($string1, $string2);
        $diff->setCompareCharacters($compareCharacters);

        return $diff;
    }

    /**
     * Like {@see Diff::compareStrings()}, but reads the two strings
     * to compare from files.
     *
     * @param string $file1
     * @param string $file2
     * @param bool $compareCharacters Whether to compare single characters (compares lines otherwise)
     * @return Diff
     *
     * @throws FileHelper_Exception If one of the files cannot be found or opened.
     */
    public static function compareFiles(string $file1, string $file2, bool $compareCharacters = false) : Diff
    {
        return self::compareStrings(
            FileHelper::readContents($file1),
            FileHelper::readContents($file2),
            $compareCharacters
        );
    }

    /**
     * Creates an instance of the styler class, which
     * is used to access the CSS used for the syntax
     * highlighting in the HTML renderers.
     *
     * @return Styler
     */
    public static function createStyler() : Styler
    {
        return new Styler();
    }

    /**
     * Retrieves the raw array that contains the diff definitions
     * for the two strings.
     *
     * For example, comparing the following strings:
     *
     * Hello word
     * Hello world
     *
     * Will return the following array:
     *
     * <pre>
     * Array(
     *   [0] => Array
     *   (
     *     [0] => Hello word
     *     [1] => 1
     *   )
     *   [1] => Array
     *   (
     *     [0] => Hello world
     *     [1] => 2
     *   )
     * )
     * </pre>
     *
     * Where the first entry in the sub-array is the line (or
     * character, when comparing characters), and the second entry
     * is the status code: {@see Diff::UNMODIFIED}, {@see Diff::DELETED}
     * or {@see Diff::INSERTED}.
     *
     * The diff is recomputed on every call.
     *
     * @return array<int,array<int,int|string>>
     * @throws DiffException If the diff has been disposed, or a string cannot be split.
     */
    public function toArray() : array
    {
        if ($this->disposed)
        {
            throw new DiffException(
                'The diff has been disposed.',
                'The toArray method cannot be called again after disposing.',
                self::ERROR_DIFF_ALREADY_DISPOSED
            );
        }

        try
        {
            if ($this->compareCharacters)
            {
                $this->sequence1 = self::splitCharacters($this->string1);
                $this->sequence2 = self::splitCharacters($this->string2);
            }
            else
            {
                $this->sequence1 = self::splitLines($this->string1);
                $this->sequence2 = self::splitLines($this->string2);
            }

            // Comparison window: [$start, $end1] in the first sequence
            // and [$start, $end2] in the second one.
            $start = 0;
            $totalSequence = count($this->sequence1);
            $end1 = $totalSequence - 1;
            $end2 = count($this->sequence2) - 1;

            // Skip any common prefix.
            while ($start <= $end1 && $start <= $end2 && $this->sequence1[$start] === $this->sequence2[$start])
            {
                $start++;
            }

            // Skip any common suffix.
            while ($end1 >= $start && $end2 >= $start && $this->sequence1[$end1] === $this->sequence2[$end2])
            {
                $end1--;
                $end2--;
            }

            // Only the differing middle part needs the (expensive) LCS table.
            $partialDiff = $this->generatePartialDiff($start, $end1, $end2);

            $diff = [];

            // Common prefix.
            for ($index = 0; $index < $start; $index++)
            {
                $diff[] = [$this->sequence1[$index], self::UNMODIFIED];
            }

            // The partial diff is in reverse order: popping restores the order.
            while ($partialDiff !== [])
            {
                $diff[] = array_pop($partialDiff);
            }

            // Common suffix.
            for ($index = $end1 + 1; $index < $totalSequence; $index++)
            {
                $diff[] = [$this->sequence1[$index], self::UNMODIFIED];
            }

            return $diff;
        }
        finally
        {
            // Release the working sequences to free up memory, even
            // when splitting one of the strings failed half-way.
            $this->sequence1 = [];
            $this->sequence2 = [];
        }
    }

    /**
     * Splits the string into individual lines, on any newline
     * sequence matched by the `\R` pattern. The pattern uses the
     * `u` modifier, so the string is handled as UTF-8.
     *
     * @param string $string
     * @throws DiffException If the string could not be split.
     * @return string[]
     */
    public static function splitLines(string $string) : array
    {
        $split = preg_split('/\R/u', $string);

        if (is_array($split))
        {
            return $split;
        }

        throw new DiffException(
            'Could not split the target string.',
            'Could the string be badly formatted?',
            self::ERROR_CANNOT_SPLIT_STRING
        );
    }

    /**
     * Splits the string into individual characters, using
     * `mb_str_split()` with its default encoding.
     *
     * @param string $string
     * @throws DiffException If the string could not be split.
     * @return string[]
     */
    public static function splitCharacters(string $string) : array
    {
        $split = mb_str_split($string);

        // Guard kept for PHP versions where mb_str_split() can return false.
        if (is_array($split))
        {
            return $split;
        }

        throw new DiffException(
            'Could not split the target string.',
            'Could the string be badly formatted?',
            self::ERROR_CANNOT_SPLIT_STRING
        );
    }

    /**
     * Returns the table of the longest common subsequence lengths
     * for the specified window of the two sequences.
     *
     * The entry at [i][j] is the LCS length of the first i items of
     * the first window and the first j items of the second window.
     *
     * @param int $start First index of the window in both sequences.
     * @param int $end1 Last index of the window in the first sequence.
     * @param int $end2 Last index of the window in the second sequence.
     * @return array<int,array<int,int>>
     */
    private function computeTable(int $start, int $end1, int $end2) : array
    {
        $length1 = $end1 - $start + 1;
        $length2 = $end2 - $start + 1;

        // Row 0 and column 0 stand for the empty prefix: all zeros.
        $table = [array_fill(0, $length2 + 1, 0)];

        for ($index1 = 1; $index1 <= $length1; $index1++)
        {
            $table[$index1] = [0];
            $item1 = $this->sequence1[$index1 + $start - 1];

            for ($index2 = 1; $index2 <= $length2; $index2++)
            {
                if ($item1 === $this->sequence2[$index2 + $start - 1])
                {
                    $table[$index1][$index2] = $table[$index1 - 1][$index2 - 1] + 1;
                }
                else
                {
                    $table[$index1][$index2] = max(
                        $table[$index1 - 1][$index2],
                        $table[$index1][$index2 - 1]
                    );
                }
            }
        }

        return $table;
    }

    /**
     * Returns the partial diff for the specified window of the two
     * sequences, in reverse order (last item first).
     *
     * @param int $start First index of the window in both sequences.
     * @param int $end1 Last index of the window in the first sequence.
     * @param int $end2 Last index of the window in the second sequence.
     * @return array<int,array<int,int|string>>
     */
    private function generatePartialDiff(int $start, int $end1, int $end2) : array
    {
        $table = $this->computeTable($start, $end1, $end2);

        $diff = [];

        // Start in the bottom right corner of the table and walk
        // back towards the top left corner.
        $index1 = count($table) - 1;
        $index2 = count($table[0]) - 1;

        // Loop until there are no items remaining in either sequence.
        while ($index1 > 0 || $index2 > 0)
        {
            if (
                $index1 > 0 && $index2 > 0
                && $this->sequence1[$index1 + $start - 1] === $this->sequence2[$index2 + $start - 1]
            ) {
                // Same item in both sequences.
                $diff[] = [$this->sequence1[$index1 + $start - 1], self::UNMODIFIED];
                $index1--;
                $index2--;
            }
            elseif ($index2 > 0 && $table[$index1][$index2] === $table[$index1][$index2 - 1])
            {
                // Dropping the item of the second sequence does not
                // shorten the LCS: it was inserted.
                $diff[] = [$this->sequence2[$index2 + $start - 1], self::INSERTED];
                $index2--;
            }
            else
            {
                // Otherwise the item of the first sequence was deleted.
                $diff[] = [$this->sequence1[$index1 + $start - 1], self::DELETED];
                $index1--;
            }
        }

        return $diff;
    }

    /**
     * Returns the diff as a string, rendered by the PlainText
     * renderer with the given separator.
     *
     * @param string $separator
     * @return string
     */
    public function toString(string $separator = "\n") : string
    {
        $renderer = new PlainText($this);
        $renderer->setSeparator($separator);

        return $renderer->render();
    }

    /**
     * Returns the diff as an HTML string, rendered by the HTML
     * renderer with the given separator.
     *
     * @param string $separator
     * @return string
     */
    public function toHTML(string $separator = '<br>') : string
    {
        $renderer = new HTML($this);
        $renderer->setSeparator($separator);

        return $renderer->render();
    }

    /**
     * Returns the diff as an HTML table, rendered by the HTMLTable
     * renderer with the given indentation and separator.
     *
     * @param string $indentation
     * @param string $separator
     * @return string
     */
    public function toHTMLTable(string $indentation = '', string $separator = '<br>') : string
    {
        $renderer = new HTMLTable($this);
        $renderer->setIndentation($indentation);
        $renderer->setSeparator($separator);

        return $renderer->render();
    }

    /**
     * Disposes of the diff by clearing the stored strings,
     * to free memory until the class is destructed. Calling
     * {@see Diff::toArray()} afterwards throws an exception.
     *
     * @return Diff
     */
    public function dispose() : Diff
    {
        $this->string1 = '';
        $this->string2 = '';

        $this->disposed = true;

        return $this;
    }
}