Diff   A
last analyzed

Complexity

Total Complexity 37

Size/Duplication

Total Lines 400
Duplicated Lines 0 %

Importance

Changes 3
Bugs 0 Features 0
Metric Value
wmc 37
eloc 117
c 3
b 0
f 0
dl 0
loc 400
rs 9.44

14 Methods

Rating   Name   Duplication   Size   Complexity  
A compareFiles() 0 6 1
A setCompareCharacters() 0 5 1
A createStyler() 0 3 1
A __construct() 0 4 1
A compareStrings() 0 5 1
A toHTML() 0 6 1
A dispose() 0 8 1
A splitCharacters() 0 13 2
A computeTable() 0 31 4
A toHTMLTable() 0 7 1
B generatePartialDiff() 0 44 8
A toString() 0 6 1
A splitLines() 0 13 2
C toArray() 0 68 12
1
<?php
2
/**
3
 * File containing the {@see Diff} class.
4
 * 
5
 * @package Diff
6
 * @see Diff
7
 */
8
9
declare(strict_types=1);
10
11
namespace Mistralys\Diff;
12
13
use AppUtils\FileHelper;
use AppUtils\FileHelper_Exception;
use Mistralys\Diff\Renderer\HTML;
use Mistralys\Diff\Renderer\HTMLTable;
use Mistralys\Diff\Renderer\PlainText;
use Mistralys\Diff\Styler\Styler;
19
20
/**
 * Class used to analyze and render differences between two
 * strings. Directly derived from Kate Morley's Diff implementation.
 *
 * Typical usage: create an instance through the constructor or one
 * of the static factories ({@see Diff::compareStrings()},
 * {@see Diff::compareFiles()}), then call {@see Diff::toArray()} or
 * one of the rendering methods ({@see Diff::toString()},
 * {@see Diff::toHTML()}, {@see Diff::toHTMLTable()}).
 *
 * @package Diff
 * @author Kate Morley
 * @author Sebastian Mordziol <[email protected]>
 * @link http://iamkate.com/
 * @link http://code.iamkate.com/php/diff-implementation/
 * @license CC0-1.0 http://creativecommons.org/publicdomain/zero/1.0/legalcode
 */
class Diff
{
    public const ERROR_DIFF_ALREADY_DISPOSED = 66901;
    public const ERROR_CANNOT_SPLIT_STRING = 66902;

    /** Status code: the item is present in both strings. */
    public const UNMODIFIED = 0;
    /** Status code: the item is only present in the first string. */
    public const DELETED    = 1;
    /** Status code: the item is only present in the second string. */
    public const INSERTED   = 2;

    private bool $compareCharacters = false;
    private string $string1;
    private string $string2;
    private bool $disposed = false;

    /**
     * Items (lines or characters) of the first string. Only populated
     * while {@see Diff::toArray()} is running.
     *
     * @var string[]
     */
    private array $sequence1 = [];

    /**
     * Items (lines or characters) of the second string. Only populated
     * while {@see Diff::toArray()} is running.
     *
     * @var string[]
     */
    private array $sequence2 = [];

    /**
     * @param string $string1 The original ("before") string.
     * @param string $string2 The modified ("after") string.
     */
    public function __construct(string $string1, string $string2)
    {
        $this->string1 = $string1;
        $this->string2 = $string2;
    }

    /**
     * Sets whether to compare single characters. Default is to
     * compare only lines.
     *
     * @param bool $compare
     * @return Diff This instance, to allow chaining.
     */
    public function setCompareCharacters(bool $compare = true) : Diff
    {
        $this->compareCharacters = $compare;

        return $this;
    }

    /**
     * Creates a diff instance for two strings. No comparison is done
     * at this point: it is computed when {@see Diff::toArray()} or one
     * of the rendering methods is called.
     *
     * @param string $string1
     * @param string $string2
     * @param bool $compareCharacters Whether to compare single characters (compares lines otherwise)
     * @return Diff
     */
    public static function compareStrings(string $string1, string $string2, bool $compareCharacters = false) : Diff
    {
        $diff = new Diff($string1, $string2);
        $diff->setCompareCharacters($compareCharacters);

        return $diff;
    }

    /**
     * Like {@see Diff::compareStrings()}, but creates the diff for the
     * contents of two files.
     *
     * @param string $file1
     * @param string $file2
     * @param bool $compareCharacters Whether to compare single characters (compares lines otherwise)
     * @return Diff
     *
     * @throws FileHelper_Exception If one of the files cannot be found or opened.
     */
    public static function compareFiles(string $file1, string $file2, bool $compareCharacters = false) : Diff
    {
        return self::compareStrings(
            FileHelper::readContents($file1),
            FileHelper::readContents($file2),
            $compareCharacters
        );
    }

    /**
     * Creates an instance of the styler class, which
     * is used to access the CSS used for the syntax
     * highlighting in the HTML renderers.
     *
     * @return Styler
     */
    public static function createStyler() : Styler
    {
        return new Styler();
    }

    /**
     * Retrieves the raw array that contains the diff definitions
     * for the two strings.
     *
     * Each entry is a two-item array: the line (or character, when
     * comparing characters), followed by one of the status constants
     * {@see Diff::UNMODIFIED}, {@see Diff::DELETED} or
     * {@see Diff::INSERTED}.
     *
     * For example, comparing the following strings:
     *
     * Hello word
     * Hello world
     *
     * Will return the following array:
     *
     * <pre>
     * Array(
     *   [0] => Array
     *   (
     *     [0] => Hello word
     *     [1] => 1
     *   )
     *   [1] => Array
     *   (
     *     [0] => Hello world
     *     [1] => 2
     *   )
     * )
     * </pre>
     *
     * @return array<int,array<int,int|string>>
     * @throws DiffException If the diff has been disposed, or a string cannot be split.
     */
    public function toArray() : array
    {
        if ($this->disposed)
        {
            throw new DiffException(
                'The diff has been disposed.',
                'The toArray method cannot be called again after disposing.',
                self::ERROR_DIFF_ALREADY_DISPOSED
            );
        }

        try
        {
            if ($this->compareCharacters)
            {
                $this->sequence1 = self::splitCharacters($this->string1);
                $this->sequence2 = self::splitCharacters($this->string2);
            }
            else
            {
                $this->sequence1 = self::splitLines($this->string1);
                $this->sequence2 = self::splitLines($this->string2);
            }

            $totalSequence = count($this->sequence1);
            $start = 0;
            $end1 = $totalSequence - 1;
            $end2 = count($this->sequence2) - 1;

            // Skip any common prefix: it is unmodified by definition,
            // and leaving it out keeps the LCS table small.
            while ($start <= $end1 && $start <= $end2 && $this->sequence1[$start] === $this->sequence2[$start])
            {
                $start++;
            }

            // Skip any common suffix, for the same reason.
            while ($end1 >= $start && $end2 >= $start && $this->sequence1[$end1] === $this->sequence2[$end2])
            {
                $end1--;
                $end2--;
            }

            $diff = [];

            // Common prefix
            for ($index = 0; $index < $start; $index++)
            {
                $diff[] = [$this->sequence1[$index], self::UNMODIFIED];
            }

            // Differing middle part: generated back to front, so it is reversed here.
            foreach (array_reverse($this->generatePartialDiff($start, $end1, $end2)) as $entry)
            {
                $diff[] = $entry;
            }

            // Common suffix
            for ($index = $end1 + 1; $index < $totalSequence; $index++)
            {
                $diff[] = [$this->sequence1[$index], self::UNMODIFIED];
            }

            return $diff;
        }
        finally
        {
            // Clear the sequences to free up memory, even when splitting
            // one of the strings failed: we don't need them anymore.
            $this->sequence1 = [];
            $this->sequence2 = [];
        }
    }

    /**
     * Splits the string into individual lines, using any
     * newline sequence (`\R`) as separator.
     *
     * @param string $string
     * @throws DiffException If the string cannot be split.
     * @return string[]
     */
    public static function splitLines(string $string) : array
    {
        $split = preg_split('/\R/u', $string);

        // preg_split() returns false on failure, which can happen
        // with the "u" modifier when the subject is not valid UTF-8.
        if (is_array($split))
        {
            return $split;
        }

        throw new DiffException(
            'Could not split the target string.',
            'Could the string be badly formatted?',
            self::ERROR_CANNOT_SPLIT_STRING
        );
    }

    /**
     * Splits the string into individual (multibyte) characters.
     *
     * @param string $string
     * @throws DiffException If the string cannot be split.
     * @return string[]
     */
    public static function splitCharacters(string $string) : array
    {
        $split = mb_str_split($string);

        // Kept for PHP 7.4, where mb_str_split() can return false on failure.
        if (is_array($split))
        {
            return $split;
        }

        throw new DiffException(
            'Could not split the target string.',
            'Could the string be badly formatted?',
            self::ERROR_CANNOT_SPLIT_STRING
        );
    }

    /**
     * Returns the table of the longest common subsequence lengths
     * for the specified range of the current sequences.
     *
     * The table has one row per item of the first range and one column
     * per item of the second range, plus a leading all-zero row and
     * column that stand for "no items".
     *
     * @param int $start Index of the first item to compare, in both sequences.
     * @param int $end1 Index of the last item to compare in the first sequence.
     * @param int $end2 Index of the last item to compare in the second sequence.
     * @return array<int,array<int,int>>
     */
    private function computeTable(int $start, int $end1, int $end2) : array
    {
        // determine the lengths to be compared
        $length1 = $end1 - $start + 1;
        $length2 = $end2 - $start + 1;

        // initialise the table with the all-zero first row
        $table = [array_fill(0, $length2 + 1, 0)];

        for ($index1 = 1; $index1 <= $length1; $index1++)
        {
            $item1 = $this->sequence1[$index1 + $start - 1];
            $previousRow = $table[$index1 - 1];
            $row = [0];

            for ($index2 = 1; $index2 <= $length2; $index2++)
            {
                if ($item1 === $this->sequence2[$index2 + $start - 1])
                {
                    // matching items extend the subsequence found so far
                    $row[$index2] = $previousRow[$index2 - 1] + 1;
                }
                else
                {
                    // otherwise keep the best length reachable without one of the items
                    $row[$index2] = max($previousRow[$index2], $row[$index2 - 1]);
                }
            }

            $table[$index1] = $row;
        }

        return $table;
    }

    /**
     * Returns the partial diff for the specified range of the current
     * sequences, in reverse order (last entry first).
     *
     * @param int $start Index of the first item to compare, in both sequences.
     * @param int $end1 Index of the last item to compare in the first sequence.
     * @param int $end2 Index of the last item to compare in the second sequence.
     * @return array<int,array<int,int|string>>
     */
    private function generatePartialDiff(int $start, int $end1, int $end2) : array
    {
        // compute the table of the longest common subsequence lengths
        $table = $this->computeTable($start, $end1, $end2);

        $diff = [];

        // start in the bottom right corner of the table
        $index1 = count($table) - 1;
        $index2 = count($table[0]) - 1;

        // walk back until there are no items remaining in either sequence
        while ($index1 > 0 || $index2 > 0)
        {
            if (
                $index1 > 0 && $index2 > 0
                && $this->sequence1[$index1 + $start - 1] === $this->sequence2[$index2 + $start - 1]
            ) {
                // same item in both sequences
                $diff[] = [$this->sequence1[$index1 + $start - 1], self::UNMODIFIED];
                $index1--;
                $index2--;
            }
            elseif (
                $index2 > 0
                && $table[$index1][$index2] === $table[$index1][$index2 - 1]
            ) {
                // dropping the item of the second sequence does not shorten
                // the common subsequence: it was inserted
                $diff[] = [$this->sequence2[$index2 + $start - 1], self::INSERTED];
                $index2--;
            }
            else
            {
                // the item of the first sequence has no counterpart: it was deleted
                $diff[] = [$this->sequence1[$index1 + $start - 1], self::DELETED];
                $index1--;
            }
        }

        return $diff;
    }

    /**
     * Returns a diff as a string, where unmodified lines are prefixed by '  ',
     * deletions are prefixed by '- ', and insertions are prefixed by '+ '.
     *
     * @param string $separator Passed on to the renderer's setSeparator() method.
     * @return string
     */
    public function toString(string $separator = "\n") : string
    {
        $renderer = new PlainText($this);
        $renderer->setSeparator($separator);

        return $renderer->render();
    }

    /**
     * Returns a diff as an HTML string, where unmodified lines are contained
     * within 'span' elements, deletions are contained within 'del' elements, and
     * insertions are contained within 'ins' elements.
     *
     * @param string $separator Passed on to the renderer's setSeparator() method.
     * @return string
     */
    public function toHTML(string $separator = '<br>') : string
    {
        $renderer = new HTML($this);
        $renderer->setSeparator($separator);

        return $renderer->render();
    }

    /**
     * Returns a diff as an HTML table.
     *
     * @param string $indentation Passed on to the renderer's setIndentation() method.
     * @param string $separator Passed on to the renderer's setSeparator() method.
     * @return string
     */
    public function toHTMLTable(string $indentation = '', string $separator = '<br>') : string
    {
        $renderer = new HTMLTable($this);
        $renderer->setIndentation($indentation);
        $renderer->setSeparator($separator);

        return $renderer->render();
    }

    /**
     * Disposes of the diff by clearing the stored strings,
     * to free memory until the class is destructed. After
     * this, {@see Diff::toArray()} throws an exception.
     *
     * @return Diff This instance, to allow chaining.
     */
    public function dispose() : Diff
    {
        $this->string1 = '';
        $this->string2 = '';

        $this->disposed = true;

        return $this;
    }
}
433
434