Passed
Push — master ( f10efd...0687fc )
by Sebastian
02:47
created

Diff   A

Complexity

Total Complexity 36

Size/Duplication

Total Lines 391
Duplicated Lines 0 %

Importance

Changes 1
Bugs 0 Features 0
Metric Value
wmc 36
eloc 112
c 1
b 0
f 0
dl 0
loc 391
rs 9.52

13 Methods

Rating   Name   Duplication   Size   Complexity  
A splitString() 0 13 2
A compareFiles() 0 6 1
A setCompareCharacters() 0 5 1
A toHTML() 0 6 1
A dispose() 0 8 1
A computeTable() 0 31 4
A createStyler() 0 3 1
A __construct() 0 4 1
A compareStrings() 0 5 1
A toHTMLTable() 0 7 1
A toString() 0 6 1
B generatePartialDiff() 0 41 8
C toArray() 0 72 13
1
<?php
2
/**
3
 * File containing the {@see Diff} class.
4
 * 
5
 * @package Diff
6
 * @see Diff
7
 */
8
9
declare(strict_types=1);
10
11
namespace Mistralys\Diff;
12
13
use AppUtils\FileHelper;
14
use AppUtils\FileHelper_Exception;
15
use Mistrals\Diff\Renderer\HTMLTable;
16
use Mistrals\Diff\Renderer\PlainText;
17
use Mistrals\Diff\Renderer\HTML;
18
use Mistralys\Diff\Styler\Styler;
19
20
/**
21
 * Class used to analyze and render differences between two
22
 * strings. Directly derived from Kate Morley's Diff implementation.
23
 *
24
 * @package Diff
25
 * @author Kate Morley
26
 * @author Sebastian Mordziol <[email protected]>
27
 * @link http://iamkate.com/
28
 * @link http://code.iamkate.com/php/diff-implementation/
29
 * @license CC0-1.0 http://creativecommons.org/publicdomain/zero/1.0/legalcode
30
 */
31
class Diff
{
    const ERROR_DIFF_ALREADY_DISPOSED = 66901;
    const ERROR_CANNOT_SPLIT_STRING = 66902;

    // Status codes used as the second value of every entry
    // in the array returned by toArray().
    const UNMODIFIED = 0;
    const DELETED    = 1;
    const INSERTED   = 2;

   /**
    * Whether single characters are compared instead of whole lines.
    *
    * @var bool
    */
    private $compareCharacters = false;

   /**
    * The first (original) string.
    *
    * @var string
    */
    private $string1;

   /**
    * The second (modified) string.
    *
    * @var string
    */
    private $string2;

   /**
    * Items (lines or characters) of the first string. Only
    * populated while toArray() is running.
    *
    * @var string[]
    */
    private $sequence1 = [];

   /**
    * Items (lines or characters) of the second string. Only
    * populated while toArray() is running.
    *
    * @var string[]
    */
    private $sequence2 = [];

   /**
    * Set by dispose(); toArray() refuses to run afterwards.
    *
    * @var bool
    */
    private $disposed = false;

   /**
    * @param string $string1 The original string.
    * @param string $string2 The string to compare it with.
    */
    public function __construct(string $string1, string $string2)
    {
        $this->string1 = $string1;
        $this->string2 = $string2;
    }

   /**
    * Sets whether to compare single characters. Default is to
    * compare only lines.
    *
    * @param bool $compare
    * @return Diff
    */
    public function setCompareCharacters(bool $compare=true) : Diff
    {
        $this->compareCharacters = $compare;

        return $this;
    }

   /**
    * Creates a diff instance for two strings. Nothing is computed
    * yet: use {@see Diff::toArray()} to retrieve the raw diff, or one
    * of the toString() / toHTML() / toHTMLTable() methods to render it.
    *
    * @param string $string1
    * @param string $string2
    * @param bool $compareCharacters Whether to compare single characters (compares lines otherwise)
    * @return Diff
    */
    public static function compareStrings(string $string1, string $string2, bool $compareCharacters = false) : Diff
    {
        $diff = new Diff($string1, $string2);
        $diff->setCompareCharacters($compareCharacters);
        return $diff;
    }

   /**
    * Like {@see Diff::compareStrings()}, but creates the diff for the
    * contents of two files.
    *
    * @param string $file1
    * @param string $file2
    * @param bool $compareCharacters Whether to compare single characters (compares lines otherwise)
    * @return Diff
    *
    * @throws FileHelper_Exception If one of the files cannot be found or opened.
    */
    public static function compareFiles(string $file1, string $file2, bool $compareCharacters = false) : Diff
    {
        return self::compareStrings(
            FileHelper::readContents($file1),
            FileHelper::readContents($file2),
            $compareCharacters
        );
    }

   /**
    * Creates an instance of the styler class, which
    * is used to access the CSS used for the syntax
    * highlighting in the HTML renderers.
    *
    * @return Styler
    */
    public static function createStyler() : Styler
    {
        return new Styler();
    }

   /**
    * Retrieves the raw array that contains the diff definitions
    * for the two strings.
    *
    * Each entry is an array with two values: a line (or a character,
    * if character comparison is enabled), and one of the status
    * constants Diff::UNMODIFIED (present in both strings),
    * Diff::DELETED (only in the first string) or Diff::INSERTED
    * (only in the second string).
    *
    * For example, comparing the following strings:
    *
    * Hello word
    * Hello world
    *
    * Will return the following array:
    *
    * <pre>
    * Array(
    *   [0] => Array
    *   (
    *     [0] => Hello word
    *     [1] => 1
    *   )
    *   [1] => Array
    *   (
    *     [0] => Hello world
    *     [1] => 2
    *   )
    * )
    * </pre>
    *
    * Items are compared strictly (===), so lines that merely look
    * like the same number ("1e3" and "1000") count as different.
    *
    * @return array<int,array<int,int|string>>
    * @throws DiffException If the diff has been disposed, or a string could not be split.
    */
    public function toArray() : array
    {
        if($this->disposed)
        {
            throw new DiffException(
                'The diff has been disposed.',
                'The toArray method cannot be called again after disposing.',
                self::ERROR_DIFF_ALREADY_DISPOSED
            );
        }

        // initialise the sequences: both are always arrays of items,
        // so the rest of the algorithm is identical for both modes
        if($this->compareCharacters)
        {
            list($this->sequence1, $this->sequence2) = $this->splitCharacters($this->string1, $this->string2);
        }
        else
        {
            $this->sequence1 = $this->splitString($this->string1);
            $this->sequence2 = $this->splitString($this->string2);
        }

        // initialise the comparison start and end positions
        $total1 = count($this->sequence1);
        $start = 0;
        $end1 = $total1 - 1;
        $end2 = count($this->sequence2) - 1;

        // skip any common prefix
        while ($start <= $end1 && $start <= $end2 && $this->sequence1[$start] === $this->sequence2[$start])
        {
            $start++;
        }

        // skip any common suffix
        while ($end1 >= $start && $end2 >= $start && $this->sequence1[$end1] === $this->sequence2[$end2])
        {
            $end1--;
            $end2--;
        }

        // compute the table of longest common subsequence lengths
        $table = $this->computeTable($start, $end1, $end2);

        // generate the partial diff (it is returned in reverse order)
        $partialDiff = $this->generatePartialDiff($table, $start);

        // generate the full diff: common prefix, changed part, common suffix
        $diff = [];

        for ($index = 0; $index < $start; $index++)
        {
            $diff[] = [$this->sequence1[$index], self::UNMODIFIED];
        }

        foreach (array_reverse($partialDiff) as $entry)
        {
            $diff[] = $entry;
        }

        for ($index = $end1 + 1; $index < $total1; $index++)
        {
            $diff[] = [$this->sequence1[$index], self::UNMODIFIED];
        }

        // the sequences are only needed during the computation
        $this->sequence1 = [];
        $this->sequence2 = [];

        return $diff;
    }

   /**
    * Splits the string into individual lines, on any newline
    * sequence (\R).
    *
    * The pattern is applied in UTF-8 mode first, because in byte
    * mode \R also matches the lone byte 0x85, which is part of
    * multibyte characters such as "Å". Strings that are not valid
    * UTF-8 are split in byte mode instead.
    *
    * @param string $string
    * @throws DiffException
    * @return string[]
    */
    private function splitString(string $string) : array
    {
        $split = preg_split('/\R/u', $string);

        if(!is_array($split))
        {
            $split = preg_split('/\R/', $string);
        }

        if(is_array($split))
        {
            return $split;
        }

        throw new DiffException(
            'Could not split the target string.',
            'Could the string be badly formatted?',
            self::ERROR_CANNOT_SPLIT_STRING
        );
    }

   /**
    * Splits both strings into individual characters.
    *
    * Whole UTF-8 characters are used when both strings are valid
    * UTF-8. Otherwise both are split into single bytes, so that
    * the two sequences always stay comparable with each other.
    *
    * @param string $string1
    * @param string $string2
    * @return array<int,string[]> The two character lists, in the order of the parameters.
    */
    private function splitCharacters(string $string1, string $string2) : array
    {
        $chars1 = preg_split('//u', $string1, -1, PREG_SPLIT_NO_EMPTY);
        $chars2 = preg_split('//u', $string2, -1, PREG_SPLIT_NO_EMPTY);

        if(is_array($chars1) && is_array($chars2))
        {
            return [$chars1, $chars2];
        }

        // str_split() does not return an empty array for an empty
        // string on all PHP versions, hence the explicit checks
        return [
            $string1 === '' ? [] : str_split($string1),
            $string2 === '' ? [] : str_split($string2)
        ];
    }

   /**
    * Returns the table of longest common subsequence lengths for
    * the part of the sequences that lies between the common prefix
    * and the common suffix.
    *
    * @param int $start Index of the first item after the common prefix.
    * @param int $end1 Index of the last item before the common suffix, in the first sequence.
    * @param int $end2 Index of the last item before the common suffix, in the second sequence.
    * @return array<int,array<int,int>>
    */
    private function computeTable(int $start, int $end1, int $end2) : array
    {
        // determine the lengths to be compared
        $length1 = $end1 - $start + 1;
        $length2 = $end2 - $start + 1;

        // initialise the table: row 0 and column 0 stand for "no items"
        $table = [array_fill(0, $length2 + 1, 0)];

        // loop over the rows
        for ($index1 = 1; $index1 <= $length1; $index1++)
        {
            // create the new row
            $table[$index1] = [0];

            // loop over the columns
            for ($index2 = 1; $index2 <= $length2; $index2++)
            {
                // store the longest common subsequence length
                if ($this->sequence1[$index1 + $start - 1] === $this->sequence2[$index2 + $start - 1])
                {
                    $table[$index1][$index2] = $table[$index1 - 1][$index2 - 1] + 1;
                }
                else
                {
                    $table[$index1][$index2] = max(
                        $table[$index1 - 1][$index2],
                        $table[$index1][$index2 - 1]
                    );
                }
            }
        }

        return $table;
    }

   /**
    * Returns the partial diff for the specified sequences, in reverse order.
    *
    * The table is walked backwards from its last cell, which is why
    * the entries come out last-to-first.
    *
    * @param array<int,array<int,int>> $table The table created by computeTable().
    * @param int $start Index of the first item after the common prefix.
    * @return array<int,array<int,int|string>>
    */
    private function generatePartialDiff(array $table, int $start) : array
    {
        // initialise the diff
        $diff = [];

        // initialise the indices
        $index1 = count($table) - 1;
        $index2 = count($table[0]) - 1;

        // loop until there are no items remaining in either sequence
        while ($index1 > 0 || $index2 > 0)
        {
            // check what has happened to the items at these indices
            if (
                $index1 > 0 && $index2 > 0
                && $this->sequence1[$index1 + $start - 1] === $this->sequence2[$index2 + $start - 1]
            ) {
                // same item in both sequences
                $diff[] = [$this->sequence1[$index1 + $start - 1], self::UNMODIFIED];
                $index1--;
                $index2--;
            }
            elseif (
                $index2 > 0
                && $table[$index1][$index2] === $table[$index1][$index2 - 1]
            ) {
                // item only present in the second sequence
                $diff[] = [$this->sequence2[$index2 + $start - 1], self::INSERTED];
                $index2--;
            }
            else
            {
                // item only present in the first sequence
                $diff[] = [$this->sequence1[$index1 + $start - 1], self::DELETED];
                $index1--;
            }
        }

        // return the diff
        return $diff;
    }

   /**
    * Returns a diff as a string, where unmodified lines are prefixed by '  ',
    * deletions are prefixed by '- ', and insertions are prefixed by '+ '.
    *
    * @param string $separator
    * @return string
    */
    public function toString(string $separator = "\n") : string
    {
        $renderer = new PlainText($this);
        $renderer->setSeparator($separator);

        return $renderer->render();
    }

   /**
    * Returns a diff as an HTML string, where unmodified lines are contained
    * within 'span' elements, deletions are contained within 'del' elements, and
    * insertions are contained within 'ins' elements.
    *
    * @param string $separator
    * @return string
    */
    public function toHTML(string $separator = '<br>') : string
    {
        $renderer = new HTML($this);
        $renderer->setSeparator($separator);

        return $renderer->render();
    }

   /**
    * Returns a diff as an HTML table.
    *
    * @param string $indentation
    * @param string $separator
    * @return string
    */
    public function toHTMLTable(string $indentation = '', string $separator = '<br>') : string
    {
        $renderer = new HTMLTable($this);
        $renderer->setIndentation($indentation);
        $renderer->setSeparator($separator);

        return $renderer->render();
    }

   /**
    * Disposes of the diff by clearing the stored strings,
    * to free memory until the class is destructed. Calling
    * toArray() afterwards throws an exception.
    *
    * @return Diff
    */
    public function dispose() : Diff
    {
        $this->string1 = '';
        $this->string2 = '';

        $this->disposed = true;

        return $this;
    }
}
424
425