Passed
Push — master ( ddbfa4...e1019e )
by Josh
03:27
created

lib/Caxy/HtmlDiff/ListDiff.php (5 issues)

Upgrade to new PHP Analysis Engine

These results are based on our legacy PHP analysis, consider migrating to our new PHP analysis engine instead. Learn more

1
<?php
2
3
namespace Caxy\HtmlDiff;
4
5
class ListDiff extends HtmlDiff
6
{
7
    /**
8
     * This is the minimum percentage a list item can match its counterpart in order to be considered a match.
9
     *
10
     * @var int
11
     */
12
    protected static $listMatchThreshold = 35;
13
14
    /** @var array */
15
    protected $listWords = array();
16
17
    /** @var array */
18
    protected $listTags = array();
19
20
    /** @var array */
21
    protected $listIsolatedDiffTags = array();
22
23
    /** @var array */
24
    protected $isolatedDiffTags = array(
25
        'ol' => '[[REPLACE_ORDERED_LIST]]',
26
        'ul' => '[[REPLACE_UNORDERED_LIST]]',
27
        'dl' => '[[REPLACE_DEFINITION_LIST]]',
28
    );
29
30
    /**
31
     * List (li) placeholder.
32
     *
33
     * @var string
34
     */
35
    protected static $listPlaceHolder = '[[REPLACE_LIST_ITEM]]';
36
37
    /**
38
     * Holds the type of list this is ol, ul, dl.
39
     *
40
     * @var string
41
     */
42
    protected $listType;
43
44
    /**
45
     * Used to hold what type of list the old list is.
46
     *
47
     * @var string
48
     */
49
    protected $oldListType;
50
51
    /**
52
     * Used to hold what type of list the new list is.
53
     *
54
     * @var string
55
     */
56
    protected $newListType;
57
58
    /**
59
     * Holds the old and new word lists that make up the content of this list.
60
     *
61
     * @var array
62
     */
63
    protected $list;
64
65
    /**
66
     * Contains the old/new child lists content within this list.
67
     *
68
     * @var array
69
     */
70
    protected $childLists;
71
72
    /**
73
     * Contains the old/new text strings that match.
74
     *
75
     * @var array
76
     */
77
    protected $textMatches;
78
79
    /**
80
     * Contains the indexed start positions of each list within word string.
81
     *
82
     * @var array
83
     */
84
    protected $listsIndex;
85
86
    /**
87
     * Array that holds the index of all content outside of the array. Format is array(index => content).
88
     *
89
     * @var array
90
     */
91
    protected $contentIndex = array();
92
93
    /** 
94
     * Holds the order and data on each list/content block within this list.
95
     *
96
     * @var array
97
     */
98
    protected $diffOrderIndex = array();
99
100
    /**
101
     * This is the opening ol, ul or dl list tag of the old list.
102
     *
103
     * @var string
104
     */
105
    protected $oldParentTag;
106
107
    /**
108
     * This is the opening ol, ul or dl list tag of the new list.
109
     *
110
     * @var string
111
     */
112
    protected $newParentTag;
113
114
    /**
     * Builds the diff for this list.
     *
     * The first three calls reuse the preparation steps the parent class runs in its own
     * build(); the list-specific work then happens in diffListContent(), which fills
     * $this->content.
     *
     * @return string
     */
    public function build()
    {
        // Preparation shared with the parent class.
        $this->splitInputsToWords();
        $this->replaceIsolatedDiffTags();
        $this->indexNewWords();

        // List-specific diffing.
        $this->diffListContent();

        return $this->content;
    }
131
132
    /**
     * Runs the list-specific diff pipeline. The order of the steps matters: each one
     * consumes the state prepared by the previous ones.
     */
    protected function diffListContent()
    {
        // 1. Reduce the single isolated list (passed as a one-item array) to its inner words.
        $this->formatThisListContent();

        // 2. Index the content that lives outside of list item tags.
        $this->indexContent();

        // 3. Swap nested lists for placeholders.
        $this->replaceListIsolatedDiffTags();

        // 4. Pair every NEW list node with its best OLD counterpart, so that we can tell
        //    whether a node was added, removed, or changed.
        $this->matchAndCompareLists();

        // 5. Diff each match and content block; nested lists are handed to the parent's
        //    diffList function.
        $this->diff();
    }
163
164
    /**
     * Populates contentIndex and diffOrderIndex for use in diff().
     *
     * For both the old and the new word list this records, in document order:
     *  - a 'list' entry for every opening list item tag found outside a nested isolated tag;
     *  - a 'content' entry for every run of words found while no list item is open.
     * The text of each content run is accumulated in contentIndex[type][position].
     */
    protected function indexContent()
    {
        $this->contentIndex = array();
        $this->diffOrderIndex = array('new' => array(), 'old' => array());

        foreach ($this->list as $type => $words) {
            $this->contentIndex[$type] = array();

            $listDepth = 0;           // how many list items are currently open
            $isolatedDepth = 0;       // how many nested isolated tags are currently open
            $currentBlock = 0;        // content block that words are appended to
            $startsNewBlock = true;   // whether the next content word opens a new block
            $nextListPosition = 0;
            $nextContentPosition = 0;

            foreach ($words as $wordIndex => $word) {
                // List item tags are only tracked while we are not inside a nested isolated tag.
                if (!$isolatedDepth) {
                    if ($this->isOpeningListTag($word)) {
                        ++$listDepth;

                        $this->diffOrderIndex[$type][] = array(
                            'type' => 'list',
                            'position' => $nextListPosition,
                            'index' => $wordIndex,
                        );
                        ++$nextListPosition;
                        continue;
                    }

                    if ($this->isClosingListTag($word)) {
                        --$listDepth;

                        if ($listDepth == 0) {
                            $startsNewBlock = true;
                        }
                        continue;
                    }
                }

                if ($this->isOpeningIsolatedDiffTag($word)) {
                    ++$isolatedDepth;
                }

                if ($this->isClosingIsolatedDiffTag($word)) {
                    --$isolatedDepth;
                }

                if ($listDepth == 0) {
                    $blockIsFree = !array_key_exists($nextContentPosition, $this->contentIndex[$type]);
                    if ($startsNewBlock && $blockIsFree) {
                        $this->diffOrderIndex[$type][] = array(
                            'type' => 'content',
                            'position' => $nextContentPosition,
                            'index' => $wordIndex,
                        );

                        $currentBlock = $nextContentPosition;
                        $this->contentIndex[$type][$currentBlock] = '';
                        ++$nextContentPosition;
                    }

                    $this->contentIndex[$type][$currentBlock] .= $word;
                }

                $startsNewBlock = false;
            }
        }
    }
221
222
    /**
     * Fills $this->list with the old and new versions of this list's content.
     *
     * For each side the first isolated diff tag entry is passed through formatList(), which
     * strips the wrapping ul, ol or dl tags and records the list type. A side without any
     * entry gets an empty array. listType is then taken from the new list, falling back to
     * the old one.
     */
    protected function formatThisListContent()
    {
        $sources = array(
            'old' => $this->oldIsolatedDiffTags,
            'new' => $this->newIsolatedDiffTags,
        );

        foreach ($sources as $type => $isolatedTags) {
            $lists = array_values($isolatedTags);

            if (count($lists)) {
                $this->list[$type] = $this->formatList($lists[0], $type);
            } else {
                $this->list[$type] = array();
            }
        }

        $this->listType = $this->newListType ?: $this->oldListType;
    }
243
244
    /**
     * Strips the wrapping list tags from a word list and records the parent tag and list type.
     *
     * When the first and last words are both tags listed in $isolatedDiffTags (ol, ul, dl),
     * they are removed from the returned array. If the first word is an opening tag, it is
     * stored as the old/new parent tag and its tag name as the old/new list type, depending
     * on $index. Any other input is returned untouched.
     *
     * @param array  $arrayData words of the list, including its wrapping tags
     * @param string $index     'old' or 'new'
     *
     * @return array
     */
    protected function formatList(array $arrayData, $index = 'old')
    {
        // Nothing to strip; also avoids reading offsets 0 and -1 of an empty array.
        if (!count($arrayData)) {
            return $arrayData;
        }

        $firstWord = $arrayData[0];
        $openingTag = $this->getAndStripTag($firstWord);
        $closingTag = $this->getAndStripTag($arrayData[count($arrayData) - 1]);

        $isWrappedInListTags = array_key_exists($openingTag, $this->isolatedDiffTags)
            && array_key_exists($closingTag, $this->isolatedDiffTags);

        if (!$isWrappedInListTags) {
            return $arrayData;
        }

        if ($this->isOpeningTag($firstWord)) {
            if ($index == 'new') {
                $this->newParentTag = $firstWord;
                $this->newListType = $openingTag;
            }

            if ($index == 'old') {
                $this->oldParentTag = $firstWord;
                $this->oldListType = $openingTag;
            }
        }

        // Drop the wrapping opening and closing tags.
        array_shift($arrayData);
        array_pop($arrayData);

        return $arrayData;
    }
274
275
    /**
     * Extracts the tag name from a tag string.
     *
     * Every character that is not a letter, a digit or a space is removed, and the part
     * before the first space is returned (e.g. '</ol>' gives 'ol').
     *
     * @param string $tag
     *
     * @return string
     */
    protected function getAndStripTag($tag)
    {
        $stripped = preg_replace('/[^A-Za-z0-9 ]/', '', $tag);
        $parts = explode(' ', $stripped, 2);

        return $parts[0];
    }
286
287
    /**
     * Prepares everything diff() needs to pair old and new list nodes.
     */
    protected function matchAndCompareLists()
    {
        // Collect the content of every list node of this list into childLists.
        $this->buildChildLists();

        // Record where each list node starts, so we can later tell where one node
        // begins and the next one ends.
        $this->indexLists();

        // Fill textMatches: each entry has a "new" and an "old" key holding the id of the
        // matched node, or null when a node was added or removed.
        $this->compareChildLists();
    }
308
309
    /**
     * Matches the child lists against each other on their 'content' column and stores the
     * result in textMatches.
     */
    protected function compareChildLists()
    {
        // Both properties are handed over by reference; the second one receives the matches.
        $this->createNewOldMatches($this->childLists, $this->textMatches, 'content');
    }
316
317
    /**
     * Pairs every item of $listArray['new'] with the best available item of $listArray['old'].
     *
     * Similarity is measured with similar_text(). An old item can be taken by one new item
     * only. A 100% match is always accepted; any other candidate must reach
     * $listMatchThreshold and is accepted when no other new item matches that old item
     * better, or when one of those better-matching new items has already been paired.
     *
     * The result is written to $resultArray as a list of array('new' => id|null, 'old' => id|null):
     * 'old' is null for new items without a counterpart, 'new' is null for old items that were
     * not matched (removed items).
     *
     * @param array       $listArray   array with 'new' and 'old' keys holding the items to match
     * @param array       $resultArray receives the matches (passed by reference)
     * @param string|null $column      when set, items are arrays and this key holds the text to compare
     */
    protected function createNewOldMatches(&$listArray, &$resultArray, $column = null)
    {
        // Similarity of each new item against each old item. The inner keys are the
        // position of the old item, not its key.
        $bestMatchPercentages = array();

        foreach ($listArray['new'] as $thisKey => $thisList) {
            $bestMatchPercentages[$thisKey] = array();
            foreach ($listArray['old'] as $thatList) {
                similar_text(
                    $column ? $thisList[$column] : $thisList,
                    $column ? $thatList[$column] : $thatList,
                    $percentage
                );

                $bestMatchPercentages[$thisKey][] = $percentage;
            }
        }

        // Sort each row by value, highest percent to lowest percent, keeping the keys.
        foreach ($bestMatchPercentages as &$thisMatch) {
            arsort($thisMatch);
        }
        // Break the reference so later writes to $thisMatch cannot alter the last row.
        unset($thisMatch);

        $matches = array();
        $taken = array();       // old positions that already have a partner
        $takenItems = array();  // new keys that already have a partner
        $absoluteMatch = 100;

        foreach ($bestMatchPercentages as $item => $percentages) {
            $matchedOldKey = null;

            foreach ($percentages as $key => $percent) {
                if (in_array($key, $taken)) {
                    continue;
                }

                // An absolute match is always chosen.
                if ($percent == $absoluteMatch) {
                    $matchedOldKey = $key;
                    break;
                }

                // Below the threshold the item is not considered a match.
                if ($percent < self::$listMatchThreshold) {
                    continue;
                }

                // Scores of the other new items for this old item that beat ours.
                $betterElsewhere = array_filter(
                    $this->getArrayColumn($bestMatchPercentages, $key),
                    function ($v) use ($percent) {
                        return $v > $percent;
                    }
                );

                // Nobody matches this old item better: take it.
                if (!count($betterElsewhere)) {
                    $matchedOldKey = $key;
                    break;
                }

                // Take it as well when a better-matching new item has already been paired.
                foreach (array_keys($betterElsewhere) as $k) {
                    if (in_array($k, $takenItems)) {
                        $matchedOldKey = $key;
                        break 2;
                    }
                }
            }

            $matches[] = array('new' => $item, 'old' => $matchedOldKey);
            if ($matchedOldKey !== null) {
                $taken[] = $matchedOldKey;
                $takenItems[] = $item;
            }
        }

        /* Account for removed items: every old item that was not matched above is added as
         * array( new => null, old => oldItemId ), otherwise it would vanish from the result.
         * The lookup is strict: with a loose comparison the old key 0 equals the null stored
         * for unmatched new items, and that old item would wrongly count as matched.
         */
        $matchColumns = $this->getArrayColumn($matches, 'old');
        foreach (array_keys($listArray['old']) as $thisKey) {
            if (!in_array($thisKey, $matchColumns, true)) {
                $matches[] = array('new' => null, 'old' => $thisKey);
            }
        }

        // Save the matches.
        $resultArray = $matches;
    }
429
430
    /**
     * Collects the value stored under $key from every row of $targetArray.
     *
     * Stand-in for array_column, for PHP versions that do not provide it. Rows without the
     * key are skipped and the result is re-indexed from 0.
     *
     * @param array $targetArray
     * @param mixed $key
     *
     * @return array
     */
    protected function getArrayColumn(array $targetArray, $key)
    {
        $values = array();

        foreach ($targetArray as $row) {
            if (!array_key_exists($key, $row)) {
                continue;
            }

            $values[] = $row[$key];
        }

        return $values;
    }
449
450
    /**
     * Fills childLists with the contents of each list node, for the old list first and
     * then the new one.
     */
    protected function buildChildLists()
    {
        foreach (array('old', 'new') as $type) {
            $this->childLists[$type] = $this->getListsContent($this->list[$type]);
        }
    }
458
459
    /**
     * Diffs the contents of the lists against their matched counterparts and builds
     * $this->content.
     *
     * The entries of the new list are walked in document order. Before each new list item,
     * an old item recorded as removed for the same position is emitted. Once all new entries
     * are done, the old entries located after the last new entry are emitted as well. This
     * also happens when the new list has no entries at all, so that a list that lost all of
     * its items still shows them as removed.
     */
    protected function diff()
    {
        // Add the opening parent node from listType. So if ol, <ol>, etc.
        $this->content = $this->addListTypeWrapper();

        $oldIndexCount = 0;
        $lastNewKey = -1;

        foreach ($this->diffOrderIndex['new'] as $key => $index) {
            $lastNewKey = $key;

            if ($index['type'] == 'list') {
                // Check to see if an old list was deleted.
                $this->content .= $this->buildRemovedListElement($index);

                $match = $this->getArrayByColumnValue($this->textMatches, 'new', $index['position']);
                $newList = $this->childLists['new'][$match['new']];
                $oldList = $this->getListByMatch($match, 'old');
                $this->content .= $this->addListElementToContent($newList, $oldList, $match, $index, 'new');
            }

            if ($index['type'] == 'content') {
                $this->content .= $this->addContentElementsToContent($oldIndexCount, $index['position']);
            }

            ++$oldIndexCount;
        }

        // Old entries that come after the last new entry have no counterpart to ride along with.
        foreach ($this->diffOrderIndex['old'] as $oldKey => $oldIndex) {
            if ($oldKey <= $lastNewKey) {
                continue;
            }

            if ($oldIndex['type'] == 'list') {
                $this->content .= $this->buildRemovedListElement($oldIndex);
            } else {
                $this->content .= $this->addContentElementsToContent($oldKey);
            }
        }

        // Add the closing parent node from listType. So if ol, </ol>, etc.
        $this->content .= $this->addListTypeWrapper(false);
    }

    /**
     * Renders the old list item recorded as removed for the position of the given
     * diffOrderIndex entry, or returns an empty string when there is none.
     *
     * @param array $orderEntry diffOrderIndex entry of type 'list'
     *
     * @return string
     */
    private function buildRemovedListElement(array $orderEntry)
    {
        $oldMatch = $this->getArrayByColumnValue($this->textMatches, 'old', $orderEntry['position']);

        if (!$oldMatch || $oldMatch['new'] !== null) {
            return '';
        }

        $oldList = $this->getListByMatch($oldMatch, 'old');

        return $this->addListElementToContent('', $oldList, $oldMatch, $orderEntry, 'old');
    }
514
515
    /**
     * Renders one list item: its opening tag, the diff of the old against the new content
     * (with nested list placeholders processed) and a closing </li>.
     *
     * @param array|string $newList content of the new list node, or '' when there is none
     * @param array|string $oldList content of the old list node, or '' when there is none
     * @param array        $match   the 'new'/'old' match entry for this item
     * @param array        $index   diffOrderIndex entry; its 'index' locates the opening tag
     * @param string       $type    which word list ('old' or 'new') the opening tag is read from
     *
     * @return string
     */
    protected function addListElementToContent($newList, $oldList, array $match, array $index, $type)
    {
        $openingTag = $this->list[$type][$index['index']];

        $oldText = $this->convertListContentArrayToString($oldList);
        $newText = $this->convertListContentArrayToString($newList);
        $itemDiff = $this->diffElements($oldText, $newText, false);

        return $openingTag.$this->processPlaceholders($itemDiff, $match).'</li>';
    }
538
539
    /**
     * Diffs a block of content located outside of the list items.
     *
     * The new side is the content block at $newPosition, or an empty string when no position
     * is given or the block does not exist. The old side is the content block referenced by
     * the old diffOrderIndex entry at $oldIndexCount, or an empty string when there is none.
     *
     * @param int      $oldIndexCount key into diffOrderIndex['old']
     * @param null|int $newPosition   key into contentIndex['new'], null for "no new content"
     *
     * @return string
     */
    protected function addContentElementsToContent($oldIndexCount, $newPosition = null)
    {
        // Compare against null explicitly: position 0 is a valid content block and must
        // not be treated as "no position".
        $newContent = $newPosition !== null && array_key_exists($newPosition, $this->contentIndex['new'])
            ? $this->contentIndex['new'][$newPosition]
            : '';

        $oldDiffOrderIndexMatch = array_key_exists($oldIndexCount, $this->diffOrderIndex['old'])
            ? $this->diffOrderIndex['old'][$oldIndexCount]
            : '';

        $oldContent = $oldDiffOrderIndexMatch && array_key_exists($oldDiffOrderIndexMatch['position'], $this->contentIndex['old'])
            ? $this->contentIndex['old'][$oldDiffOrderIndexMatch['position']]
            : '';

        $diffObject = new HtmlDiff($oldContent, $newContent);

        return $diffObject->build();
    }
564
565
    /**
     * Returns the child list the match entry points to on the given side, or an empty
     * string when childLists has no entry for it.
     *
     * @param array  $match
     * @param string $type  'old' or 'new'
     *
     * @return array|string
     */
    protected function getListByMatch(array $match, $type = 'new')
    {
        $listKey = $match[$type];

        if (!array_key_exists($listKey, $this->childLists[$type])) {
            return '';
        }

        return $this->childLists[$type][$listKey];
    }
577
578
    /**
     * Finds the rows of $parentArray whose $column holds $value.
     *
     * Values are compared loosely, except that null only ever matches null. Match entries
     * store null for "no counterpart", and a loose comparison would make such an entry
     * match position 0.
     *
     * @param array  $parentArray list of rows (arrays)
     * @param string $column      key to inspect in every row
     * @param mixed  $value       value to look for
     * @param bool   $allMatches  return every matching row instead of only the first one
     *
     * @return array|bool the first matching row, or false when none matches; with
     *                    $allMatches, the (possibly empty) list of all matching rows
     */
    protected function getArrayByColumnValue($parentArray, $column, $value, $allMatches = false)
    {
        $returnArray = array();

        foreach ($parentArray as $array) {
            if (!array_key_exists($column, $array)) {
                continue;
            }

            $candidate = $array[$column];
            $isMatch = ($candidate === null || $value === null)
                ? $candidate === $value
                : $candidate == $value;

            if (!$isMatch) {
                continue;
            }

            if (!$allMatches) {
                return $array;
            }

            $returnArray[] = $array;
        }

        return $allMatches ? $returnArray : false;
    }
603
604
    /**
     * Converts a list (li) content array to a string.
     *
     * The 'content' entry is split on spaces. Every word equal to the list item placeholder
     * is replaced by the next entry of 'kids', converted recursively and wrapped in
     * <li>...</li>. Anything that is not an array is returned unchanged.
     *
     * @param array|string $listContentArray
     *
     * @return string
     */
    protected function convertListContentArrayToString($listContentArray)
    {
        if (!is_array($listContentArray)) {
            return $listContentArray;
        }

        $pieces = array();
        $kidIndex = 0;

        foreach (explode(' ', $listContentArray['content']) as $word) {
            if ($word == self::$listPlaceHolder) {
                $kid = $listContentArray['kids'][$kidIndex];
                $pieces[] = '<li>'.$this->convertListContentArrayToString($kid).'</li>';
                ++$kidIndex;
            } else {
                $pieces[] = $word;
            }
        }

        return implode(' ', $pieces);
    }
635
636
    /**
     * Replaces the nested list placeholders found in a diffed list node.
     *
     * The text is walked word by word. Each word containing one of the isolated diff tag
     * placeholders is replaced by the result of diffList() on the next old/new nested list
     * of this node; text around the placeholder within the same word is kept. All other
     * words are passed through.
     *
     * @param string $text    diffed content of a list node
     * @param array  $matches the 'new'/'old' match entry of the node
     *
     * @return string
     */
    protected function processPlaceholders($text, array $matches)
    {
        // Nested lists of this node, old and new, in order of appearance.
        $contentVault = array(
            'old' => $this->getListContent('old', $matches),
            'new' => $this->getListContent('new', $matches),
        );

        $returnText = array();
        $nestedIndex = 0;

        foreach (explode(' ', $text) as $word) {
            $preContent = $this->checkWordForDiffTag($this->stripNewLine($word));
            $wasSplit = is_array($preContent);
            $candidate = $wasSplit ? $preContent[1] : $preContent;

            if (in_array($candidate, $this->isolatedDiffTags)) {
                $oldText = array_key_exists($nestedIndex, $contentVault['old'])
                    ? implode('', $contentVault['old'][$nestedIndex])
                    : '';
                $newText = array_key_exists($nestedIndex, $contentVault['new'])
                    ? implode('', $contentVault['new'][$nestedIndex])
                    : '';

                $content = $this->diffList($oldText, $newText);
                ++$nestedIndex;
            } else {
                $content = $preContent;
            }

            $returnText[] = $wasSplit
                ? $preContent[0].$content.$preContent[2]
                : $content;
        }

        return implode(' ', $returnText);
    }
678
679
    /**
     * Checks whether a word contains one of the isolated diff tag placeholders.
     *
     * @param string $word
     *
     * @return array|string array(text before, placeholder, text after) for the first
     *                      placeholder found, otherwise the word itself
     */
    protected function checkWordForDiffTag($word)
    {
        foreach ($this->isolatedDiffTags as $diffTag) {
            $position = strpos($word, $diffTag);

            if ($position === false) {
                continue;
            }

            return array(
                substr($word, 0, $position),
                $diffTag,
                substr($word, $position + strlen($diffTag)),
            );
        }

        return $word;
    }
704
705
    /**
     * Collapses every run of two or more whitespace characters into a single space and
     * trims the result.
     *
     * @param string $text
     *
     * @return string
     */
    protected function stripNewLine($text)
    {
        $collapsed = preg_replace('/\s\s+/', ' ', $text);

        return trim($collapsed);
    }
716
717
    /**
     * Collects the nested lists of the list node the match entry points to.
     *
     * The node's word range is located through listsIndex and findEndForIndex(). For every
     * word in that range that is an isolated diff tag placeholder, the original content
     * stored in listIsolatedDiffTags is added to the result.
     *
     * The first parameter has no default value: a default placed before the required
     * $matches parameter could never be used and is deprecated as of PHP 8.0.
     *
     * @param string $indexKey 'old' or 'new'
     * @param array  $matches  the 'new'/'old' match entry of the node
     *
     * @return array
     */
    protected function getListContent($indexKey, array $matches)
    {
        $bucket = array();

        // isset() is false for a missing key as well as for a null id (no counterpart).
        if (!isset($matches[$indexKey])) {
            return $bucket;
        }

        $words = $this->list[$indexKey];
        $start = $this->listsIndex[$indexKey][$matches[$indexKey]];
        $stop = $this->findEndForIndex($words, $start);

        for ($x = $start; $x <= $stop; ++$x) {
            // findEndForIndex() may report an end one past the last word when the node is
            // never closed.
            if (!isset($words[$x])) {
                continue;
            }

            if (in_array($words[$x], $this->isolatedDiffTags, true)) {
                $bucket[] = $this->listIsolatedDiffTags[$indexKey][$x];
            }
        }

        return $bucket;
    }
742
743
    /**
     * Finds where the list node starting at $start ends.
     *
     * Opening and closing list tags are counted from $start onwards; the position of the
     * closing tag that brings the count back to zero is returned. When no such tag exists,
     * $start plus the number of remaining words is returned.
     *
     * @param array $index words of the list
     * @param int   $start position of the node's opening tag
     *
     * @return int
     */
    protected function findEndForIndex(array $index, $start)
    {
        // $index is a by-value copy, so splicing it does not affect the caller.
        $remaining = array_splice($index, $start);
        $openTags = 0;

        foreach ($remaining as $offset => $word) {
            if ($this->isOpeningListTag($word)) {
                ++$openTags;
            }

            if (!$this->isClosingListTag($word)) {
                continue;
            }

            --$openTags;
            if ($openTags === 0) {
                return $start + $offset;
            }
        }

        return $start + count($remaining);
    }
770
771
    /**
     * Indexes the starting position of every top-level list node of the old and new list,
     * so that we can later see where one list node starts and another ends.
     *
     * The nesting depth is tracked separately for each list: an unbalanced old list (for
     * example one with omitted closing tags) must not shift the depth the new list starts
     * with, otherwise none of the new nodes would be recognised as top-level.
     */
    protected function indexLists()
    {
        $this->listsIndex = array();

        foreach ($this->list as $type => $list) {
            $this->listsIndex[$type] = array();
            $depth = 0;

            foreach ($list as $key => $listItem) {
                if ($this->isOpeningListTag($listItem)) {
                    ++$depth;
                    // Only nodes opened at the top level are indexed.
                    if ($depth === 1) {
                        $this->listsIndex[$type][] = $key;
                    }
                }

                if ($this->isClosingListTag($listItem)) {
                    --$depth;
                }
            }
        }
    }
798
799
    /**
     * Returns the opening or closing html element that wraps the list.
     *
     * The opening element is the stored parent tag of the new list, falling back to the one
     * of the old list. The closing element is built from listType.
     *
     * @param bool $opening
     *
     * @return string
     */
    protected function addListTypeWrapper($opening = true)
    {
        if (!$opening) {
            return '</'.$this->listType.'>';
        }

        return $this->newParentTag ?: $this->oldParentTag;
    }
814
815
    /**
816
     * Replace nested list with placeholders.
817
     */
818
    public function replaceListIsolatedDiffTags()
    {
        // The list is handed over as the property element itself rather than a
        // local copy, in case the helper takes its argument by reference
        // (NOTE(review): signature not visible here - confirm).
        foreach (array('old', 'new') as $side) {
            $this->listIsolatedDiffTags[$side] = $this->createIsolatedDiffTagPlaceholders($this->list[$side]);
        }
    }
823
824
    /**
825
     * Grab the contents of a list node.
826
     *
827
     * @param array $contentArray
828
     * @param bool  $stripTags Not used by the current implementation.
829
     *
830
     * @return array
831
     */
832
    protected function getListsContent(array $contentArray, $stripTags = true)
    {
        $tree = array();
        $depth = 0;
        // Number of list items opened so far, keyed by nesting depth.
        $openedAtDepth = array();

        foreach ($contentArray as $word) {
            if ($this->isOpeningListTag($word)) {
                ++$depth;
                $openedAtDepth[$depth] = isset($openedAtDepth[$depth]) ? $openedAtDepth[$depth] + 1 : 1;

                continue;
            }

            if ($this->isClosingListTag($word)) {
                --$depth;

                continue;
            }

            // Words outside of any list item are ignored; the list tags themselves
            // never reach this point.
            if ($depth > 0) {
                $this->addStringToArrayByDepth($word, $tree, $depth, 1, $openedAtDepth);
            }
        }

        return $tree;
    }
860
861
    /**
862
     * This function helps build the list content array of a list.
863
     * If a list has another list within it, the inner list is replaced with the list placeholder and the inner list
864
     * content becomes a child of the parent list.
865
     * This goes recursively down.
866
     *
867
     * @param string $word
868
     * @param array  $array
869
     * @param int    $targetDepth
870
     * @param int    $thisDepth
871
     * @param array  $nestedCount
872
     */
873
    protected function addStringToArrayByDepth($word, array &$array, $targetDepth, $thisDepth, array $nestedCount)
    {
        if ($targetDepth == $thisDepth) {
            // Target level reached. $array is either a single node (it has a
            // 'content' key) or a list of nodes.
            if (array_key_exists('content', $array)) {
                $array['content'] .= $word;

                return;
            }

            // Start a new node when more items were opened at this depth than
            // nodes exist so far.
            if ($nestedCount[$targetDepth] > count($array)) {
                $array[] = array('content' => '', 'kids' => array());
            }

            $array[count($array) - 1]['content'] .= $word;

            return;
        }

        // Not deep enough yet: descend one level into the most recent child.
        ++$thisDepth;

        if (array_key_exists('kids', $array)) {
            // $array is a single node: make sure it has a child for the current
            // nested item and mark the child's position in the parent content.
            if ($nestedCount[$targetDepth] > count($array['kids'])) {
                $array['kids'][] = array('content' => '', 'kids' => array());
                $array['content'] .= self::$listPlaceHolder;
            }

            $this->addStringToArrayByDepth(
                $word,
                $array['kids'][count($array['kids']) - 1],
                $targetDepth,
                $thisDepth,
                $nestedCount
            );

            return;
        }

        // $array is a list of nodes: apply the same steps to its last node. The
        // index is recomputed in every expression, exactly as written originally.
        if ($nestedCount[$targetDepth] > count($array[count($array) - 1]['kids'])) {
            $array[count($array) - 1]['kids'][] = array('content' => '', 'kids' => array());
            $array[count($array) - 1]['content'] .= self::$listPlaceHolder;
        }

        $this->addStringToArrayByDepth(
            $word,
            $array[count($array) - 1]['kids'][count($array[count($array) - 1]['kids']) - 1],
            $targetDepth,
            $thisDepth,
            $nestedCount
        );
    }
932
933
    /**
934
     * Checks if text is opening list tag.
935
     *
936
     * @param string $item
937
     *
938
     * @return bool
939
     */
940
    protected function isOpeningListTag($item)
    {
        // The negative lookahead requires the tag name to end right after "li",
        // so that other elements whose names merely start with it (<link>,
        // <listing>) are not mistaken for list items. Attributes are still allowed.
        return preg_match('#<li(?![\w:-])[^>]*>#i', $item) === 1;
    }
948
949
    /**
950
     * Check if text is closing list tag.
951
     *
952
     * @param string $item
953
     *
954
     * @return bool
955
     */
956
    protected function isClosingListTag($item)
    {
        // The negative lookahead requires the tag name to end right after "li",
        // so that closing tags of other elements whose names merely start with
        // it (</listing>) are not mistaken for the end of a list item.
        return preg_match('#</li(?![\w:-])[^>]*>#i', $item) === 1;
    }
964
}
965