1
|
|
|
<?php |
2
|
|
|
|
3
|
|
|
namespace Caxy\HtmlDiff\Strategy; |
4
|
|
|
|
5
|
|
|
use Caxy\HtmlDiff\Preprocessor; |
6
|
|
|
use Caxy\HtmlDiff\Util\MbStringUtil; |
7
|
|
|
|
8
|
|
|
/**
 * Match strategy that decides whether two list-item strings are similar
 * enough to be treated as the same item.
 *
 * The decision is made in three passes (see isMatch()): similarity of the
 * tag-stripped text, similarity of the raw strings, and finally the amount of
 * text shared at the beginning and end of both strings.
 */
class ListItemMatchStrategy implements MatchStrategyInterface
{
    /**
     * String helper used for every length measurement in this strategy and
     * handed to the Preprocessor prefix/suffix helpers.
     *
     * @var MbStringUtil
     */
    protected $stringUtil;

    /**
     * Minimum similar_text() percentage (inclusive) at which two strings are
     * considered a match.
     *
     * @var int
     */
    protected $similarityThreshold;

    /**
     * Exclusive lower bound for the ratio "shorter cleaned string length /
     * longer raw string length" required by the prefix/suffix checks.
     *
     * @var float
     */
    protected $lengthRatioThreshold;

    /**
     * Exclusive lower bound for the share of the shorter string that must be
     * covered by the common prefix and suffix.
     *
     * @var float
     */
    protected $commonTextRatioThreshold;

    /**
     * ListItemMatchStrategy constructor.
     *
     * @param MbStringUtil $stringUtil
     * @param int          $similarityThreshold
     * @param float        $lengthRatioThreshold
     * @param float        $commonTextRatioThreshold
     */
    public function __construct($stringUtil, $similarityThreshold = 80, $lengthRatioThreshold = 0.1, $commonTextRatioThreshold = 0.6)
    {
        $this->stringUtil = $stringUtil;
        $this->similarityThreshold = $similarityThreshold;
        $this->lengthRatioThreshold = $lengthRatioThreshold;
        $this->commonTextRatioThreshold = $commonTextRatioThreshold;
    }

    /**
     * Tells whether $a and $b are similar enough to be considered a match.
     *
     * Returns true as soon as one of the following holds:
     *  1. similar_text() of the tag-stripped strings reaches the similarity threshold;
     *  2. similar_text() of the raw strings reaches the similarity threshold;
     *  3. the shorter string is long enough relative to the longer one (length
     *     ratio threshold) and is either exactly covered by the common
     *     prefix + suffix, or covered by more than the common text ratio threshold.
     *
     * @param string $a
     * @param string $b
     *
     * @return bool
     */
    public function isMatch($a, $b)
    {
        $percentage = null;

        // Pass 1: compare text content only, ignoring markup.
        $aStripped = strip_tags($a);
        $bStripped = strip_tags($b);
        similar_text($aStripped, $bStripped, $percentage);
        if ($percentage >= $this->similarityThreshold) {
            return true;
        }

        // Pass 2: compare the raw strings, markup included.
        similar_text($a, $b, $percentage);
        if ($percentage >= $this->similarityThreshold) {
            return true;
        }

        // Pass 3: compare common prefix / suffix length.
        $aCleaned = trim($aStripped);
        $bCleaned = trim($bStripped);
        if ($this->stringUtil->strlen($aCleaned) === 0 || $this->stringUtil->strlen($bCleaned) === 0) {
            // At least one side has no visible text; fall back to the raw strings for both.
            $aCleaned = $a;
            $bCleaned = $b;
        }

        $aCleanedLength = $this->stringUtil->strlen($aCleaned);
        $bCleanedLength = $this->stringUtil->strlen($bCleaned);
        if ($aCleanedLength === 0 || $bCleanedLength === 0) {
            // Nothing to compare; this guard also keeps the divisions below away from zero.
            return false;
        }

        // NOTE(review): both helpers are defined outside this file; their results are
        // used here as counts of shared leading / trailing characters - confirm.
        $prefixIndex = Preprocessor::diffCommonPrefix($aCleaned, $bCleaned, $this->stringUtil);
        $suffixIndex = Preprocessor::diffCommonSuffix($aCleaned, $bCleaned, $this->stringUtil);
        $commonLength = $prefixIndex + $suffixIndex;

        // Use the shorter string and see how much of it is left over.
        $len = min($aCleanedLength, $bCleanedLength);
        $remaining = $len - $commonLength;

        // The ratio is measured against the longer *raw* input, so markup counts here.
        $strLengthPercent = $len / max($this->stringUtil->strlen($a), $this->stringUtil->strlen($b));

        // Both checks below share this precondition. The second one used to compare
        // against a literal 0.1, which ignored a custom length ratio threshold.
        $isLongEnough = $strLengthPercent > $this->lengthRatioThreshold;

        if ($remaining === 0 && $isLongEnough) {
            return true;
        }

        $percentCommon = $commonLength / $len;

        return $isLongEnough && $percentCommon > $this->commonTextRatioThreshold;
    }
}
102
|
|
|
|