1
|
|
|
<?php |
2
|
|
|
|
3
|
|
|
namespace Caxy\HtmlDiff\Strategy; |
4
|
|
|
|
5
|
|
|
use Caxy\HtmlDiff\Preprocessor; |
6
|
|
|
|
7
|
|
|
/**
 * Decides whether two list-item strings are similar enough to be treated as
 * the same item, using text similarity first and shared prefix/suffix length
 * as a fallback.
 */
class ListItemMatchStrategy implements MatchStrategyInterface
{
    /**
     * Minimum similar_text() percentage (0-100) at which two strings match.
     *
     * @var int
     */
    protected $similarityThreshold;

    /**
     * Lower bound (exclusive) for the ratio between the shorter compared text
     * and the longer raw input; guards the prefix/suffix checks in isMatch().
     *
     * @var float
     */
    protected $lengthRatioThreshold;

    /**
     * Lower bound (exclusive) for the share of the shorter compared text that
     * must be covered by the common prefix and suffix.
     *
     * @var float
     */
    protected $commonTextRatioThreshold;

    /**
     * ListItemMatchStrategy constructor.
     *
     * @param int   $similarityThreshold
     * @param float $lengthRatioThreshold
     * @param float $commonTextRatioThreshold
     */
    public function __construct($similarityThreshold = 80, $lengthRatioThreshold = 0.1, $commonTextRatioThreshold = 0.6)
    {
        $this->similarityThreshold = $similarityThreshold;
        $this->lengthRatioThreshold = $lengthRatioThreshold;
        $this->commonTextRatioThreshold = $commonTextRatioThreshold;
    }

    /**
     * Checks whether $a and $b match.
     *
     * The checks run in order and the first one that succeeds returns true:
     *  1. similar_text() percentage of the tag-stripped strings;
     *  2. similar_text() percentage of the raw strings;
     *  3. the shorter text is fully accounted for by the common prefix and suffix;
     *  4. the common prefix and suffix cover a large enough share of the shorter text.
     * Checks 3 and 4 additionally require the length ratio to exceed
     * $lengthRatioThreshold.
     *
     * @param string $a
     * @param string $b
     *
     * @return bool
     */
    public function isMatch($a, $b)
    {
        $percentage = null;

        // Strip tags and check similarity
        $aStripped = strip_tags($a);
        $bStripped = strip_tags($b);
        similar_text($aStripped, $bStripped, $percentage);

        if ($percentage >= $this->similarityThreshold) {
            return true;
        }

        // Check w/o stripped tags
        similar_text($a, $b, $percentage);
        if ($percentage >= $this->similarityThreshold) {
            return true;
        }

        // Check common prefix/suffix length
        $aCleaned = trim($aStripped);
        $bCleaned = trim($bStripped);
        if (strlen($aCleaned) === 0 || strlen($bCleaned) === 0) {
            // One side has no text outside of tags: compare the raw markup instead.
            $aCleaned = $a;
            $bCleaned = $b;
        }
        if (strlen($aCleaned) === 0 || strlen($bCleaned) === 0) {
            // Nothing to compare; this also keeps the divisions below non-zero.
            return false;
        }

        // The returned values are used below as the lengths of the shared
        // leading and trailing text of the two strings.
        $prefixIndex = Preprocessor::diffCommonPrefix($aCleaned, $bCleaned);
        $suffixIndex = Preprocessor::diffCommonSuffix($aCleaned, $bCleaned);
        $commonLength = $prefixIndex + $suffixIndex;

        // Use shorter string, and see how much of it is leftover
        $len = min(strlen($aCleaned), strlen($bCleaned));
        $remaining = $len - $commonLength;

        // Note: the numerator is the shorter *cleaned* length, the denominator
        // the longer *raw* input length.
        $strLengthPercent = $len / max(strlen($a), strlen($b));

        if ($remaining === 0 && $strLengthPercent > $this->lengthRatioThreshold) {
            return true;
        }

        $percentCommon = $commonLength / $len;

        // Honour the configured length ratio threshold here as well, instead of
        // a hard-coded 0.1, so a custom constructor value applies to both checks.
        if ($strLengthPercent > $this->lengthRatioThreshold && $percentCommon > $this->commonTextRatioThreshold) {
            return true;
        }

        return false;
    }
}
94
|
|
|
|