1
|
|
|
<?php |
2
|
|
|
/** |
3
|
|
|
* File containing the class {@see ConvertHelper_TextComparer}. |
4
|
|
|
* |
5
|
|
|
* @package Application Utils |
6
|
|
|
* @subpackage ConvertHelper |
7
|
|
|
* @see ConvertHelper_TextComparer |
8
|
|
|
*/ |
9
|
|
|
|
10
|
|
|
declare(strict_types=1); |
11
|
|
|
|
12
|
|
|
namespace AppUtils; |
13
|
|
|
|
14
|
|
|
/** |
15
|
|
|
* Text comparison tool: can be used to calculate how |
16
|
|
|
* close two texts are to each other, using the |
17
|
|
|
* Levenshtein method. |
18
|
|
|
* |
19
|
|
|
* Converts the resulting match rating to a percentage |
20
|
|
|
* for easy processing. |
21
|
|
|
* |
22
|
|
|
* @package Application Utils |
23
|
|
|
* @subpackage ConvertHelper |
24
|
|
|
* @author Sebastian Mordziol <[email protected]> |
25
|
|
|
* |
26
|
|
|
* @see ConvertHelper::matchString() |
27
|
|
|
*/ |
28
|
|
|
class ConvertHelper_TextComparer implements Interface_Optionable
{
    use Traits_Optionable;

    const OPTION_MAX_LEVENSHTEIN_DISTANCE = 'maxLevenshtein';
    const OPTION_PRECISION = 'precision';

    /**
     * Default option values: a maximum Levenshtein distance
     * of 10, and a precision of 1 for the returned percentage.
     *
     * @return array<string,int>
     */
    public function getDefaultOptions() : array
    {
        return array(
            self::OPTION_MAX_LEVENSHTEIN_DISTANCE => 10,
            self::OPTION_PRECISION => 1
        );
    }

    /**
     * Retrieves the currently configured maximum Levenshtein distance.
     *
     * @return int
     */
    public function getMaxDistance() : int
    {
        return $this->getIntOption(self::OPTION_MAX_LEVENSHTEIN_DISTANCE);
    }

    /**
     * Retrieves the precision used to round the match percentage.
     *
     * @return int
     */
    public function getPrecision() : int
    {
        return $this->getIntOption(self::OPTION_PRECISION);
    }

    /**
     * Sets the maximum Levenshtein distance: results above this
     * value are ignored (will return a 0% match).
     *
     * @param int $distance
     * @return ConvertHelper_TextComparer
     */
    public function setMaxDistance(int $distance) : ConvertHelper_TextComparer
    {
        return $this->setOption(self::OPTION_MAX_LEVENSHTEIN_DISTANCE, $distance);
    }

    /**
     * Sets the precision of the returned match percentage value.
     *
     * @param int $precision Passed as precision argument to round().
     * @return ConvertHelper_TextComparer
     */
    public function setPrecision(int $precision) : ConvertHelper_TextComparer
    {
        return $this->setOption(self::OPTION_PRECISION, $precision);
    }

    /**
     * Calculates a percentage match of the source string with the target string.
     *
     * NOTE: The percentage is scaled against the maximum Levenshtein
     * distance option, plus one: every unit of distance subtracts
     * 100 / (maximum + 1) percentage points. As such, the smaller the
     * calculated distance, the higher the percentage. A distance equal
     * to the maximum still yields a small non-zero match, while any
     * distance above the maximum yields 0%.
     *
     * The distance is calculated with PHP's native levenshtein()
     * function, which compares the strings byte by byte.
     *
     * @param string $source
     * @param string $target
     * @return float Match percentage between 0 and 100.
     */
    public function match(string $source, string $target) : float
    {
        // Identical strings are a perfect match: no need to
        // run them through levenshtein at all.
        if($source === $target) {
            return 100.0;
        }

        $maxL = $this->getMaxDistance();
        $diff = levenshtein($source, $target);

        // Before PHP 8.0, levenshtein() returns -1 when one of the
        // strings is longer than 255 characters. Without this guard,
        // the negative distance would produce a match above 100%
        // (or a division by zero with a max distance of -1).
        if($diff < 0 || $diff > $maxL) {
            return 0.0;
        }

        // Dividing by max + 1 keeps a match at exactly the maximum
        // distance distinguishable from one that exceeds it.
        $percent = $diff * 100 / ($maxL + 1);

        return round(100 - $percent, $this->getPrecision());
    }
}
105
|
|
|
|