Completed
Push — master ( a89c11...c36570 )
by raphael
04:54
created

Finder::threshold()   A

Complexity

Conditions 1
Paths 1

Size

Total Lines 6

Duplication

Lines 0
Ratio 0 %

Code Coverage

Tests 3
CRAP Score 1

Importance

Changes 0
Metric Value
dl 0
loc 6
ccs 3
cts 3
cp 1
rs 10
c 0
b 0
f 0
cc 1
nc 1
nop 1
crap 1
1
<?php
2
namespace SimilarText;
3
4
/**
5
 * Class Finder
6
 * @package SimilarText
7
 *
8
 * Thanks to: http://php.net/manual/fr/function.levenshtein.php#113702
9
 */
10
class Finder
{
    /**
     * @var string The string every haystack entry is compared against.
     */
    protected $needle;

    /**
     * @var array Candidate strings that get ranked against the needle.
     */
    protected $haystack;

    /**
     * Result of the last sortHaystack() run: candidate => Levenshtein
     * distance, ordered by ascending distance (closest match first).
     *
     * @var array
     */
    protected $sorted_haystack;

    /**
     * @var null|int Largest distance a candidate may have; null disables filtering.
     */
    protected $threshold;

    /**
     * Finder constructor.
     *
     * @param string $needle   String to look for.
     * @param array  $haystack Candidate strings to compare with the needle.
     */
    public function __construct($needle, $haystack)
    {
        $this->needle = $needle;
        $this->haystack = $haystack;
    }

    /**
     * Score every haystack entry against the needle, drop the entries above
     * the threshold (when one is set) and store the remainder in
     * $sorted_haystack, closest match first.
     *
     * Candidates are used as array keys, so duplicate haystack entries
     * collapse into a single result.
     *
     * @return void
     */
    protected function sortHaystack()
    {
        $scores = [];
        foreach ($this->haystack as $candidate) {
            $scores[$candidate] = $this->levenshteinUtf8($this->needle, $candidate);
        }

        // Apply threshold when set.
        if ($this->threshold !== null) {
            $limit = $this->threshold;
            $scores = array_filter($scores, function ($score) use ($limit) {
                return $score <= $limit;
            });
        }

        // asort() keeps the candidate => score association intact.
        asort($scores);

        $this->sorted_haystack = $scores;
    }

    /**
     * Apply threshold to filter only relevant results. The higher
     * the threshold the more results there will be returned.
     *
     * @param int|null $threshold Maximum accepted distance, or null for no limit.
     * @return Finder The same instance, to allow method chaining.
     */
    public function threshold($threshold = null)
    {
        $this->threshold = $threshold;

        return $this;
    }

    /**
     * Return the closest match.
     *
     * @return string|null The best candidate, or null when nothing is left
     *                     (empty haystack or everything above the threshold).
     */
    public function first()
    {
        $this->sortHaystack();
        reset($this->sorted_haystack);
        $best = key($this->sorted_haystack);

        // PHP stores numeric-string keys such as "123" as integers; hand the
        // candidate back in its string form.
        return $best === null ? null : (string) $best;
    }

    /**
     * Return all remaining candidates, closest match first.
     *
     * @return array List of candidate strings.
     */
    public function all()
    {
        $this->sortHaystack();

        // strval undoes PHP's integer cast of numeric-string array keys.
        return array_map('strval', array_keys($this->sorted_haystack));
    }

    /**
     * Return whether the haystack contains the needle itself.
     *
     * Entries are compared by their string form with ===, so numeric-looking
     * strings such as "1e1" and "10" are not treated as equal.
     *
     * @return bool
     */
    public function hasExactMatch()
    {
        $needle = (string) $this->needle;
        foreach ($this->haystack as $candidate) {
            if ((string) $candidate === $needle) {
                return true;
            }
        }

        return false;
    }

    /**
     * Replace every multi-byte UTF-8 character of a string with a single
     * byte in the 128-255 range, so byte-wise functions see one byte per
     * character.
     *
     * NOTE: only 128 replacement bytes exist and chr() wraps around beyond
     * 255, so strings sharing one map that use more than 128 distinct
     * multi-byte characters can receive colliding replacements.
     *
     * @param string $str Text to convert.
     * @param array  $map Character => replacement byte map, passed by reference
     *                    and extended with the characters not met before.
     * @return string The converted string; $str itself when it holds no
     *                multi-byte character.
     */
    protected function utf8ToExtendedAscii($str, &$map)
    {
        // Find all multi-byte characters (cf. utf-8 encoding specs).
        $matches = [];
        if (!preg_match_all('/[\xC0-\xF7][\x80-\xBF]+/', $str, $matches)) {
            return $str; // plain ascii string
        }

        // Update the encoding map with the characters not already met.
        foreach ($matches[0] as $char) {
            if (!isset($map[$char])) {
                $map[$char] = chr(128 + count($map));
            }
        }

        // Finally remap non-ascii characters.
        return strtr($str, $map);
    }

    /**
     * Calculate the levenshtein distance between two UTF-8 strings, counting
     * a multi-byte character as one character.
     *
     * @param string $string1
     * @param string $string2
     * @return int The distance, or PHP_INT_MAX when it could not be computed.
     */
    protected function levenshteinUtf8($string1, $string2)
    {
        // Both strings share one map so equal characters get equal bytes.
        $charMap = [];
        $string1 = $this->utf8ToExtendedAscii($string1, $charMap);
        $string2 = $this->utf8ToExtendedAscii($string2, $charMap);

        $distance = levenshtein($string1, $string2);

        // Before PHP 8.0 levenshtein() returns -1 for arguments longer than
        // 255 bytes. Left as is, such a candidate would sort as the best
        // match; rank it last instead.
        return $distance < 0 ? PHP_INT_MAX : $distance;
    }
}
160