@@ -6,321 +6,321 @@ |
||
class DamerauLevenshteinTest extends \PHPUnit_Framework_TestCase
{

    /**
     * Tests `getSimilarity`.
     *
     * Every case keeps its two inputs and the expected distance together, so
     * a row cannot drift out of step with its expectation, and a failure
     * message names the pair that failed.
     *
     * @return void
     */
    public function testGetSimilarity()
    {
        // Each case: [first string, second string, expected distance].
        $cases = [
            ['foo', 'foo', 0],
            ['foo', 'fooo', 1],
            ['foo', 'bar', 3],

            ['123', '12', 1],
            ['qwe', 'qwa', 1],
            ['awe', 'qwe', 1],
            ['фыв', 'фыа', 1],
            ['vvvqw', 'vvvwq', 1],
            ['qw', 'wq', 1],
            ['qq', 'ww', 2],
            ['qw', 'qw', 0],
            ['пионер', 'плеер', 3],
            ['пионер', 'пионеер', 1],
            ['пионер', 'поинер', 1],
            ['pioner', 'poner', 1],
            ['пионер', 'понер', 1],
        ];

        foreach ($cases as $case) {
            list($firstString, $secondString, $expected) = $case;

            $DamerauLevenshtein = new DamerauLevenshtein($firstString, $secondString);
            $result = $DamerauLevenshtein->getSimilarity();

            $this->assertSame(
                $expected,
                $result,
                sprintf('Distance between "%s" and "%s"', $firstString, $secondString)
            );
        }
    }

    /**
     * Tests `getInsCost`.
     *
     * @return void
     */
    public function testGetInsCost()
    {
        $this->assertCostGetter('getInsCost', 0);
    }

    /**
     * Tests `getDelCost`.
     *
     * @return void
     */
    public function testGetDelCost()
    {
        $this->assertCostGetter('getDelCost', 1);
    }

    /**
     * Tests `getSubCost`.
     *
     * @return void
     */
    public function testGetSubCost()
    {
        $this->assertCostGetter('getSubCost', 2);
    }

    /**
     * Tests `getTransCost`.
     *
     * @return void
     */
    public function testGetTransCost()
    {
        $this->assertCostGetter('getTransCost', 3);
    }

    /**
     * Tests `getRelativeDistance`.
     *
     * @return void
     */
    public function testGetRelativeDistance()
    {
        $delta = pow(10, -4);

        // Each case: [first string, second string, expected relative distance].
        $cases = [
            ['O\'Callaghan', 'OCallaghan', 0.90909090909091],
            ['Thom', 'Mira', 0.0],
            ['Oldeboom', 'Ven', 0.125],
            ['ven', 'Ven', 0.66666666666667],
            ['enV', 'Ven', 0.33333333333333],
        ];

        foreach ($cases as $case) {
            list($firstString, $secondString, $expected) = $case;

            $DamerauLevenshtein = new DamerauLevenshtein($firstString, $secondString);
            $result = $DamerauLevenshtein->getRelativeDistance();

            $this->assertEquals(
                $expected,
                $result,
                sprintf('Relative distance between "%s" and "%s"', $firstString, $secondString),
                $delta
            );
        }
    }

    /**
     * Tests `getMatrix`.
     *
     * @return void
     */
    public function testGetMatrix()
    {
        list($firstString, $secondString) = $this->getDefaultStrings();

        $DamerauLevenshtein = new DamerauLevenshtein($firstString, $secondString);
        $actual = $DamerauLevenshtein->getMatrix();
        $expected = [
            [0, 1, 2, 3],
            [1, 1, 2, 3],
            [2, 2, 2, 3],
            [3, 3, 3, 3]
        ];
        $this->assertSame($expected, $actual);
    }

    /**
     * Tests `displayMatrix`.
     *
     * @return void
     */
    public function testDisplayMatrix()
    {
        list($firstString, $secondString) = $this->getDefaultStrings();

        $DamerauLevenshtein = new DamerauLevenshtein($firstString, $secondString);
        $actual = $DamerauLevenshtein->displayMatrix();
        $expected = implode('', [
            " foo\n",
            " 0123\n",
            "b1123\n",
            "a2223\n",
            "r3333\n",
        ]);
        $this->assertSame($expected, $actual);
    }

    /**
     * Returns the default costs.
     *
     * @return array Costs (insert, delete, substitution, transposition)
     */
    protected function getDefaultCosts()
    {
        $insCost = 1;
        $delCost = 1;
        $subCost = 1;
        $transCost = 1;

        return [$insCost, $delCost, $subCost, $transCost];
    }

    /**
     * Returns the default strings.
     *
     * @return array Strings (foo, bar)
     */
    protected function getDefaultStrings()
    {
        $firstString = 'foo';
        $secondString = 'bar';

        return [$firstString, $secondString];
    }

    /**
     * Checks one cost getter against the default cost and a non-default one.
     *
     * First builds the object with the default strings only and expects the
     * getter to report the default cost. Then sets that single cost to 2,
     * passes all four costs to the constructor, and expects the getter to
     * report 2.
     *
     * @param string $getter   Name of the getter method to call
     * @param int    $position Index of the cost in the `getDefaultCosts` array
     * @return void
     */
    private function assertCostGetter($getter, $position)
    {
        list($firstString, $secondString) = $this->getDefaultStrings();
        $costs = $this->getDefaultCosts();

        // Default cost

        $DamerauLevenshtein = new DamerauLevenshtein($firstString, $secondString);
        $this->assertSame($costs[$position], $DamerauLevenshtein->$getter());

        // Non-default cost

        $costs[$position] = 2;

        $DamerauLevenshtein = new DamerauLevenshtein(
            $firstString,
            $secondString,
            $costs[0],
            $costs[1],
            $costs[2],
            $costs[3]
        );
        $this->assertSame($costs[$position], $DamerauLevenshtein->$getter());
    }
}
@@ -10,363 +10,363 @@ |
||
| 10 | 10 | class DamerauLevenshtein |
| 11 | 11 | { |
| 12 | 12 | |
/**
 * First string.
 *
 * Defaults to an empty string so the length and substring helpers never
 * receive null when the constructor leaves it unassigned.
 *
 * @var string
 */
private $compOne = '';

/**
 * Second string.
 *
 * Defaults to an empty string for the same reason as the first string.
 *
 * @var string
 */
private $compTwo = '';

/**
 * Matrix for Damerau Levenshtein distance dynamic programming computation.
 *
 * @var int[][]
 */
private $matrix = [];

/**
 * Flag telling whether the matrix has been computed for the input strings
 * and the current costs.
 *
 * @var bool
 */
private $calculated = false;

/**
 * Cost of character insertion (to first string to match second string).
 *
 * @var int
 */
private $insCost = 1;

/**
 * Cost of character deletion (from first string to match second string).
 *
 * @var int
 */
private $delCost = 1;

/**
 * Substitution cost.
 *
 * @var int
 */
private $subCost = 1;

/**
 * Transposition cost.
 *
 * @var int
 */
private $transCost = 1;

/**
 * Constructor.
 *
 * Both strings are always stored (cast to string). Previously they were
 * stored only when at least one of them was non-empty, which left the
 * properties unset for two empty inputs and discarded inputs such as
 * '0' / '0', because `empty('0')` is true.
 *
 * @param string $firstString  first string to compute distance
 * @param string $secondString second string to compute distance
 * @param int    $insCost      Cost of character insertion
 * @param int    $delCost      Cost of character deletion
 * @param int    $subCost      Substitution cost
 * @param int    $transCost    Transposition cost
 */
public function __construct($firstString, $secondString, $insCost = 1, $delCost = 1, $subCost = 1, $transCost = 1)
{
    $this->compOne = (string) $firstString;
    $this->compTwo = (string) $secondString;

    $this->insCost = $insCost;
    $this->delCost = $delCost;
    $this->subCost = $subCost;
    $this->transCost = $transCost;
}

/**
 * Returns computed matrix for given input strings.
 *
 * The matrix is computed on first use and reused afterwards, in the same
 * way `getSimilarity` does; the cost setters clear the `calculated` flag
 * whenever a cost changes, which forces a recomputation here.
 *
 * @return int[][] matrix
 */
public function getMatrix()
{
    if (!$this->calculated) {
        $this->setupMatrix();
    }

    return $this->matrix;
}

/**
 * Returns the Damerau Levenshtein distance between the two strings.
 *
 * The value is read from the matrix cell addressed by the UTF-8 character
 * lengths of both strings; the matrix is computed first if it has not been
 * computed yet.
 *
 * @return int
 */
public function getSimilarity()
{
    if (!$this->calculated) {
        $this->setupMatrix();
    }

    $lastRow = mb_strlen($this->compOne, 'UTF-8');
    $lastColumn = mb_strlen($this->compTwo, 'UTF-8');

    return $this->matrix[$lastRow][$lastColumn];
}

/**
 * Procedure to compute matrix for given input strings.
 *
 * Fills `$this->matrix` so that cell [i][j] holds the cost of turning the
 * first i characters of the first string into the first j characters of
 * the second string, then marks the matrix as calculated.
 *
 * @return void
 */
private function setupMatrix()
{
    $oneSize = mb_strlen($this->compOne, 'UTF-8');
    $twoSize = mb_strlen($this->compTwo, 'UTF-8');

    // Extract every character once up front instead of calling mb_substr
    // again for each matrix cell.
    $oneChars = [];
    for ($i = 0; $i < $oneSize; $i += 1) {
        $oneChars[$i] = mb_substr($this->compOne, $i, 1, 'UTF-8');
    }

    $twoChars = [];
    for ($j = 0; $j < $twoSize; $j += 1) {
        $twoChars[$j] = mb_substr($this->compTwo, $j, 1, 'UTF-8');
    }

    $this->matrix = [[]];

    // First column: deleting the first $i characters of the first string
    for ($i = 0; $i <= $oneSize; $i += 1) {
        $this->matrix[$i][0] = $i > 0 ? $this->matrix[$i - 1][0] + $this->delCost : 0;
    }

    // First row: inserting the first $j characters of the second string
    for ($j = 0; $j <= $twoSize; $j += 1) {
        $this->matrix[0][$j] = $j > 0 ? $this->matrix[0][$j - 1] + $this->insCost : 0;
    }

    for ($i = 1; $i <= $oneSize; $i += 1) {
        // Current character of the first string
        $cOne = $oneChars[$i - 1];
        for ($j = 1; $j <= $twoSize; $j += 1) {
            // Current character of the second string
            $cTwo = $twoChars[$j - 1];

            // Substitution and transposition are free when the current
            // characters compare equal
            if ($this->compare($cOne, $cTwo) == 0) {
                $cost = 0;
                $trans = 0;
            } else {
                $cost = $this->subCost;
                $trans = $this->transCost;
            }

            // Deletion cost
            $del = $this->matrix[$i - 1][$j] + $this->delCost;

            // Insertion cost
            $ins = $this->matrix[$i][$j - 1] + $this->insCost;

            // Substitution cost, 0 if same
            $sub = $this->matrix[$i - 1][$j - 1] + $cost;

            // Compute optimal
            $this->matrix[$i][$j] = min($del, $ins, $sub);

            // Transposition needs two characters on each side
            if (($i > 1) && ($j > 1)) {
                // Previous characters of both strings
                $ccOne = $oneChars[$i - 2];
                $ccTwo = $twoChars[$j - 2];

                // The last two characters are swapped between the strings
                if ($this->compare($cOne, $ccTwo) == 0 && $this->compare($ccOne, $cTwo) == 0) {
                    $this->matrix[$i][$j] = min(
                        $this->matrix[$i][$j],
                        $this->matrix[$i - 2][$j - 2] + $trans
                    );
                }
            }
        }
    }

    $this->calculated = true;
}

/**
 * Returns maximal possible edit Damerau Levenshtein distance between texts.
 *
 * Over the length both strings share, every character is charged the
 * cheaper of a substitution or a delete plus insert. The characters by
 * which the longer string exceeds the shorter one are charged a deletion
 * when the first string is longer, otherwise an insertion.
 *
 * @return int
 */
public function getMaximalDistance()
{
    $oneSize = mb_strlen($this->compOne, 'UTF-8');
    $twoSize = mb_strlen($this->compTwo, 'UTF-8');

    // Length shared by both strings and the surplus of the longer one
    $sharedSize = min($oneSize, $twoSize);
    $surplusSize = max($oneSize, $twoSize) - $sharedSize;

    // Is substitution cheaper than delete + insert?
    $perCharacterCost = min($this->subCost, $this->delCost + $this->insCost);

    // Surplus characters are deleted from the first string or inserted into it
    $surplusCost = $oneSize > $twoSize ? $this->delCost : $this->insCost;

    return $perCharacterCost * $sharedSize + $surplusSize * $surplusCost;
}

/**
 * Returns relative distance of input strings (computed with maximal possible distance).
 *
 * The result is 1 minus the ratio of the actual distance to the maximal
 * distance, so identical strings give 1 and strings at maximal distance
 * give 0.
 *
 * When the maximal distance is zero (for example, both strings are empty)
 * there is nothing to edit, so 1.0 is returned instead of dividing by zero.
 *
 * @return float|int
 */
public function getRelativeDistance()
{
    $maximalDistance = $this->getMaximalDistance();

    if ($maximalDistance === 0 || $maximalDistance === 0.0) {
        return 1.0;
    }

    // getSimilarity computes the matrix itself when needed
    return 1 - ($this->getSimilarity() / $maximalDistance);
}

/**
 * Compares two characters from string (this method may be overridden in child class).
 *
 * The default implementation is a binary-safe `strcmp`; a result of 0
 * means the characters are treated as equal.
 *
 * @param string $firstCharacter  First character
 * @param string $secondCharacter Second character
 * @return int
 */
protected function compare($firstCharacter, $secondCharacter)
{
    $ordering = strcmp($firstCharacter, $secondCharacter);

    return $ordering;
}

/**
 * Returns computed matrix for given input strings (For debugging purposes).
 *
 * The first line holds the first string; each following line starts with
 * a character of the second string (a space for the initial row) followed
 * by the matrix values for that row, concatenated without separators.
 *
 * The matrix is computed only when it is not already up to date, in the
 * same way `getSimilarity` does.
 *
 * @return string
 */
public function displayMatrix()
{
    if (!$this->calculated) {
        $this->setupMatrix();
    }

    $oneSize = mb_strlen($this->compOne, 'UTF-8');
    $twoSize = mb_strlen($this->compTwo, 'UTF-8');

    // NOTE(review): each value row starts with a label character and the
    // column for the empty prefix; confirm the header gutter width matches.
    $out = ' ' . $this->compOne . "\n";
    for ($y = 0; $y <= $twoSize; $y += 1) {
        // Row label: blank for the initial row, else the matching character
        $out .= $y === 0 ? ' ' : mb_substr($this->compTwo, $y - 1, 1, 'UTF-8');

        for ($x = 0; $x <= $oneSize; $x += 1) {
            $out .= $this->matrix[$x][$y];
        }

        $out .= "\n";
    }

    return $out;
}

/**
 * Returns the cost currently charged for inserting a character.
 *
 * @return int
 */
public function getInsCost()
{
    return $this->insCost;
}

/**
 * Sets cost of insertion operation (insert characters to first string to match second string).
 *
 * A value that differs from the current one marks the matrix as not
 * calculated, so it is rebuilt on next use.
 *
 * @param int $insCost Cost of character insertion
 * @return void
 */
public function setInsCost($insCost)
{
    if ($insCost != $this->insCost) {
        $this->calculated = false;
    }

    $this->insCost = $insCost;
}

/**
 * Returns the cost currently charged for deleting a character.
 *
 * @return int
 */
public function getDelCost()
{
    return $this->delCost;
}

/**
 * Sets cost of deletion operation (delete characters from first string to match second string).
 *
 * A value that differs from the current one marks the matrix as not
 * calculated, so it is rebuilt on next use.
 *
 * @param int $delCost Cost of character deletion
 * @return void
 */
public function setDelCost($delCost)
{
    if ($delCost != $this->delCost) {
        $this->calculated = false;
    }

    $this->delCost = $delCost;
}

| 329 | - /** |
|
| 330 | - * Returns current cost of substitution operation. |
|
| 331 | - * |
|
| 332 | - * @return int |
|
| 333 | - */ |
|
| 334 | - public function getSubCost() |
|
| 335 | - { |
|
| 336 | - return $this->subCost; |
|
| 337 | - } |
|
| 338 | - |
|
| 339 | - /** |
|
| 340 | - * Sets cost of substitution operation. |
|
| 341 | - * |
|
| 342 | - * @param int $subCost Cost of character substitution |
|
| 343 | - * @return void |
|
| 344 | - */ |
|
| 345 | - public function setSubCost($subCost) |
|
| 346 | - { |
|
| 347 | - $this->calculated = ($subCost == $this->subCost) ? $this->calculated : false; |
|
| 348 | - $this->subCost = $subCost; |
|
| 349 | - } |
|
| 350 | - |
|
| 351 | - /** |
|
| 352 | - * Returns current cost of transposition operation. |
|
| 353 | - * |
|
| 354 | - * @return int |
|
| 355 | - */ |
|
| 356 | - public function getTransCost() |
|
| 357 | - { |
|
| 358 | - return $this->transCost; |
|
| 359 | - } |
|
| 360 | - |
|
| 361 | - /** |
|
| 362 | - * Sets cost of transposition operation. |
|
| 363 | - * |
|
| 364 | - * @param int $transCost Cost of character transposition |
|
| 365 | - * @return void |
|
| 366 | - */ |
|
| 367 | - public function setTransCost($transCost) |
|
| 368 | - { |
|
| 369 | - $this->calculated = ($transCost == $this->transCost) ? $this->calculated : false; |
|
| 370 | - $this->transCost = $transCost; |
|
| 371 | - } |
|
| 13 | + /** |
|
| 14 | + * First string. |
|
| 15 | + * |
|
| 16 | + * @var string |
|
| 17 | + */ |
|
| 18 | + private $compOne; |
|
| 19 | + |
|
| 20 | + /** |
|
| 21 | + * Second string. |
|
| 22 | + * |
|
| 23 | + * @var string |
|
| 24 | + */ |
|
| 25 | + private $compTwo; |
|
| 26 | + |
|
| 27 | + /** |
|
| 28 | + * Matrix for Damerau Levenshtein distance dynamic programming computation. |
|
| 29 | + * |
|
| 30 | + * @var int[][] |
|
| 31 | + */ |
|
| 32 | + private $matrix; |
|
| 33 | + |
|
| 34 | + /** |
|
| 35 | + * Boolean flag indicating whether the matrix has been computed for the input strings. |
|
| 36 | + * |
|
| 37 | + * @var bool |
|
| 38 | + */ |
|
| 39 | + private $calculated = false; |
|
| 40 | + |
|
| 41 | + /** |
|
| 42 | + * Cost of character insertion (to first string to match second string). |
|
| 43 | + * |
|
| 44 | + * @var int |
|
| 45 | + */ |
|
| 46 | + private $insCost = 1; |
|
| 47 | + |
|
| 48 | + /** |
|
| 49 | + * Cost of character deletion (from first string to match second string). |
|
| 50 | + * |
|
| 51 | + * @var int |
|
| 52 | + */ |
|
| 53 | + private $delCost = 1; |
|
| 54 | + |
|
| 55 | + /** |
|
| 56 | + * Substitution cost. |
|
| 57 | + * |
|
| 58 | + * @var int |
|
| 59 | + */ |
|
| 60 | + private $subCost = 1; |
|
| 61 | + |
|
| 62 | + /** |
|
| 63 | + * Transposition cost. |
|
| 64 | + * |
|
| 65 | + * @var int |
|
| 66 | + */ |
|
| 67 | + private $transCost = 1; |
|
| 68 | + |
|
| 69 | + /** |
|
| 70 | + * Constructor. |
|
| 71 | + * |
|
| 72 | + * @param string $firstString first string to compute distance |
|
| 73 | + * @param string $secondString second string to compute distance |
|
| 74 | + * @param int $insCost Cost of character insertion |
|
| 75 | + * @param int $delCost Cost of character deletion |
|
| 76 | + * @param int $subCost Substitution cost |
|
| 77 | + * @param int $transCost Transposition cost |
|
| 78 | + */ |
|
| 79 | + public function __construct($firstString, $secondString, $insCost = 1, $delCost = 1, $subCost = 1, $transCost = 1) |
|
| 80 | + { |
|
| 81 | + if (!empty($firstString) || !empty($secondString)) { |
|
| 82 | + $this->compOne = $firstString; |
|
| 83 | + $this->compTwo = $secondString; |
|
| 84 | + } |
|
| 85 | + |
|
| 86 | + $this->insCost = $insCost; |
|
| 87 | + $this->delCost = $delCost; |
|
| 88 | + $this->subCost = $subCost; |
|
| 89 | + $this->transCost = $transCost; |
|
| 90 | + } |
|
| 91 | + |
|
| 92 | + /** |
|
| 93 | + * Returns computed matrix for given input strings. |
|
| 94 | + * |
|
| 95 | + * @return int[][] matrix |
|
| 96 | + */ |
|
| 97 | + public function getMatrix() |
|
| 98 | + { |
|
| 99 | + $this->setupMatrix(); |
|
| 100 | + return $this->matrix; |
|
| 101 | + } |
|
| 102 | + |
|
| 103 | + /** |
|
| 104 | + * Returns similarity of strings, absolute number = Damerau Levenshtein distance. |
|
| 105 | + * |
|
| 106 | + * @return int |
|
| 107 | + */ |
|
| 108 | + public function getSimilarity() |
|
| 109 | + { |
|
| 110 | + if (!$this->calculated) { |
|
| 111 | + $this->setupMatrix(); |
|
| 112 | + } |
|
| 113 | + |
|
| 114 | + return $this->matrix[mb_strlen($this->compOne, 'UTF-8')][mb_strlen($this->compTwo, 'UTF-8')]; |
|
| 115 | + } |
|
| 116 | + |
|
| 117 | + /** |
|
| 118 | + * Procedure to compute matrix for given input strings. |
|
| 119 | + * |
|
| 120 | + * @return void |
|
| 121 | + */ |
|
| 122 | + private function setupMatrix() |
|
| 123 | + { |
|
| 124 | + $cost = -1; |
|
| 125 | + $del = 0; |
|
| 126 | + $sub = 0; |
|
| 127 | + $ins = 0; |
|
| 128 | + $trans = 0; |
|
| 129 | + $this->matrix = [[]]; |
|
| 130 | + |
|
| 131 | + $oneSize = mb_strlen($this->compOne, 'UTF-8'); |
|
| 132 | + $twoSize = mb_strlen($this->compTwo, 'UTF-8'); |
|
| 133 | + for ($i = 0; $i <= $oneSize; $i += 1) { |
|
| 134 | + $this->matrix[$i][0] = $i > 0 ? $this->matrix[$i - 1][0] + $this->delCost : 0; |
|
| 135 | + } |
|
| 136 | + |
|
| 137 | + for ($i = 0; $i <= $twoSize; $i += 1) { |
|
| 138 | + // Insertion, actually |
|
| 139 | + $this->matrix[0][$i] = $i > 0 ? $this->matrix[0][$i - 1] + $this->insCost : 0; |
|
| 140 | + } |
|
| 141 | + |
|
| 142 | + for ($i = 1; $i <= $oneSize; $i += 1) { |
|
| 143 | + // Current character of the first string |
|
| 144 | + $cOne = mb_substr($this->compOne, $i - 1, 1, 'UTF-8'); |
|
| 145 | + for ($j = 1; $j <= $twoSize; $j += 1) { |
|
| 146 | + // Current character of the second string |
|
| 147 | + $cTwo = mb_substr($this->compTwo, $j - 1, 1, 'UTF-8'); |
|
| 148 | + |
|
| 149 | + // Compute substitution cost |
|
| 150 | + if ($this->compare($cOne, $cTwo) == 0) { |
|
| 151 | + $cost = 0; |
|
| 152 | + $trans = 0; |
|
| 153 | + } else { |
|
| 154 | + $cost = $this->subCost; |
|
| 155 | + $trans = $this->transCost; |
|
| 156 | + } |
|
| 157 | + |
|
| 158 | + // Deletion cost |
|
| 159 | + $del = $this->matrix[$i - 1][$j] + $this->delCost; |
|
| 160 | + |
|
| 161 | + // Insertion cost |
|
| 162 | + $ins = $this->matrix[$i][$j - 1] + $this->insCost; |
|
| 163 | + |
|
| 164 | + // Substitution cost, 0 if same |
|
| 165 | + $sub = $this->matrix[$i - 1][$j - 1] + $cost; |
|
| 166 | + |
|
| 167 | + // Compute optimal |
|
| 168 | + $this->matrix[$i][$j] = min($del, $ins, $sub); |
|
| 169 | + |
|
| 170 | + // Transposition cost |
|
| 171 | + if (($i > 1) && ($j > 1)) { |
|
| 172 | + // Last two |
|
| 173 | + $ccOne = mb_substr($this->compOne, $i - 2, 1, 'UTF-8'); |
|
| 174 | + $ccTwo = mb_substr($this->compTwo, $j - 2, 1, 'UTF-8'); |
|
| 175 | + |
|
| 176 | + if ($this->compare($cOne, $ccTwo) == 0 && $this->compare($ccOne, $cTwo) == 0) { |
|
| 177 | + // Transposition cost is computed as the minimum of the two |
|
| 178 | + $this->matrix[$i][$j] = min( |
|
| 179 | + $this->matrix[$i][$j], |
|
| 180 | + $this->matrix[$i - 2][$j - 2] + $trans |
|
| 181 | + ); |
|
| 182 | + } |
|
| 183 | + } |
|
| 184 | + } |
|
| 185 | + } |
|
| 186 | + |
|
| 187 | + $this->calculated = true; |
|
| 188 | + } |
|
| 189 | + |
|
| 190 | + /** |
|
| 191 | + * Returns the maximal possible Damerau Levenshtein edit distance between the texts. |
|
| 192 | + * |
|
| 193 | + * On common substring of same length perform substitution / insert + delete |
|
| 194 | + * (depends on what is cheaper), then on extra characters perform insertion / deletion |
|
| 195 | + * |
|
| 196 | + * @return int |
|
| 197 | + */ |
|
| 198 | + public function getMaximalDistance() |
|
| 199 | + { |
|
| 200 | + $oneSize = mb_strlen($this->compOne, 'UTF-8'); |
|
| 201 | + $twoSize = mb_strlen($this->compTwo, 'UTF-8'); |
|
| 202 | + |
|
| 203 | + // Max cost, result value |
|
| 204 | + $maxCost = 0; |
|
| 205 | + |
|
| 206 | + // Is substitution cheaper than delete + insert? |
|
| 207 | + $subCost = min($this->subCost, $this->delCost + $this->insCost); |
|
| 208 | + |
|
| 209 | + // Get common size |
|
| 210 | + $minSize = min($oneSize, $twoSize); |
|
| 211 | + $maxSize = max($oneSize, $twoSize); |
|
| 212 | + $extraSize = $maxSize - $minSize; |
|
| 213 | + |
|
| 214 | + // On common size perform substitution / delete + insert, whichever is cheaper |
|
| 215 | + $maxCost = $subCost * $minSize; |
|
| 216 | + |
|
| 217 | + // On the remaining extra characters do insert/delete |
|
| 218 | + if ($oneSize > $twoSize) { |
|
| 219 | + // Delete extra characters |
|
| 220 | + $maxCost += $extraSize * $this->delCost; |
|
| 221 | + } else { |
|
| 222 | + // Insert extra characters |
|
| 223 | + $maxCost += $extraSize * $this->insCost; |
|
| 224 | + } |
|
| 225 | + |
|
| 226 | + return $maxCost; |
|
| 227 | + } |
|
| 228 | + |
|
| 229 | + /** |
|
| 230 | + * Returns relative distance of input strings (computed with maximal possible distance). |
|
| 231 | + * |
|
| 232 | + * @return int |
|
| 233 | + */ |
|
| 234 | + public function getRelativeDistance() |
|
| 235 | + { |
|
| 236 | + if (!$this->calculated) { |
|
| 237 | + $this->setupMatrix(); |
|
| 238 | + } |
|
| 239 | + |
|
| 240 | + return 1 - (($this->getSimilarity()) / $this->getMaximalDistance()); |
|
| 241 | + } |
|
| 242 | + |
|
| 243 | + /** |
|
| 244 | + * Compares two characters from string (this method may be overridden in child class). |
|
| 245 | + * |
|
| 246 | + * @param string $firstCharacter First character |
|
| 247 | + * @param string $secondCharacter Second character |
|
| 248 | + * @return int |
|
| 249 | + */ |
|
| 250 | + protected function compare($firstCharacter, $secondCharacter) |
|
| 251 | + { |
|
| 252 | + return strcmp($firstCharacter, $secondCharacter); |
|
| 253 | + } |
|
| 254 | + |
|
| 255 | + /** |
|
| 256 | + * Returns computed matrix for given input strings (For debugging purposes). |
|
| 257 | + * |
|
| 258 | + * @return string |
|
| 259 | + */ |
|
| 260 | + public function displayMatrix() |
|
| 261 | + { |
|
| 262 | + $this->setupMatrix(); |
|
| 263 | + |
|
| 264 | + $oneSize = mb_strlen($this->compOne, 'UTF-8'); |
|
| 265 | + $twoSize = mb_strlen($this->compTwo, 'UTF-8'); |
|
| 266 | + |
|
| 267 | + $out = ' ' . $this->compOne . "\n"; |
|
| 268 | + for ($y = 0; $y <= $twoSize; $y += 1) { |
|
| 269 | + if ($y - 1 < 0) { |
|
| 270 | + $out .= ' '; |
|
| 271 | + } else { |
|
| 272 | + $out .= (mb_substr($this->compTwo, $y - 1, 1, 'UTF-8')); |
|
| 273 | + } |
|
| 274 | + |
|
| 275 | + for ($x = 0; $x <= $oneSize; $x += 1) { |
|
| 276 | + $out .= $this->matrix[$x][$y]; |
|
| 277 | + } |
|
| 278 | + |
|
| 279 | + $out .= "\n"; |
|
| 280 | + } |
|
| 281 | + |
|
| 282 | + return $out; |
|
| 283 | + } |
|
| 284 | + |
|
| 285 | + /** |
|
| 286 | + * Returns current cost of insertion operation. |
|
| 287 | + * |
|
| 288 | + * @return int |
|
| 289 | + */ |
|
| 290 | + public function getInsCost() |
|
| 291 | + { |
|
| 292 | + return $this->insCost; |
|
| 293 | + } |
|
| 294 | + |
|
| 295 | + /** |
|
| 296 | + * Sets cost of insertion operation (insert characters to first string to match second string). |
|
| 297 | + * |
|
| 298 | + * @param int $insCost Cost of character insertion |
|
| 299 | + * @return void |
|
| 300 | + */ |
|
| 301 | + public function setInsCost($insCost) |
|
| 302 | + { |
|
| 303 | + $this->calculated = ($insCost == $this->insCost) ? $this->calculated : false; |
|
| 304 | + $this->insCost = $insCost; |
|
| 305 | + } |
|
| 306 | + |
|
| 307 | + /** |
|
| 308 | + * Returns current cost of deletion operation. |
|
| 309 | + * |
|
| 310 | + * @return int |
|
| 311 | + */ |
|
| 312 | + public function getDelCost() |
|
| 313 | + { |
|
| 314 | + return $this->delCost; |
|
| 315 | + } |
|
| 316 | + |
|
| 317 | + /** |
|
| 318 | + * Sets cost of deletion operation (delete characters from first string to match second string). |
|
| 319 | + * |
|
| 320 | + * @param int $delCost Cost of character deletion |
|
| 321 | + * @return void |
|
| 322 | + */ |
|
| 323 | + public function setDelCost($delCost) |
|
| 324 | + { |
|
| 325 | + $this->calculated = ($delCost == $this->delCost) ? $this->calculated : false; |
|
| 326 | + $this->delCost = $delCost; |
|
| 327 | + } |
|
| 328 | + |
|
| 329 | + /** |
|
| 330 | + * Returns current cost of substitution operation. |
|
| 331 | + * |
|
| 332 | + * @return int |
|
| 333 | + */ |
|
| 334 | + public function getSubCost() |
|
| 335 | + { |
|
| 336 | + return $this->subCost; |
|
| 337 | + } |
|
| 338 | + |
|
| 339 | + /** |
|
| 340 | + * Sets cost of substitution operation. |
|
| 341 | + * |
|
| 342 | + * @param int $subCost Cost of character substitution |
|
| 343 | + * @return void |
|
| 344 | + */ |
|
| 345 | + public function setSubCost($subCost) |
|
| 346 | + { |
|
| 347 | + $this->calculated = ($subCost == $this->subCost) ? $this->calculated : false; |
|
| 348 | + $this->subCost = $subCost; |
|
| 349 | + } |
|
| 350 | + |
|
| 351 | + /** |
|
| 352 | + * Returns current cost of transposition operation. |
|
| 353 | + * |
|
| 354 | + * @return int |
|
| 355 | + */ |
|
| 356 | + public function getTransCost() |
|
| 357 | + { |
|
| 358 | + return $this->transCost; |
|
| 359 | + } |
|
| 360 | + |
|
| 361 | + /** |
|
| 362 | + * Sets cost of transposition operation. |
|
| 363 | + * |
|
| 364 | + * @param int $transCost Cost of character transposition |
|
| 365 | + * @return void |
|
| 366 | + */ |
|
| 367 | + public function setTransCost($transCost) |
|
| 368 | + { |
|
| 369 | + $this->calculated = ($transCost == $this->transCost) ? $this->calculated : false; |
|
| 370 | + $this->transCost = $transCost; |
|
| 371 | + } |
|
| 372 | 372 | } |