| @@ -7,321 +7,321 @@ | ||
| 7 | 7 | class DamerauLevenshteinTest extends TestCase | 
| 8 | 8 |  { | 
| 9 | 9 | |
| 10 | - /** | |
| 11 | - * Tests `getSimilarity`. | |
| 12 | - * | |
| 13 | - * @return void | |
| 14 | - */ | |
| 15 | - public function testGetSimilarity() | |
| 16 | -    { | |
| 17 | - $inputs = [ | |
| 18 | - ['foo', 'foo'], | |
| 19 | - ['foo', 'fooo'], | |
| 20 | - ['foo', 'bar'], | |
| 21 | - | |
| 22 | - ['123', '12'], | |
| 23 | - ['qwe', 'qwa'], | |
| 24 | - ['awe', 'qwe'], | |
| 25 | - ['фыв', 'фыа'], | |
| 26 | - ['vvvqw', 'vvvwq'], | |
| 27 | - ['qw', 'wq'], | |
| 28 | - ['qq', 'ww'], | |
| 29 | - ['qw', 'qw'], | |
| 30 | - ['пионер', 'плеер'], | |
| 31 | - ['пионер', 'пионеер'], | |
| 32 | - ['пионер', 'поинер'], | |
| 33 | - ['pioner', 'poner'], | |
| 34 | - ['пионер', 'понер'], | |
| 35 | - ]; | |
| 36 | - $outputs = [ | |
| 37 | - 0, | |
| 38 | - 1, | |
| 39 | - 3, | |
| 40 | - | |
| 41 | - 1, | |
| 42 | - 1, | |
| 43 | - 1, | |
| 44 | - 1, | |
| 45 | - 1, | |
| 46 | - 1, | |
| 47 | - 2, | |
| 48 | - 0, | |
| 49 | - 3, | |
| 50 | - 1, | |
| 51 | - 1, | |
| 52 | - 1, | |
| 53 | - 1, | |
| 54 | - ]; | |
| 55 | - | |
| 56 | -        foreach ($inputs as $i => $input) { | |
| 57 | - $DamerauLevenshtein = new DamerauLevenshtein($input[0], $input[1]); | |
| 58 | - $result = $DamerauLevenshtein->getSimilarity(); | |
| 59 | - $expected = $outputs[$i]; | |
| 60 | - | |
| 61 | - $this->assertSame($expected, $result); | |
| 62 | - } | |
| 63 | - } | |
| 64 | - | |
| 65 | - /** | |
| 66 | - * Tests `getInsCost`. | |
| 67 | - * | |
| 68 | - * @return void | |
| 69 | - */ | |
| 70 | - public function testGetInsCost() | |
| 71 | -    { | |
| 72 | - list($firstString, $secondString) = $this->getDefaultStrings(); | |
| 73 | - list($insCost, $delCost, $subCost, $transCost) = $this->getDefaultCosts(); | |
| 74 | - | |
| 75 | - // Default insert cost | |
| 76 | - | |
| 77 | - $DamerauLevenshtein = new DamerauLevenshtein($firstString, $secondString); | |
| 78 | - $result = $DamerauLevenshtein->getInsCost(); | |
| 79 | - $expected = $insCost; | |
| 80 | - | |
| 81 | - $this->assertSame($expected, $result); | |
| 82 | - | |
| 83 | - // Non-default insert cost | |
| 84 | - | |
| 85 | - $insCost = 2; | |
| 86 | - | |
| 87 | - $DamerauLevenshtein = new DamerauLevenshtein( | |
| 88 | - $firstString, | |
| 89 | - $secondString, | |
| 90 | - $insCost, | |
| 91 | - $delCost, | |
| 92 | - $subCost, | |
| 93 | - $transCost | |
| 94 | - ); | |
| 95 | - $result = $DamerauLevenshtein->getInsCost(); | |
| 96 | - $expected = $insCost; | |
| 97 | - | |
| 98 | - $this->assertSame($expected, $result); | |
| 99 | - } | |
| 100 | - | |
| 101 | - /** | |
| 102 | - * Tests `getDelCost`. | |
| 103 | - * | |
| 104 | - * @return void | |
| 105 | - */ | |
| 106 | - public function testGetDelCost() | |
| 107 | -    { | |
| 108 | - list($firstString, $secondString) = $this->getDefaultStrings(); | |
| 109 | - list($insCost, $delCost, $subCost, $transCost) = $this->getDefaultCosts(); | |
| 110 | - | |
| 111 | - // Default delete cost | |
| 112 | - | |
| 113 | - $DamerauLevenshtein = new DamerauLevenshtein($firstString, $secondString); | |
| 114 | - $result = $DamerauLevenshtein->getDelCost(); | |
| 115 | - $expected = $delCost; | |
| 116 | - | |
| 117 | - $this->assertSame($expected, $result); | |
| 118 | - | |
| 119 | - // Non-default delete cost | |
| 120 | - | |
| 121 | - $delCost = 2; | |
| 122 | - | |
| 123 | - $DamerauLevenshtein = new DamerauLevenshtein( | |
| 124 | - $firstString, | |
| 125 | - $secondString, | |
| 126 | - $insCost, | |
| 127 | - $delCost, | |
| 128 | - $subCost, | |
| 129 | - $transCost | |
| 130 | - ); | |
| 131 | - $result = $DamerauLevenshtein->getDelCost(); | |
| 132 | - $expected = $delCost; | |
| 133 | - | |
| 134 | - $this->assertSame($expected, $result); | |
| 135 | - } | |
| 136 | - | |
| 137 | - /** | |
| 138 | - * Tests `getSubCost`. | |
| 139 | - * | |
| 140 | - * @return void | |
| 141 | - */ | |
| 142 | - public function testGetSubCost() | |
| 143 | -    { | |
| 144 | - list($firstString, $secondString) = $this->getDefaultStrings(); | |
| 145 | - list($insCost, $delCost, $subCost, $transCost) = $this->getDefaultCosts(); | |
| 146 | - | |
| 147 | - // Default substitution cost | |
| 148 | - | |
| 149 | - $DamerauLevenshtein = new DamerauLevenshtein($firstString, $secondString); | |
| 150 | - $result = $DamerauLevenshtein->getSubCost(); | |
| 151 | - $expected = $subCost; | |
| 152 | - | |
| 153 | - $this->assertSame($expected, $result); | |
| 154 | - | |
| 155 | - // Non-default substitution cost | |
| 156 | - | |
| 157 | - $subCost = 2; | |
| 158 | - | |
| 159 | - $DamerauLevenshtein = new DamerauLevenshtein( | |
| 160 | - $firstString, | |
| 161 | - $secondString, | |
| 162 | - $insCost, | |
| 163 | - $delCost, | |
| 164 | - $subCost, | |
| 165 | - $transCost | |
| 166 | - ); | |
| 167 | - $result = $DamerauLevenshtein->getSubCost(); | |
| 168 | - $expected = $subCost; | |
| 169 | - | |
| 170 | - $this->assertSame($expected, $result); | |
| 171 | - } | |
| 172 | - | |
| 173 | - /** | |
| 174 | - * Tests `getTransCost`. | |
| 175 | - * | |
| 176 | - * @return void | |
| 177 | - */ | |
| 178 | - public function testGetTransCost() | |
| 179 | -    { | |
| 180 | - list($firstString, $secondString) = $this->getDefaultStrings(); | |
| 181 | - list($insCost, $delCost, $subCost, $transCost) = $this->getDefaultCosts(); | |
| 182 | - | |
| 183 | - // Default transposition cost | |
| 184 | - | |
| 185 | - $DamerauLevenshtein = new DamerauLevenshtein($firstString, $secondString); | |
| 186 | - $result = $DamerauLevenshtein->getTransCost(); | |
| 187 | - $expected = $transCost; | |
| 188 | - | |
| 189 | - $this->assertSame($expected, $result); | |
| 190 | - | |
| 191 | - // Non-default transposition cost | |
| 192 | - | |
| 193 | - $transCost = 2; | |
| 194 | - | |
| 195 | - $DamerauLevenshtein = new DamerauLevenshtein( | |
| 196 | - $firstString, | |
| 197 | - $secondString, | |
| 198 | - $insCost, | |
| 199 | - $delCost, | |
| 200 | - $subCost, | |
| 201 | - $transCost | |
| 202 | - ); | |
| 203 | - $result = $DamerauLevenshtein->getTransCost(); | |
| 204 | - $expected = $transCost; | |
| 205 | - | |
| 206 | - $this->assertSame($expected, $result); | |
| 207 | - } | |
| 208 | - | |
| 209 | - /** | |
| 210 | - * Tests `getRelativeDistance`. | |
| 211 | - * | |
| 212 | - * @return void | |
| 213 | - */ | |
| 214 | - public function testGetRelativeDistance() | |
| 215 | -    { | |
| 216 | - $delta = pow(10, -4); | |
| 217 | - | |
| 218 | - $firstString = 'O\'Callaghan'; | |
| 219 | - $secondString = 'OCallaghan'; | |
| 220 | - | |
| 221 | - $DamerauLevenshtein = new DamerauLevenshtein($firstString, $secondString); | |
| 222 | - $result = $DamerauLevenshtein->getRelativeDistance(); | |
| 223 | - $expected = 0.90909090909091; | |
| 224 | - $this->assertEquals($expected, $result, '', $delta); | |
| 225 | - | |
| 226 | - $firstString = 'Thom'; | |
| 227 | - $secondString = 'Mira'; | |
| 228 | - | |
| 229 | - $DamerauLevenshtein = new DamerauLevenshtein($firstString, $secondString); | |
| 230 | - $result = $DamerauLevenshtein->getRelativeDistance(); | |
| 231 | - $expected = 0.0; | |
| 232 | - $this->assertEquals($expected, $result, '', $delta); | |
| 233 | - | |
| 234 | - $firstString = 'Oldeboom'; | |
| 235 | - $secondString = 'Ven'; | |
| 236 | - | |
| 237 | - $DamerauLevenshtein = new DamerauLevenshtein($firstString, $secondString); | |
| 238 | - $result = $DamerauLevenshtein->getRelativeDistance(); | |
| 239 | - $expected = 0.125; | |
| 240 | - $this->assertEquals($expected, $result, '', $delta); | |
| 241 | - | |
| 242 | - $firstString = 'ven'; | |
| 243 | - $secondString = 'Ven'; | |
| 244 | - | |
| 245 | - $DamerauLevenshtein = new DamerauLevenshtein($firstString, $secondString); | |
| 246 | - $result = $DamerauLevenshtein->getRelativeDistance(); | |
| 247 | - $expected = 0.66666666666667; | |
| 248 | - $this->assertEquals($expected, $result, '', $delta); | |
| 249 | - | |
| 250 | - $firstString = 'enV'; | |
| 251 | - $secondString = 'Ven'; | |
| 252 | - | |
| 253 | - $DamerauLevenshtein = new DamerauLevenshtein($firstString, $secondString); | |
| 254 | - $result = $DamerauLevenshtein->getRelativeDistance(); | |
| 255 | - $expected = 0.33333333333333; | |
| 256 | - $this->assertEquals($expected, $result, '', $delta); | |
| 257 | - } | |
| 258 | - | |
| 259 | - /** | |
| 260 | - * Tests `getMatrix`. | |
| 261 | - * | |
| 262 | - * @return void | |
| 263 | - */ | |
| 264 | - public function testGetMatrix() | |
| 265 | -    { | |
| 266 | - list($firstString, $secondString) = $this->getDefaultStrings(); | |
| 267 | - | |
| 268 | - $DamerauLevenshtein = new DamerauLevenshtein($firstString, $secondString); | |
| 269 | - $actual = $DamerauLevenshtein->getMatrix(); | |
| 270 | - $expected = [ | |
| 271 | - [0, 1, 2, 3], | |
| 272 | - [1, 1, 2, 3], | |
| 273 | - [2, 2, 2, 3], | |
| 274 | - [3, 3, 3, 3] | |
| 275 | - ]; | |
| 276 | - $this->assertSame($expected, $actual); | |
| 277 | - } | |
| 278 | - | |
| 279 | - /** | |
| 280 | - * Tests `displayMatrix`. | |
| 281 | - * | |
| 282 | - * @return void | |
| 283 | - */ | |
| 284 | - public function testDisplayMatrix() | |
| 285 | -    { | |
| 286 | - list($firstString, $secondString) = $this->getDefaultStrings(); | |
| 287 | - | |
| 288 | - $DamerauLevenshtein = new DamerauLevenshtein($firstString, $secondString); | |
| 289 | - $actual = $DamerauLevenshtein->displayMatrix(); | |
| 290 | -        $expected = implode('', [ | |
| 291 | - " foo\n", | |
| 292 | - " 0123\n", | |
| 293 | - "b1123\n", | |
| 294 | - "a2223\n", | |
| 295 | - "r3333\n", | |
| 296 | - ]); | |
| 297 | - $this->assertSame($expected, $actual); | |
| 298 | - } | |
| 299 | - | |
| 300 | - /** | |
| 301 | - * Returns the default costs. | |
| 302 | - * | |
| 303 | - * @return array Costs (insert, delete, substitution, transposition) | |
| 304 | - */ | |
| 305 | - protected function getDefaultCosts() | |
| 306 | -    { | |
| 307 | - $insCost = 1; | |
| 308 | - $delCost = 1; | |
| 309 | - $subCost = 1; | |
| 310 | - $transCost = 1; | |
| 311 | - | |
| 312 | - return [$insCost, $delCost, $subCost, $transCost]; | |
| 313 | - } | |
| 314 | - | |
| 315 | - /** | |
| 316 | - * Returns the default strings. | |
| 317 | - * | |
| 318 | - * @return array Strings (foo, bar) | |
| 319 | - */ | |
| 320 | - protected function getDefaultStrings() | |
| 321 | -    { | |
| 322 | - $firstString = 'foo'; | |
| 323 | - $secondString = 'bar'; | |
| 324 | - | |
| 325 | - return [$firstString, $secondString]; | |
| 326 | - } | |
| 10 | + /** | |
| 11 | + * Tests `getSimilarity`. | |
| 12 | + * | |
| 13 | + * @return void | |
| 14 | + */ | |
| 15 | + public function testGetSimilarity() | |
| 16 | +	{ | |
| 17 | + $inputs = [ | |
| 18 | + ['foo', 'foo'], | |
| 19 | + ['foo', 'fooo'], | |
| 20 | + ['foo', 'bar'], | |
| 21 | + | |
| 22 | + ['123', '12'], | |
| 23 | + ['qwe', 'qwa'], | |
| 24 | + ['awe', 'qwe'], | |
| 25 | + ['фыв', 'фыа'], | |
| 26 | + ['vvvqw', 'vvvwq'], | |
| 27 | + ['qw', 'wq'], | |
| 28 | + ['qq', 'ww'], | |
| 29 | + ['qw', 'qw'], | |
| 30 | + ['пионер', 'плеер'], | |
| 31 | + ['пионер', 'пионеер'], | |
| 32 | + ['пионер', 'поинер'], | |
| 33 | + ['pioner', 'poner'], | |
| 34 | + ['пионер', 'понер'], | |
| 35 | + ]; | |
| 36 | + $outputs = [ | |
| 37 | + 0, | |
| 38 | + 1, | |
| 39 | + 3, | |
| 40 | + | |
| 41 | + 1, | |
| 42 | + 1, | |
| 43 | + 1, | |
| 44 | + 1, | |
| 45 | + 1, | |
| 46 | + 1, | |
| 47 | + 2, | |
| 48 | + 0, | |
| 49 | + 3, | |
| 50 | + 1, | |
| 51 | + 1, | |
| 52 | + 1, | |
| 53 | + 1, | |
| 54 | + ]; | |
| 55 | + | |
| 56 | +		foreach ($inputs as $i => $input) { | |
| 57 | + $DamerauLevenshtein = new DamerauLevenshtein($input[0], $input[1]); | |
| 58 | + $result = $DamerauLevenshtein->getSimilarity(); | |
| 59 | + $expected = $outputs[$i]; | |
| 60 | + | |
| 61 | + $this->assertSame($expected, $result); | |
| 62 | + } | |
| 63 | + } | |
| 64 | + | |
| 65 | + /** | |
| 66 | + * Tests `getInsCost`. | |
| 67 | + * | |
| 68 | + * @return void | |
| 69 | + */ | |
| 70 | + public function testGetInsCost() | |
| 71 | +	{ | |
| 72 | + list($firstString, $secondString) = $this->getDefaultStrings(); | |
| 73 | + list($insCost, $delCost, $subCost, $transCost) = $this->getDefaultCosts(); | |
| 74 | + | |
| 75 | + // Default insert cost | |
| 76 | + | |
| 77 | + $DamerauLevenshtein = new DamerauLevenshtein($firstString, $secondString); | |
| 78 | + $result = $DamerauLevenshtein->getInsCost(); | |
| 79 | + $expected = $insCost; | |
| 80 | + | |
| 81 | + $this->assertSame($expected, $result); | |
| 82 | + | |
| 83 | + // Non-default insert cost | |
| 84 | + | |
| 85 | + $insCost = 2; | |
| 86 | + | |
| 87 | + $DamerauLevenshtein = new DamerauLevenshtein( | |
| 88 | + $firstString, | |
| 89 | + $secondString, | |
| 90 | + $insCost, | |
| 91 | + $delCost, | |
| 92 | + $subCost, | |
| 93 | + $transCost | |
| 94 | + ); | |
| 95 | + $result = $DamerauLevenshtein->getInsCost(); | |
| 96 | + $expected = $insCost; | |
| 97 | + | |
| 98 | + $this->assertSame($expected, $result); | |
| 99 | + } | |
| 100 | + | |
| 101 | + /** | |
| 102 | + * Tests `getDelCost`. | |
| 103 | + * | |
| 104 | + * @return void | |
| 105 | + */ | |
| 106 | + public function testGetDelCost() | |
| 107 | +	{ | |
| 108 | + list($firstString, $secondString) = $this->getDefaultStrings(); | |
| 109 | + list($insCost, $delCost, $subCost, $transCost) = $this->getDefaultCosts(); | |
| 110 | + | |
| 111 | + // Default delete cost | |
| 112 | + | |
| 113 | + $DamerauLevenshtein = new DamerauLevenshtein($firstString, $secondString); | |
| 114 | + $result = $DamerauLevenshtein->getDelCost(); | |
| 115 | + $expected = $delCost; | |
| 116 | + | |
| 117 | + $this->assertSame($expected, $result); | |
| 118 | + | |
| 119 | + // Non-default delete cost | |
| 120 | + | |
| 121 | + $delCost = 2; | |
| 122 | + | |
| 123 | + $DamerauLevenshtein = new DamerauLevenshtein( | |
| 124 | + $firstString, | |
| 125 | + $secondString, | |
| 126 | + $insCost, | |
| 127 | + $delCost, | |
| 128 | + $subCost, | |
| 129 | + $transCost | |
| 130 | + ); | |
| 131 | + $result = $DamerauLevenshtein->getDelCost(); | |
| 132 | + $expected = $delCost; | |
| 133 | + | |
| 134 | + $this->assertSame($expected, $result); | |
| 135 | + } | |
| 136 | + | |
| 137 | + /** | |
| 138 | + * Tests `getSubCost`. | |
| 139 | + * | |
| 140 | + * @return void | |
| 141 | + */ | |
| 142 | + public function testGetSubCost() | |
| 143 | +	{ | |
| 144 | + list($firstString, $secondString) = $this->getDefaultStrings(); | |
| 145 | + list($insCost, $delCost, $subCost, $transCost) = $this->getDefaultCosts(); | |
| 146 | + | |
| 147 | + // Default substitution cost | |
| 148 | + | |
| 149 | + $DamerauLevenshtein = new DamerauLevenshtein($firstString, $secondString); | |
| 150 | + $result = $DamerauLevenshtein->getSubCost(); | |
| 151 | + $expected = $subCost; | |
| 152 | + | |
| 153 | + $this->assertSame($expected, $result); | |
| 154 | + | |
| 155 | + // Non-default substitution cost | |
| 156 | + | |
| 157 | + $subCost = 2; | |
| 158 | + | |
| 159 | + $DamerauLevenshtein = new DamerauLevenshtein( | |
| 160 | + $firstString, | |
| 161 | + $secondString, | |
| 162 | + $insCost, | |
| 163 | + $delCost, | |
| 164 | + $subCost, | |
| 165 | + $transCost | |
| 166 | + ); | |
| 167 | + $result = $DamerauLevenshtein->getSubCost(); | |
| 168 | + $expected = $subCost; | |
| 169 | + | |
| 170 | + $this->assertSame($expected, $result); | |
| 171 | + } | |
| 172 | + | |
| 173 | + /** | |
| 174 | + * Tests `getTransCost`. | |
| 175 | + * | |
| 176 | + * @return void | |
| 177 | + */ | |
| 178 | + public function testGetTransCost() | |
| 179 | +	{ | |
| 180 | + list($firstString, $secondString) = $this->getDefaultStrings(); | |
| 181 | + list($insCost, $delCost, $subCost, $transCost) = $this->getDefaultCosts(); | |
| 182 | + | |
| 183 | + // Default transposition cost | |
| 184 | + | |
| 185 | + $DamerauLevenshtein = new DamerauLevenshtein($firstString, $secondString); | |
| 186 | + $result = $DamerauLevenshtein->getTransCost(); | |
| 187 | + $expected = $transCost; | |
| 188 | + | |
| 189 | + $this->assertSame($expected, $result); | |
| 190 | + | |
| 191 | + // Non-default transposition cost | |
| 192 | + | |
| 193 | + $transCost = 2; | |
| 194 | + | |
| 195 | + $DamerauLevenshtein = new DamerauLevenshtein( | |
| 196 | + $firstString, | |
| 197 | + $secondString, | |
| 198 | + $insCost, | |
| 199 | + $delCost, | |
| 200 | + $subCost, | |
| 201 | + $transCost | |
| 202 | + ); | |
| 203 | + $result = $DamerauLevenshtein->getTransCost(); | |
| 204 | + $expected = $transCost; | |
| 205 | + | |
| 206 | + $this->assertSame($expected, $result); | |
| 207 | + } | |
| 208 | + | |
| 209 | + /** | |
| 210 | + * Tests `getRelativeDistance`. | |
| 211 | + * | |
| 212 | + * @return void | |
| 213 | + */ | |
| 214 | + public function testGetRelativeDistance() | |
| 215 | +	{ | |
| 216 | + $delta = pow(10, -4); | |
| 217 | + | |
| 218 | + $firstString = 'O\'Callaghan'; | |
| 219 | + $secondString = 'OCallaghan'; | |
| 220 | + | |
| 221 | + $DamerauLevenshtein = new DamerauLevenshtein($firstString, $secondString); | |
| 222 | + $result = $DamerauLevenshtein->getRelativeDistance(); | |
| 223 | + $expected = 0.90909090909091; | |
| 224 | + $this->assertEquals($expected, $result, '', $delta); | |
| 225 | + | |
| 226 | + $firstString = 'Thom'; | |
| 227 | + $secondString = 'Mira'; | |
| 228 | + | |
| 229 | + $DamerauLevenshtein = new DamerauLevenshtein($firstString, $secondString); | |
| 230 | + $result = $DamerauLevenshtein->getRelativeDistance(); | |
| 231 | + $expected = 0.0; | |
| 232 | + $this->assertEquals($expected, $result, '', $delta); | |
| 233 | + | |
| 234 | + $firstString = 'Oldeboom'; | |
| 235 | + $secondString = 'Ven'; | |
| 236 | + | |
| 237 | + $DamerauLevenshtein = new DamerauLevenshtein($firstString, $secondString); | |
| 238 | + $result = $DamerauLevenshtein->getRelativeDistance(); | |
| 239 | + $expected = 0.125; | |
| 240 | + $this->assertEquals($expected, $result, '', $delta); | |
| 241 | + | |
| 242 | + $firstString = 'ven'; | |
| 243 | + $secondString = 'Ven'; | |
| 244 | + | |
| 245 | + $DamerauLevenshtein = new DamerauLevenshtein($firstString, $secondString); | |
| 246 | + $result = $DamerauLevenshtein->getRelativeDistance(); | |
| 247 | + $expected = 0.66666666666667; | |
| 248 | + $this->assertEquals($expected, $result, '', $delta); | |
| 249 | + | |
| 250 | + $firstString = 'enV'; | |
| 251 | + $secondString = 'Ven'; | |
| 252 | + | |
| 253 | + $DamerauLevenshtein = new DamerauLevenshtein($firstString, $secondString); | |
| 254 | + $result = $DamerauLevenshtein->getRelativeDistance(); | |
| 255 | + $expected = 0.33333333333333; | |
| 256 | + $this->assertEquals($expected, $result, '', $delta); | |
| 257 | + } | |
| 258 | + | |
| 259 | + /** | |
| 260 | + * Tests `getMatrix`. | |
| 261 | + * | |
| 262 | + * @return void | |
| 263 | + */ | |
| 264 | + public function testGetMatrix() | |
| 265 | +	{ | |
| 266 | + list($firstString, $secondString) = $this->getDefaultStrings(); | |
| 267 | + | |
| 268 | + $DamerauLevenshtein = new DamerauLevenshtein($firstString, $secondString); | |
| 269 | + $actual = $DamerauLevenshtein->getMatrix(); | |
| 270 | + $expected = [ | |
| 271 | + [0, 1, 2, 3], | |
| 272 | + [1, 1, 2, 3], | |
| 273 | + [2, 2, 2, 3], | |
| 274 | + [3, 3, 3, 3] | |
| 275 | + ]; | |
| 276 | + $this->assertSame($expected, $actual); | |
| 277 | + } | |
| 278 | + | |
| 279 | + /** | |
| 280 | + * Tests `displayMatrix`. | |
| 281 | + * | |
| 282 | + * @return void | |
| 283 | + */ | |
| 284 | + public function testDisplayMatrix() | |
| 285 | +	{ | |
| 286 | + list($firstString, $secondString) = $this->getDefaultStrings(); | |
| 287 | + | |
| 288 | + $DamerauLevenshtein = new DamerauLevenshtein($firstString, $secondString); | |
| 289 | + $actual = $DamerauLevenshtein->displayMatrix(); | |
| 290 | +		$expected = implode('', [ | |
| 291 | + " foo\n", | |
| 292 | + " 0123\n", | |
| 293 | + "b1123\n", | |
| 294 | + "a2223\n", | |
| 295 | + "r3333\n", | |
| 296 | + ]); | |
| 297 | + $this->assertSame($expected, $actual); | |
| 298 | + } | |
| 299 | + | |
| 300 | + /** | |
| 301 | + * Returns the default costs. | |
| 302 | + * | |
| 303 | + * @return array Costs (insert, delete, substitution, transposition) | |
| 304 | + */ | |
| 305 | + protected function getDefaultCosts() | |
| 306 | +	{ | |
| 307 | + $insCost = 1; | |
| 308 | + $delCost = 1; | |
| 309 | + $subCost = 1; | |
| 310 | + $transCost = 1; | |
| 311 | + | |
| 312 | + return [$insCost, $delCost, $subCost, $transCost]; | |
| 313 | + } | |
| 314 | + | |
| 315 | + /** | |
| 316 | + * Returns the default strings. | |
| 317 | + * | |
| 318 | + * @return array Strings (foo, bar) | |
| 319 | + */ | |
| 320 | + protected function getDefaultStrings() | |
| 321 | +	{ | |
| 322 | + $firstString = 'foo'; | |
| 323 | + $secondString = 'bar'; | |
| 324 | + | |
| 325 | + return [$firstString, $secondString]; | |
| 326 | + } | |
| 327 | 327 | } | 
| @@ -10,386 +10,386 @@ | ||
| 10 | 10 | class DamerauLevenshtein | 
| 11 | 11 |  { | 
| 12 | 12 | |
| 13 | - /** | |
| 14 | - * First string. | |
| 15 | - * | |
| 16 | - * @var string | |
| 17 | - */ | |
| 18 | - private $compOne; | |
| 19 | - | |
| 20 | - /** | |
| 21 | - * Second string. | |
| 22 | - * | |
| 23 | - * @var string | |
| 24 | - */ | |
| 25 | - private $compTwo; | |
| 26 | - | |
| 27 | - /** | |
| 28 | - * Length of first string. | |
| 29 | - * | |
| 30 | - * @var int | |
| 31 | - */ | |
| 32 | - private $compOneLength = 0; | |
| 33 | - | |
| 34 | - /** | |
| 35 | - * Length of second string. | |
| 36 | - * | |
| 37 | - * @var int | |
| 38 | - */ | |
| 39 | - private $compTwoLength = 0; | |
| 40 | - | |
| 41 | - /** | |
| 42 | - * Matrix for Damerau Levenshtein distance dynamic programming computation. | |
| 43 | - * | |
| 44 | - * @var int[][] | |
| 45 | - */ | |
| 46 | - private $matrix; | |
| 47 | - | |
| 48 | - /** | |
| 49 | - * Boolean flag indicating whether the matrix has been computed for the input strings. | |
| 50 | - * | |
| 51 | - * @var bool | |
| 52 | - */ | |
| 53 | - private $calculated = false; | |
| 54 | - | |
| 55 | - /** | |
| 56 | - * Cost of character insertion (to first string to match second string). | |
| 57 | - * | |
| 58 | - * @var int | |
| 59 | - */ | |
| 60 | - private $insCost = 1; | |
| 61 | - | |
| 62 | - /** | |
| 63 | - * Cost of character deletion (from first string to match second string). | |
| 64 | - * | |
| 65 | - * @var int | |
| 66 | - */ | |
| 67 | - private $delCost = 1; | |
| 68 | - | |
| 69 | - /** | |
| 70 | - * Substitution cost. | |
| 71 | - * | |
| 72 | - * @var int | |
| 73 | - */ | |
| 74 | - private $subCost = 1; | |
| 75 | - | |
| 76 | - /** | |
| 77 | - * Transposition cost. | |
| 78 | - * | |
| 79 | - * @var int | |
| 80 | - */ | |
| 81 | - private $transCost = 1; | |
| 82 | - | |
| 83 | - /** | |
| 84 | - * Constructor. | |
| 85 | - * | |
| 86 | - * @param string $firstString first string to compute distance | |
| 87 | - * @param string $secondString second string to compute distance | |
| 88 | - * @param int $insCost Cost of character insertion | |
| 89 | - * @param int $delCost Cost of character deletion | |
| 90 | - * @param int $subCost Substitution cost | |
| 91 | - * @param int $transCost Transposition cost | |
| 92 | - */ | |
| 93 | - public function __construct( | |
| 94 | - string $firstString, | |
| 95 | - string $secondString, | |
| 96 | - int $insCost = 1, | |
| 97 | - int $delCost = 1, | |
| 98 | - int $subCost = 1, | |
| 99 | - int $transCost = 1 | |
| 100 | -    ) { | |
| 101 | -        if (!empty($firstString) || !empty($secondString)) { | |
| 102 | - $this->compOne = $firstString; | |
| 103 | - $this->compOneLength = (int)mb_strlen($this->compOne, 'UTF-8'); | |
| 104 | - $this->compTwo = $secondString; | |
| 105 | - $this->compTwoLength = (int)mb_strlen($this->compTwo, 'UTF-8'); | |
| 106 | - } | |
| 107 | - | |
| 108 | - $this->insCost = $insCost; | |
| 109 | - $this->delCost = $delCost; | |
| 110 | - $this->subCost = $subCost; | |
| 111 | - $this->transCost = $transCost; | |
| 112 | - } | |
| 113 | - | |
| 114 | - /** | |
| 115 | - * Returns computed matrix for given input strings. | |
| 116 | - * | |
| 117 | - * @return int[][] matrix | |
| 118 | - */ | |
| 119 | - public function getMatrix(): array | |
| 120 | -    { | |
| 121 | -        if (!$this->calculated) { | |
| 122 | - $this->setupMatrix(); | |
| 123 | - } | |
| 124 | - | |
| 125 | - return $this->matrix; | |
| 126 | - } | |
| 127 | - | |
| 128 | - /** | |
| 129 | - * Returns similarity of strings, absolute number = Damerau Levenshtein distance. | |
| 130 | - * | |
| 131 | - * @return int | |
| 132 | - */ | |
| 133 | - public function getSimilarity(): int | |
| 134 | -    { | |
| 135 | -        if (!$this->calculated) { | |
| 136 | - $this->setupMatrix(); | |
| 137 | - } | |
| 138 | - | |
| 139 | - return $this->matrix[$this->compOneLength][$this->compTwoLength]; | |
| 140 | - } | |
| 141 | - | |
| 142 | - /** | |
| 143 | - * Procedure to compute matrix for given input strings. | |
| 144 | - * | |
| 145 | - * @return void | |
| 146 | - * @SuppressWarnings(PHPMD.CyclomaticComplexity) | |
| 147 | - */ | |
| 148 | - private function setupMatrix(): void | |
| 149 | -    { | |
| 150 | - $this->matrix = [[]]; | |
| 151 | - | |
| 152 | - $oneSize = $this->compOneLength; | |
| 153 | - $twoSize = $this->compTwoLength; | |
| 154 | - | |
| 155 | -        for ($i = 0; $i <= $oneSize; $i += 1) { | |
| 156 | - // @phan-suppress-next-line PhanTypeInvalidDimOffset | |
| 157 | - $this->matrix[$i][0] = $i > 0 ? $this->matrix[$i - 1][0] + $this->delCost : 0; | |
| 158 | - } | |
| 159 | - | |
| 160 | -        for ($i = 0; $i <= $twoSize; $i += 1) { | |
| 161 | - // Insertion, actually | |
| 162 | - $this->matrix[0][$i] = $i > 0 ? $this->matrix[0][$i - 1] + $this->insCost : 0; | |
| 163 | - } | |
| 164 | - | |
| 165 | -        for ($i = 1; $i <= $oneSize; $i += 1) { | |
| 166 | - // Current character of the first string | |
| 167 | - $cOne = (string)mb_substr($this->compOne, $i - 1, 1, 'UTF-8'); | |
| 168 | -            for ($j = 1; $j <= $twoSize; $j += 1) { | |
| 169 | - // Current character of the second string | |
| 170 | - $cTwo = (string)mb_substr($this->compTwo, $j - 1, 1, 'UTF-8'); | |
| 171 | - | |
| 172 | - // Compute substitution cost | |
| 173 | -                if ($this->compare($cOne, $cTwo) === 0) { | |
| 174 | - $cost = 0; | |
| 175 | - $trans = 0; | |
| 176 | -                } else { | |
| 177 | - $cost = $this->subCost; | |
| 178 | - $trans = $this->transCost; | |
| 179 | - } | |
| 180 | - | |
| 181 | - // Deletion cost | |
| 182 | - // @phan-suppress-next-line PhanTypeInvalidDimOffset, PhanTypeInvalidLeftOperandOfAdd | |
| 183 | - $del = $this->matrix[$i - 1][$j] + $this->delCost; | |
| 184 | - | |
| 185 | - // Insertion cost | |
| 186 | - // @phan-suppress-next-line PhanTypeArraySuspiciousNull, PhanTypeInvalidDimOffset, PhanTypeInvalidLeftOperandOfAdd | |
| 187 | - $ins = $this->matrix[$i][$j - 1] + $this->insCost; | |
| 188 | - | |
| 189 | - // Substitution cost, 0 if same | |
| 190 | - $sub = $this->matrix[$i - 1][$j - 1] + $cost; | |
| 191 | - | |
| 192 | - // Compute optimal | |
| 193 | - $this->matrix[$i][$j] = min($del, $ins, $sub); | |
| 194 | - | |
| 195 | - // Transposition cost | |
| 196 | -                if ($i > 1 && $j > 1) { | |
| 197 | - // Last two | |
| 198 | - // @phan-suppress-next-line PhanPartialTypeMismatchArgumentInternal | |
| 199 | - $ccOne = mb_substr($this->compOne, $i - 2, 1, 'UTF-8'); | |
| 200 | - // @phan-suppress-next-line PhanPartialTypeMismatchArgumentInternal | |
| 201 | - $ccTwo = mb_substr($this->compTwo, $j - 2, 1, 'UTF-8'); | |
| 202 | - | |
| 203 | -                    if ($this->compare($cOne, $ccTwo) === 0 && $this->compare($ccOne, $cTwo) === 0) { | |
| 204 | - // Transposition cost is computed as the minimum of the two | |
| 205 | - $this->matrix[$i][$j] = min($this->matrix[$i][$j], $this->matrix[$i - 2][$j - 2] + $trans); | |
| 206 | - } | |
| 207 | - } | |
| 208 | - } | |
| 209 | - } | |
| 210 | - | |
| 211 | - $this->calculated = true; | |
| 212 | - } | |
| 213 | - | |
| 214 | - /** | |
| 215 | - * Returns maximal possible edit Damerau Levenshtein distance between texts. | |
| 216 | - * | |
| 217 | - * On common substring of same length perform substitution / insert + delete | |
| 218 | - * (depends on what is cheaper), then on extra characters perform insertion / deletion | |
| 219 | - * | |
| 220 | - * @return int | |
| 221 | - */ | |
| 222 | - public function getMaximalDistance(): int | |
| 223 | -    { | |
| 224 | - $oneSize = $this->compOneLength; | |
| 225 | - $twoSize = $this->compTwoLength; | |
| 226 | - | |
| 227 | - // Is substitution cheaper than delete + insert? | |
| 228 | - $subCost = min($this->subCost, $this->delCost + $this->insCost); | |
| 229 | - | |
| 230 | - // Get common size | |
| 231 | - $minSize = min($oneSize, $twoSize); | |
| 232 | - $maxSize = max($oneSize, $twoSize); | |
| 233 | - $extraSize = $maxSize - $minSize; | |
| 234 | - | |
| 235 | - // On common size perform substitution / delete + insert, whichever is cheaper | |
| 236 | - $maxCost = $subCost * $minSize; | |
| 237 | - | |
| 238 | - // On the remaining extra characters do insert/delete | |
| 239 | -        if ($oneSize > $twoSize) { | |
| 240 | - // Delete extra characters | |
| 241 | - $maxCost += $extraSize * $this->delCost; | |
| 242 | -        } else { | |
| 243 | - // Insert extra characters | |
| 244 | - $maxCost += $extraSize * $this->insCost; | |
| 245 | - } | |
| 246 | - | |
| 247 | - return (int)$maxCost; | |
| 248 | - } | |
| 249 | - | |
| 250 | - /** | |
| 251 | - * Returns the relative similarity of the input strings: 1 - (distance / maximal possible distance). | |
| 252 | - * | |
| 253 | - * @return float | |
| 254 | - */ | |
| 255 | - public function getRelativeDistance(): float | |
| 256 | -    { | |
| 257 | -        if (!$this->calculated) { | |
| 258 | - $this->setupMatrix(); | |
| 259 | - } | |
| 260 | - | |
| 261 | - return (float)(1 - ($this->getSimilarity() / $this->getMaximalDistance())); | |
| 262 | - } | |
| 263 | - | |
| 264 | - /** | |
| 265 | - * Compares two characters from string (this method may be overridden in child class). | |
| 266 | - * | |
| 267 | - * @param string $firstCharacter First character | |
| 268 | - * @param string $secondCharacter Second character | |
| 269 | - * @return int | |
| 270 | - */ | |
| 271 | - protected function compare(string $firstCharacter, string $secondCharacter): int | |
| 272 | -    { | |
| 273 | - return strcmp($firstCharacter, $secondCharacter); | |
| 274 | - } | |
| 275 | - | |
| 276 | - /** | |
| 277 | - * Returns computed matrix for given input strings (For debugging purposes). | |
| 278 | - * | |
| 279 | - * @return string | |
| 280 | - */ | |
| 281 | - public function displayMatrix(): string | |
| 282 | -    { | |
| 283 | -        if (!$this->calculated) { | |
| 284 | - $this->setupMatrix(); | |
| 285 | - } | |
| 286 | - | |
| 287 | - $oneSize = $this->compOneLength; | |
| 288 | - $twoSize = $this->compTwoLength; | |
| 289 | - | |
| 290 | - $out = ' ' . $this->compOne . PHP_EOL; | |
| 291 | -        for ($y = 0; $y <= $twoSize; $y += 1) { | |
| 292 | -            if ($y - 1 < 0) { | |
| 293 | - $out .= ' '; | |
| 294 | -            } else { | |
| 295 | - $out .= (string)mb_substr($this->compTwo, $y - 1, 1, 'UTF-8'); | |
| 296 | - } | |
| 297 | - | |
| 298 | -            for ($x = 0; $x <= $oneSize; $x += 1) { | |
| 299 | - $out .= $this->matrix[$x][$y]; | |
| 300 | - } | |
| 301 | - | |
| 302 | - $out .= PHP_EOL; | |
| 303 | - } | |
| 304 | - | |
| 305 | - return $out; | |
| 306 | - } | |
| 307 | - | |
| 308 | - /** | |
| 309 | - * Returns current cost of insertion operation. | |
| 310 | - * | |
| 311 | - * @return int | |
| 312 | - */ | |
| 313 | - public function getInsCost(): int | |
| 314 | -    { | |
| 315 | - return $this->insCost; | |
| 316 | - } | |
| 317 | - | |
| 318 | - /** | |
| 319 | - * Sets cost of insertion operation (insert characters to first string to match second string). | |
| 320 | - * | |
| 321 | - * @param int $insCost Cost of character insertion | |
| 322 | - * @return void | |
| 323 | - */ | |
| 324 | - public function setInsCost(int $insCost): void | |
| 325 | -    { | |
| 326 | - $this->calculated = $insCost === $this->insCost ? $this->calculated : false; | |
| 327 | - $this->insCost = $insCost; | |
| 328 | - } | |
| 329 | - | |
| 330 | - /** | |
| 331 | - * Returns current cost of deletion operation. | |
| 332 | - * | |
| 333 | - * @return int | |
| 334 | - */ | |
| 335 | - public function getDelCost(): int | |
| 336 | -    { | |
| 337 | - return $this->delCost; | |
| 338 | - } | |
| 339 | - | |
| 340 | - /** | |
| 341 | - * Sets cost of deletion operation (delete characters from first string to match second string). | |
| 342 | - * | |
| 343 | - * @param int $delCost Cost of character deletion | |
| 344 | - * @return void | |
| 345 | - */ | |
| 346 | - public function setDelCost(int $delCost): void | |
| 347 | -    { | |
| 348 | - $this->calculated = $delCost === $this->delCost ? $this->calculated : false; | |
| 349 | - $this->delCost = $delCost; | |
| 350 | - } | |
| 351 | - | |
| 352 | - /** | |
| 353 | - * Returns current cost of substitution operation. | |
| 354 | - * | |
| 355 | - * @return int | |
| 356 | - */ | |
| 357 | - public function getSubCost(): int | |
| 358 | -    { | |
| 359 | - return $this->subCost; | |
| 360 | - } | |
| 361 | - | |
| 362 | - /** | |
| 363 | - * Sets cost of substitution operation. | |
| 364 | - * | |
| 365 | - * @param int $subCost Cost of character substitution | |
| 366 | - * @return void | |
| 367 | - */ | |
| 368 | - public function setSubCost(int $subCost): void | |
| 369 | -    { | |
| 370 | - $this->calculated = $subCost === $this->subCost ? $this->calculated : false; | |
| 371 | - $this->subCost = $subCost; | |
| 372 | - } | |
| 373 | - | |
| 374 | - /** | |
| 375 | - * Returns current cost of transposition operation. | |
| 376 | - * | |
| 377 | - * @return int | |
| 378 | - */ | |
| 379 | - public function getTransCost(): int | |
| 380 | -    { | |
| 381 | - return $this->transCost; | |
| 382 | - } | |
| 383 | - | |
| 384 | - /** | |
| 385 | - * Sets cost of transposition operation. | |
| 386 | - * | |
| 387 | - * @param int $transCost Cost of character transposition | |
| 388 | - * @return void | |
| 389 | - */ | |
| 390 | - public function setTransCost(int $transCost): void | |
| 391 | -    { | |
| 392 | - $this->calculated = $transCost === $this->transCost ? $this->calculated : false; | |
| 393 | - $this->transCost = $transCost; | |
| 394 | - } | |
/**
 * First input string. Defaults to an empty string so the property never holds null.
 *
 * @var string
 */
private $compOne = '';

/**
 * Second input string. Defaults to an empty string so the property never holds null.
 *
 * @var string
 */
private $compTwo = '';

/**
 * Length of the first string, in UTF-8 characters.
 *
 * @var int
 */
private $compOneLength = 0;

/**
 * Length of the second string, in UTF-8 characters.
 *
 * @var int
 */
private $compTwoLength = 0;

/**
 * Dynamic-programming matrix for the Damerau Levenshtein distance,
 * indexed as [position in first string][position in second string].
 *
 * @var int[][]
 */
private $matrix;

/**
 * Whether the matrix has been computed for the current strings and costs.
 *
 * @var bool
 */
private $calculated = false;

/**
 * Cost of inserting a character (into the first string to match the second string).
 *
 * @var int
 */
private $insCost = 1;

/**
 * Cost of deleting a character (from the first string to match the second string).
 *
 * @var int
 */
private $delCost = 1;

/**
 * Cost of substituting one character for another.
 *
 * @var int
 */
private $subCost = 1;

/**
 * Cost of transposing two adjacent characters.
 *
 * @var int
 */
private $transCost = 1;
| 82 | + | |
/**
 * Constructor.
 *
 * Stores both input strings, their lengths in UTF-8 characters, and the four operation costs.
 * The strings are stored unconditionally: a guard based on `empty()` would also skip the
 * string '0', leaving its length at zero and producing a wrong distance for inputs
 * such as ('0', '').
 *
 * @param string $firstString first string to compute distance
 * @param string $secondString second string to compute distance
 * @param int $insCost Cost of character insertion
 * @param int $delCost Cost of character deletion
 * @param int $subCost Substitution cost
 * @param int $transCost Transposition cost
 */
public function __construct(
    string $firstString,
    string $secondString,
    int $insCost = 1,
    int $delCost = 1,
    int $subCost = 1,
    int $transCost = 1
) {
    $this->compOne = $firstString;
    $this->compOneLength = (int)mb_strlen($firstString, 'UTF-8');
    $this->compTwo = $secondString;
    $this->compTwoLength = (int)mb_strlen($secondString, 'UTF-8');

    $this->insCost = $insCost;
    $this->delCost = $delCost;
    $this->subCost = $subCost;
    $this->transCost = $transCost;
}
| 113 | + | |
/**
 * Returns the distance matrix for the stored strings, computing it first when
 * no up-to-date matrix is available.
 *
 * @return int[][] matrix
 */
public function getMatrix(): array
{
    // The flag is cleared whenever a cost changes, so a stale matrix is rebuilt here
    if ($this->calculated === false) {
        $this->setupMatrix();
    }

    return $this->matrix;
}
| 127 | + | |
/**
 * Returns the Damerau Levenshtein distance between the stored strings
 * (an absolute number, read from the last cell of the matrix).
 *
 * @return int
 */
public function getSimilarity(): int
{
    if ($this->calculated === false) {
        $this->setupMatrix();
    }

    // The cell addressed by both full lengths holds the cost of converting the whole first string
    $lastRow = $this->compOneLength;
    $lastColumn = $this->compTwoLength;

    return $this->matrix[$lastRow][$lastColumn];
}
| 141 | + | |
/**
 * Computes the distance matrix for the stored strings and marks it as calculated.
 *
 * Both strings are split into characters once up front, so the nested loops
 * index plain arrays instead of calling mb_substr() for every cell.
 *
 * @return void
 * @SuppressWarnings(PHPMD.CyclomaticComplexity)
 */
private function setupMatrix(): void
{
    $oneSize = $this->compOneLength;
    $twoSize = $this->compTwoLength;

    // Characters of the first string, indexed from 0
    $oneChars = [];
    for ($i = 0; $i < $oneSize; $i += 1) {
        $oneChars[$i] = (string)mb_substr($this->compOne, $i, 1, 'UTF-8');
    }

    // Characters of the second string, indexed from 0
    $twoChars = [];
    for ($j = 0; $j < $twoSize; $j += 1) {
        $twoChars[$j] = (string)mb_substr($this->compTwo, $j, 1, 'UTF-8');
    }

    $this->matrix = [[]];

    // First column: deleting the first $i characters of the first string
    for ($i = 0; $i <= $oneSize; $i += 1) {
        // @phan-suppress-next-line PhanTypeInvalidDimOffset
        $this->matrix[$i][0] = $i > 0 ? $this->matrix[$i - 1][0] + $this->delCost : 0;
    }

    // First row: inserting the first $j characters of the second string
    for ($j = 0; $j <= $twoSize; $j += 1) {
        $this->matrix[0][$j] = $j > 0 ? $this->matrix[0][$j - 1] + $this->insCost : 0;
    }

    for ($i = 1; $i <= $oneSize; $i += 1) {
        // Current character of the first string
        $cOne = $oneChars[$i - 1];

        for ($j = 1; $j <= $twoSize; $j += 1) {
            // Current character of the second string
            $cTwo = $twoChars[$j - 1];

            // Equal characters cost nothing to substitute or transpose
            if ($this->compare($cOne, $cTwo) === 0) {
                $cost = 0;
                $trans = 0;
            } else {
                $cost = $this->subCost;
                $trans = $this->transCost;
            }

            // Deletion cost
            // @phan-suppress-next-line PhanTypeInvalidDimOffset, PhanTypeInvalidLeftOperandOfAdd
            $del = $this->matrix[$i - 1][$j] + $this->delCost;

            // Insertion cost
            // @phan-suppress-next-line PhanTypeArraySuspiciousNull, PhanTypeInvalidDimOffset, PhanTypeInvalidLeftOperandOfAdd
            $ins = $this->matrix[$i][$j - 1] + $this->insCost;

            // Substitution cost, 0 if same
            $sub = $this->matrix[$i - 1][$j - 1] + $cost;

            // Cheapest of the three basic operations
            $this->matrix[$i][$j] = min($del, $ins, $sub);

            // Transposition needs two characters on each side
            if ($i > 1 && $j > 1) {
                // Previous characters of both strings
                $ccOne = $oneChars[$i - 2];
                $ccTwo = $twoChars[$j - 2];

                // The last two characters of each string are each other's mirror image
                if ($this->compare($cOne, $ccTwo) === 0 && $this->compare($ccOne, $cTwo) === 0) {
                    // Keep the transposition only when it is cheaper than the current optimum
                    $this->matrix[$i][$j] = min($this->matrix[$i][$j], $this->matrix[$i - 2][$j - 2] + $trans);
                }
            }
        }
    }

    $this->calculated = true;
}
| 213 | + | |
/**
 * Returns the maximal possible Damerau Levenshtein edit distance between the texts.
 *
 * The part both strings have in common (by length) is priced per character at the
 * cheaper of substitution and delete + insert; the surplus characters of the longer
 * string are priced as deletions (first string longer) or insertions (otherwise).
 *
 * @return int
 */
public function getMaximalDistance(): int
{
    $lengthOne = $this->compOneLength;
    $lengthTwo = $this->compTwoLength;

    // Length shared by both strings and the surplus of the longer one
    $sharedLength = min($lengthOne, $lengthTwo);
    $surplusLength = max($lengthOne, $lengthTwo) - $sharedLength;

    // Replacing a character is either one substitution or a delete followed by an insert
    $replaceCost = min($this->subCost, $this->delCost + $this->insCost);

    // Surplus characters are deleted from the first string or inserted into it
    $surplusCost = $lengthOne > $lengthTwo ? $this->delCost : $this->insCost;

    return (int)($replaceCost * $sharedLength + $surplusLength * $surplusCost);
}
| 249 | + | |
/**
 * Returns the distance of the input strings relative to the maximal possible distance.
 *
 * The value is 1 minus (distance / maximal distance), so it is 1.0 when the distance
 * is zero and 0.0 when the distance equals the maximal distance. When the maximal
 * distance itself is zero (for example two empty strings) the division is skipped
 * and 1.0 is returned.
 *
 * @return float
 */
public function getRelativeDistance(): float
{
    // getSimilarity() computes the matrix on demand, so no separate setup call is needed
    $distance = $this->getSimilarity();
    $maximalDistance = $this->getMaximalDistance();

    // Guard against division by zero
    if ($maximalDistance === 0) {
        return 1.0;
    }

    return (float)(1 - ($distance / $maximalDistance));
}
| 263 | + | |
/**
 * Compares two characters with strcmp(); a result of 0 means they are equal.
 * Child classes may override this method to change how characters are matched.
 *
 * @param string $firstCharacter First character
 * @param string $secondCharacter Second character
 * @return int
 */
protected function compare(string $firstCharacter, string $secondCharacter): int
{
    $ordering = strcmp($firstCharacter, $secondCharacter);

    return $ordering;
}
| 275 | + | |
/**
 * Renders the computed matrix as text (for debugging purposes).
 *
 * The first line carries the first string; every following line starts with one
 * character of the second string (a space for the initial row) followed by the
 * matrix values of that row. Each line ends with PHP_EOL.
 *
 * @return string
 */
public function displayMatrix(): string
{
    if ($this->calculated === false) {
        $this->setupMatrix();
    }

    $columnCount = $this->compOneLength;
    $rowCount = $this->compTwoLength;

    // Header line with the first string
    $lines = [' ' . $this->compOne];

    for ($row = 0; $row <= $rowCount; $row += 1) {
        // Row 0 has no character of the second string in front of it
        $line = $row === 0 ? ' ' : (string)mb_substr($this->compTwo, $row - 1, 1, 'UTF-8');

        for ($column = 0; $column <= $columnCount; $column += 1) {
            $line .= $this->matrix[$column][$row];
        }

        $lines[] = $line;
    }

    return implode(PHP_EOL, $lines) . PHP_EOL;
}
| 307 | + | |
/**
 * Returns the cost currently charged for one insertion.
 *
 * @return int
 */
public function getInsCost(): int
{
    $currentCost = $this->insCost;

    return $currentCost;
}
| 317 | + | |
/**
 * Sets the cost of insertion (inserting characters into the first string to match
 * the second string). A changed value marks the matrix as not calculated.
 *
 * @param int $insCost Cost of character insertion
 * @return void
 */
public function setInsCost(int $insCost): void
{
    // Only a different cost makes an already computed matrix stale
    if ($insCost !== $this->insCost) {
        $this->calculated = false;
    }

    $this->insCost = $insCost;
}
| 329 | + | |
/**
 * Returns the cost currently charged for one deletion.
 *
 * @return int
 */
public function getDelCost(): int
{
    $currentCost = $this->delCost;

    return $currentCost;
}
| 339 | + | |
/**
 * Sets the cost of deletion (deleting characters from the first string to match
 * the second string). A changed value marks the matrix as not calculated.
 *
 * @param int $delCost Cost of character deletion
 * @return void
 */
public function setDelCost(int $delCost): void
{
    // Only a different cost makes an already computed matrix stale
    if ($delCost !== $this->delCost) {
        $this->calculated = false;
    }

    $this->delCost = $delCost;
}
| 351 | + | |
/**
 * Returns the cost currently charged for one substitution.
 *
 * @return int
 */
public function getSubCost(): int
{
    $currentCost = $this->subCost;

    return $currentCost;
}
| 361 | + | |
/**
 * Sets the cost of substitution. A changed value marks the matrix as not calculated.
 *
 * @param int $subCost Cost of character substitution
 * @return void
 */
public function setSubCost(int $subCost): void
{
    // Only a different cost makes an already computed matrix stale
    if ($subCost !== $this->subCost) {
        $this->calculated = false;
    }

    $this->subCost = $subCost;
}
| 373 | + | |
/**
 * Returns the cost currently charged for one transposition.
 *
 * @return int
 */
public function getTransCost(): int
{
    $currentCost = $this->transCost;

    return $currentCost;
}
| 383 | + | |
/**
 * Sets the cost of transposition. A changed value marks the matrix as not calculated.
 *
 * @param int $transCost Cost of character transposition
 * @return void
 */
public function setTransCost(int $transCost): void
{
    // Only a different cost makes an already computed matrix stale
    if ($transCost !== $this->transCost) {
        $this->calculated = false;
    }

    $this->transCost = $transCost;
}
| 395 | 395 | } | 
| @@ -100,9 +100,9 @@ discard block | ||
| 100 | 100 |      ) { | 
| 101 | 101 |          if (!empty($firstString) || !empty($secondString)) { | 
| 102 | 102 | $this->compOne = $firstString; | 
| 103 | - $this->compOneLength = (int)mb_strlen($this->compOne, 'UTF-8'); | |
| 103 | + $this->compOneLength = (int) mb_strlen($this->compOne, 'UTF-8'); | |
| 104 | 104 | $this->compTwo = $secondString; | 
| 105 | - $this->compTwoLength = (int)mb_strlen($this->compTwo, 'UTF-8'); | |
| 105 | + $this->compTwoLength = (int) mb_strlen($this->compTwo, 'UTF-8'); | |
| 106 | 106 | } | 
| 107 | 107 | |
| 108 | 108 | $this->insCost = $insCost; | 
| @@ -164,10 +164,10 @@ discard block | ||
| 164 | 164 | |
| 165 | 165 |          for ($i = 1; $i <= $oneSize; $i += 1) { | 
| 166 | 166 | // Curchar for the first string | 
| 167 | - $cOne = (string)mb_substr($this->compOne, $i - 1, 1, 'UTF-8'); | |
| 167 | + $cOne = (string) mb_substr($this->compOne, $i - 1, 1, 'UTF-8'); | |
| 168 | 168 |              for ($j = 1; $j <= $twoSize; $j += 1) { | 
| 169 | 169 | // Curchar for the second string | 
| 170 | - $cTwo = (string)mb_substr($this->compTwo, $j - 1, 1, 'UTF-8'); | |
| 170 | + $cTwo = (string) mb_substr($this->compTwo, $j - 1, 1, 'UTF-8'); | |
| 171 | 171 | |
| 172 | 172 | // Compute substitution cost | 
| 173 | 173 |                  if ($this->compare($cOne, $cTwo) === 0) { | 
| @@ -244,7 +244,7 @@ discard block | ||
| 244 | 244 | $maxCost += $extraSize * $this->insCost; | 
| 245 | 245 | } | 
| 246 | 246 | |
| 247 | - return (int)$maxCost; | |
| 247 | + return (int) $maxCost; | |
| 248 | 248 | } | 
| 249 | 249 | |
| 250 | 250 | /** | 
| @@ -258,7 +258,7 @@ discard block | ||
| 258 | 258 | $this->setupMatrix(); | 
| 259 | 259 | } | 
| 260 | 260 | |
| 261 | - return (float)(1 - ($this->getSimilarity() / $this->getMaximalDistance())); | |
| 261 | + return (float) (1 - ($this->getSimilarity() / $this->getMaximalDistance())); | |
| 262 | 262 | } | 
| 263 | 263 | |
| 264 | 264 | /** | 
| @@ -287,12 +287,12 @@ discard block | ||
| 287 | 287 | $oneSize = $this->compOneLength; | 
| 288 | 288 | $twoSize = $this->compTwoLength; | 
| 289 | 289 | |
| 290 | - $out = ' ' . $this->compOne . PHP_EOL; | |
| 290 | + $out = ' '.$this->compOne.PHP_EOL; | |
| 291 | 291 |          for ($y = 0; $y <= $twoSize; $y += 1) { | 
| 292 | 292 |              if ($y - 1 < 0) { | 
| 293 | 293 | $out .= ' '; | 
| 294 | 294 |              } else { | 
| 295 | - $out .= (string)mb_substr($this->compTwo, $y - 1, 1, 'UTF-8'); | |
| 295 | + $out .= (string) mb_substr($this->compTwo, $y - 1, 1, 'UTF-8'); | |
| 296 | 296 | } | 
| 297 | 297 | |
| 298 | 298 |              for ($x = 0; $x <= $oneSize; $x += 1) { |