|
1
|
|
|
<?php |
|
2
|
|
|
/** |
|
3
|
|
|
* This file is part of dispositif/wikibot application |
|
4
|
|
|
* 2019 : Philippe M. <[email protected]> |
|
5
|
|
|
* For the full copyright and MIT license information, please view the LICENSE file. |
|
6
|
|
|
*/ |
|
7
|
|
|
|
|
8
|
|
|
declare(strict_types=1); |
|
9
|
|
|
|
|
10
|
|
|
namespace App\Domain\Tests; |
|
11
|
|
|
|
|
12
|
|
|
use App\Domain\PredictAuthors; |
|
13
|
|
|
use App\Domain\TypoTokenizer; |
|
14
|
|
|
use PHPUnit\Framework\TestCase; |
|
15
|
|
|
|
|
16
|
|
|
/**
 * Tests for TypoTokenizer::typoPatternFromAuthor() and PredictAuthors::predictAuthorNames().
 *
 * Both tests are data-driven: each dataset is an [input, expected] pair
 * supplied by a static data provider.
 */
class TypoTokenizerTest extends TestCase
{
    /**
     * Checks that an author string is reduced to the expected typographic pattern.
     *
     * Only the 'pattern' entry of the array returned by
     * TypoTokenizer::typoPatternFromAuthor() is compared.
     *
     * @dataProvider patternProvider
     *
     * @param string $text    raw author string given to the tokenizer
     * @param string $pattern expected pattern (space-separated token names)
     */
    public function testTokenizeAuthor(string $text, string $pattern): void
    {
        $tokenizer = new TypoTokenizer();
        $result = $tokenizer->typoPatternFromAuthor($text);

        self::assertEquals($pattern, $result['pattern']);
    }

    /**
     * Datasets for testTokenizeAuthor().
     *
     * @return array<int, array{0: string, 1: string}> list of [author text, expected pattern]
     */
    public static function patternProvider(): array
    {
        return [
            // Disabled dataset (parenthesised "(dir.)" variant). The reason it was
            // switched off is not recorded here - TODO confirm before re-enabling.
            // ['B. Marc (dir.) et Pierre BERGER', 'INITIAL FIRSTUPPER BIBABREV AND FIRSTUPPER ALLUPPER'],
            ['B. Marc dir. et Pierre BERGER', 'INITIAL FIRSTUPPER BIBABREV AND FIRSTUPPER ALLUPPER'],
            ['Renée & Michel Paquet', 'FIRSTUPPER AND FIRSTUPPER FIRSTUPPER'],
            ["Jean-Pierre L'Ardoise", 'MIXED MIXED'],
            ['Penaud, Jean-Pierre', 'FIRSTUPPER COMMA MIXED'],
            ['J. Penaud', 'INITIAL FIRSTUPPER'],
            ['A. B. Penaud', 'INITIAL INITIAL FIRSTUPPER'],
            ['123-234-34323 AC234EF 1234 !', 'DASHNUMBER WITHNUMBER ALLNUMBER PUNCTUATION'],
            ['bla http://google.fr 123', 'ALLLOWER URL ALLNUMBER'],
            ['Jean Truc-Machine', 'FIRSTUPPER MIXED'],
            ['Armin Vit, Bryony Gomez Palacio', 'FIRSTUPPER FIRSTUPPER COMMA FIRSTUPPER FIRSTUPPER FIRSTUPPER'],
            [
                'H. Trevor Clifford, Peter D. Bostock',
                'INITIAL FIRSTUPPER FIRSTUPPER COMMA FIRSTUPPER INITIAL FIRSTUPPER',
            ],
            // Regression case: this input was noted as raising
            // "Undefined index: AND" in src/Domain/TypoTokenizer.php (line 75).
            ['BUBBLES, DROPS, AND PARTICLES', 'ALLUPPER COMMA ALLUPPER COMMA AND ALLUPPER'],
        ];
    }

    /**
     * Checks that an author string is split into the expected list of names.
     *
     * The comparison is strict (assertSame), so both the values and the
     * 0-based integer keys of the returned array must match.
     *
     * @dataProvider provideAuthorNames
     *
     * @param string   $string   raw author string given to the predictor
     * @param string[] $expected names expected from PredictAuthors::predictAuthorNames()
     */
    public function testPredictAuthorNames(string $string, array $expected): void
    {
        $predictor = new PredictAuthors();

        self::assertSame($expected, $predictor->predictAuthorNames($string));
    }

    /**
     * Datasets for testPredictAuthorNames().
     *
     * @return array<int, array{0: string, 1: string[]}> list of [author text, expected names]
     */
    public static function provideAuthorNames(): array
    {
        return [
            ['Marc Durand et Pierre Berger', ['Marc Durand', 'Pierre Berger']],
            [
                'Marie-Paul Du Breil de Pontbriand',
                ['Marie-Paul Du Breil de Pontbriand'],
            ],
            ['Renée et Michel Paquet', ['Renée Paquet', 'Michel Paquet']],
            ['Francine Musquère et Jean-Michel Mure', ['Francine Musquère', 'Jean-Michel Mure']],
            ['Didier Du Castel, Claude Estebe', ['Didier Du Castel', 'Claude Estebe']],
        ];
    }
}
|
90
|
|
|
|