Passed
Push — master ( bacc5f...5eccc7 )
by Dispositif
02:22
created

TypoTokenizerTest   A

Complexity

Total Complexity 4

Size/Duplication

Total Lines 71
Duplicated Lines 0 %

Importance

Changes 1
Bugs 0 Features 0
Metric Value
eloc 32
c 1
b 0
f 0
dl 0
loc 71
rs 10
wmc 4
1
<?php
2
/**
3
 * This file is part of dispositif/wikibot application
4
 * 2019 : Philippe M. <[email protected]>
5
 * For the full copyright and MIT license information, please view the LICENSE file.
6
 */
7
8
declare(strict_types=1);
9
10
namespace App\Domain\Tests;
11
12
use App\Domain\PredictAuthors;
13
use App\Domain\TypoTokenizer;
14
use PHPUnit\Framework\TestCase;
15
16
/**
 * Tests for TypoTokenizer::typoPatternFromAuthor() and PredictAuthors::predictAuthorNames().
 */
class TypoTokenizerTest extends TestCase
{
    /**
     * For TDD: checks the typographic pattern produced for an author string.
     *
     * @dataProvider patternProvider
     *
     * @param string $text    raw author string handed to the tokenizer
     * @param string $pattern expected value of the 'pattern' entry of the tokenizer result
     */
    public function testTokenizeAuthor(string $text, string $pattern): void
    {
        $tokenizer = new TypoTokenizer();
        $result = $tokenizer->typoPatternFromAuthor($text);

        self::assertEquals($pattern, $result['pattern']);
    }

    /**
     * Data sets for testTokenizeAuthor(): [author string, expected pattern].
     *
     * @return array<int, array{0: string, 1: string}>
     */
    public static function patternProvider(): array
    {
        return [
            // Pending case (parenthesised abbreviation), not enabled yet:
            // ['B. Marc (dir.) et Pierre BERGER', 'INITIAL FIRSTUPPER BIBABREV AND FIRSTUPPER ALLUPPER'],
            ['B. Marc dir. et Pierre BERGER', 'INITIAL FIRSTUPPER BIBABREV AND FIRSTUPPER ALLUPPER'],
            ['Renée & Michel Paquet', 'FIRSTUPPER AND FIRSTUPPER FIRSTUPPER'],
            ["Jean-Pierre L'Ardoise", 'MIXED MIXED'],
            ['Penaud, Jean-Pierre', 'FIRSTUPPER COMMA MIXED'],
            ['J. Penaud', 'INITIAL FIRSTUPPER'],
            ['A. B. Penaud', 'INITIAL INITIAL FIRSTUPPER'],
            ['123-234-34323 AC234EF 1234 !', 'DASHNUMBER WITHNUMBER ALLNUMBER PUNCTUATION'],
            ['bla http://google.fr 123', 'ALLLOWER URL ALLNUMBER'],
            ['Jean Truc-Machine', 'FIRSTUPPER MIXED'],
            ['Armin Vit, Bryony Gomez Palacio', 'FIRSTUPPER FIRSTUPPER COMMA FIRSTUPPER FIRSTUPPER FIRSTUPPER'],
            [
                'H. Trevor Clifford, Peter D. Bostock',
                'INITIAL FIRSTUPPER FIRSTUPPER COMMA FIRSTUPPER INITIAL FIRSTUPPER',
            ],
            // Regression: this input used to raise "Undefined index: AND" in TypoTokenizer.
            ['BUBBLES, DROPS, AND PARTICLES', 'ALLUPPER COMMA ALLUPPER COMMA AND ALLUPPER'],
        ];
    }

    /**
     * Checks the list of author names predicted from a raw author string.
     *
     * @dataProvider provideAuthorNames
     *
     * @param string   $string   raw author string handed to the predictor
     * @param string[] $expected expected list of author names (compared with assertSame)
     */
    public function testPredictAuthorNames(string $string, array $expected): void
    {
        $predictor = new PredictAuthors();

        self::assertSame(
            $expected,
            $predictor->predictAuthorNames($string)
        );
    }

    /**
     * Data sets for testPredictAuthorNames(): [author string, expected names].
     *
     * @return array<int, array{0: string, 1: string[]}>
     */
    public static function provideAuthorNames(): array
    {
        return [
            ['Marc Durand et Pierre Berger', [0 => 'Marc Durand', 1 => 'Pierre Berger']],
            [
                'Marie-Paul Du Breil de Pontbriand',
                [0 => 'Marie-Paul Du Breil de Pontbriand'],
            ],
            ['Renée et Michel Paquet', [0 => 'Renée Paquet', 1 => 'Michel Paquet']],
            ['Francine Musquère et Jean-Michel Mure', [0 => 'Francine Musquère', 1 => 'Jean-Michel Mure']],
            ['Didier Du Castel, Claude Estebe', [0 => 'Didier Du Castel', 1 => 'Claude Estebe']],
        ];
    }
}
90