PredictAuthors::patternAndValueToAuthors()   D
last analyzed

Complexity

Conditions 52
Paths 52

Size

Total Lines 165
Code Lines 99

Duplication

Lines 0
Ratio 0 %

Importance

Changes 0
Metric Value
eloc 99
c 0
b 0
f 0
dl 0
loc 165
rs 4.1666
cc 52
nc 52
nop 1

How to fix   Long Method    Complexity   

Long Method

Small methods make your code easier to understand, in particular if combined with a good name. Besides, if your method is small, finding a good name is usually much easier.

For example, if you find yourself adding comments to a method's body, this is usually a good sign to extract the commented part to a new method, and use the comment as a starting point when coming up with a good name for this new method.

Commonly applied refactorings include Extract Method: moving a cohesive part of the body into its own well-named method.

1
<?php
2
/*
3
 * This file is part of dispositif/wikibot application (@github)
4
 * 2019-2023 © Philippe M./Irønie  <[email protected]>
5
 * For the full copyright and MIT license information, view the license file.
6
 */
7
8
declare(strict_types=1);
9
10
namespace App\Domain\Predict;
11
12
use App\Domain\Utils\WikiTextUtil;
13
14
/**
 * Splits a free-text author string into individual author names.
 *
 * The string is first reduced to a typographic pattern (a space-separated list of
 * token kinds such as "FIRSTUPPER FIRSTUPPER COMMA MIXED FIRSTUPPER"); that pattern
 * is then looked up in a fixed table of known shapes which says which tokens belong
 * to which author. Unknown shapes yield no authors.
 */
class PredictAuthors
{
    private readonly TypoTokenizer $typoPredict;

    /** Authors found by the most recent prediction; empty when the shape was unknown. */
    private array $authors = [];

    public function __construct()
    {
        // A readonly property may be initialized exactly once from inside the class,
        // which is what happens here.
        $this->typoPredict = new TypoTokenizer();
    }

    /**
     * Explode authors from string based on typo pattern recognition.
     * See analysis_pattern_auteurs.php for stats and corpus generation.
     *
     * @param string $string raw author text, possibly naming several people
     *
     * @return array|null list of author names, one entry per author, in reading order;
     *                    an empty array when the typographic shape is not recognized.
     *                    The nullable return type is kept for compatibility, but this
     *                    implementation never returns null.
     */
    public function predictAuthorNames(string $string): ?array
    {
        // NOTE(review): typoPatternFromAuthor() is assumed to return an array with a
        // 'pattern' string and a 'value' token list - confirm against TypoTokenizer.
        $pattern = $this->typoPredict->typoPatternFromAuthor($string);

        $this->patternAndValueToAuthors($pattern);

        return $this->authors;
    }

    /**
     * Stores in $this->authors the author names described by a tokenized pattern.
     *
     * The result always replaces the previous one: an unknown pattern resets the list
     * to an empty array, so a reused instance never reports the authors of an earlier
     * string.
     *
     * @param array $pattern array holding 'pattern' (the space-separated token kinds)
     *                       and 'value' (the tokens, indexed in the same order, with
     *                       separators such as the comma or "and" occupying a slot)
     */
    private function patternAndValueToAuthors(array $pattern): void
    {
        $val = $pattern['value'];

        // match arms are evaluated lazily, so tokens are only read for the shape that
        // actually matched. Patterns are strings, for which the strict comparison used
        // by match is equivalent to the loose comparison of a switch.
        $this->authors = match ($pattern['pattern']) {
            /*
             * 1 author
             */
            'FIRSTUPPER',
            'ALLUPPER' => [$val[0]],

            // Laurent Croizier / Jean-Paul Marchand
            'FIRSTUPPER FIRSTUPPER',
            'MIXED FIRSTUPPER',
            'INITIAL FIRSTUPPER',
            'FIRSTUPPER MIXED',
            // Trailing bibliographic abbreviation (and its punctuation) is dropped.
            'FIRSTUPPER FIRSTUPPER BIBABREV',
            'FIRSTUPPER FIRSTUPPER PUNCTUATION BIBABREV' => [
                self::joinTokens($val, 0, 1),
            ],

            // Christian Le Boutellier
            'FIRSTUPPER FIRSTUPPER FIRSTUPPER',
            'FIRSTUPPER INITIAL FIRSTUPPER',
            'INITIAL INITIAL FIRSTUPPER',
            'FIRSTUPPER ALLLOWER FIRSTUPPER' => [
                self::joinTokens($val, 0, 1, 2),
            ],

            // [[Jean Julien Michel Savary]] / Abbé Guillotin de Corson
            'FIRSTUPPER FIRSTUPPER FIRSTUPPER FIRSTUPPER',
            'FIRSTUPPER FIRSTUPPER ALLLOWER FIRSTUPPER',
            'FIRSTUPPER INITIAL INITIAL FIRSTUPPER' => [
                self::joinTokens($val, 0, 1, 2, 3),
            ],

            // Nobility-style names with particles:
            // [[Toussaint Du Breil de Pontbriand]], [[Pierre-Suzanne Lucas de La Championnière]],
            // [[Toussaint du Breil de Pontbriand]], Marie-Paul Du Breil de Pontbriand,
            // Marie-Paul du Breil de Pontbriand, Mohamed El Aziz Ben Achour
            'FIRSTUPPER FIRSTUPPER FIRSTUPPER ALLLOWER FIRSTUPPER',
            'MIXED FIRSTUPPER ALLLOWER FIRSTUPPER FIRSTUPPER',
            'FIRSTUPPER ALLLOWER FIRSTUPPER ALLLOWER FIRSTUPPER',
            'MIXED FIRSTUPPER FIRSTUPPER ALLLOWER FIRSTUPPER',
            'MIXED ALLLOWER FIRSTUPPER ALLLOWER FIRSTUPPER',
            'FIRSTUPPER FIRSTUPPER FIRSTUPPER FIRSTUPPER FIRSTUPPER' => [
                self::joinTokens($val, 0, 1, 2, 3, 4),
            ],

            /*
             * 2 authors (index 2 or 3 is the separator token and is skipped)
             */

            // Robert Sablayrolles et Argitxu Beyrie / Francine Musquère et Jean-Michel Mure
            // Annie Lagueyrie, Philippe Maviel
            'FIRSTUPPER FIRSTUPPER AND FIRSTUPPER FIRSTUPPER',
            'MIXED FIRSTUPPER AND FIRSTUPPER FIRSTUPPER',
            'FIRSTUPPER FIRSTUPPER AND MIXED FIRSTUPPER',
            'FIRSTUPPER FIRSTUPPER COMMA FIRSTUPPER FIRSTUPPER',
            'FIRSTUPPER FIRSTUPPER COMMA MIXED FIRSTUPPER',
            'FIRSTUPPER FIRSTUPPER COMMA FIRSTUPPER MIXED',
            'MIXED FIRSTUPPER COMMA FIRSTUPPER FIRSTUPPER',
            'INITIAL FIRSTUPPER COMMA INITIAL FIRSTUPPER',
            'FIRSTUPPER MIXED AND FIRSTUPPER FIRSTUPPER' => [
                self::joinTokens($val, 0, 1),
                self::joinTokens($val, 3, 4),
            ],

            // Couple sharing one surname: Renée & Michel Paquet.
            // The last token (the surname) is appended to both first names.
            'FIRSTUPPER AND FIRSTUPPER FIRSTUPPER' => [
                self::joinTokens($val, 0, 3),
                self::joinTokens($val, 2, 3),
            ],

            // Didier Du Castel, Claude Estebe
            'FIRSTUPPER FIRSTUPPER FIRSTUPPER COMMA FIRSTUPPER FIRSTUPPER',
            'FIRSTUPPER FIRSTUPPER FIRSTUPPER AND FIRSTUPPER FIRSTUPPER',
            'FIRSTUPPER INITIAL FIRSTUPPER COMMA FIRSTUPPER FIRSTUPPER' => [
                self::joinTokens($val, 0, 1, 2),
                self::joinTokens($val, 4, 5),
            ],

            // Armin Vit, Bryony Gomez Palacio / François Clément, Viton de Saint-Allais
            'FIRSTUPPER FIRSTUPPER COMMA FIRSTUPPER FIRSTUPPER FIRSTUPPER',
            'FIRSTUPPER FIRSTUPPER COMMA FIRSTUPPER INITIAL FIRSTUPPER',
            'FIRSTUPPER FIRSTUPPER AND FIRSTUPPER FIRSTUPPER FIRSTUPPER',
            'FIRSTUPPER FIRSTUPPER COMMA FIRSTUPPER ALLLOWER MIXED',
            'FIRSTUPPER FIRSTUPPER AND FIRSTUPPER INITIAL FIRSTUPPER' => [
                self::joinTokens($val, 0, 1),
                self::joinTokens($val, 3, 4, 5),
            ],

            // Eugene P. Kiver, David V. Harris / H. Trevor Clifford, Peter D. Bostock
            'FIRSTUPPER INITIAL FIRSTUPPER COMMA FIRSTUPPER INITIAL FIRSTUPPER',
            'INITIAL INITIAL FIRSTUPPER COMMA INITIAL INITIAL FIRSTUPPER',
            'INITIAL FIRSTUPPER FIRSTUPPER COMMA FIRSTUPPER INITIAL FIRSTUPPER',
            'FIRSTUPPER INITIAL FIRSTUPPER AND FIRSTUPPER INITIAL FIRSTUPPER',
            'INITIAL INITIAL FIRSTUPPER AND INITIAL INITIAL FIRSTUPPER' => [
                self::joinTokens($val, 0, 1, 2),
                self::joinTokens($val, 4, 5, 6),
            ],

            /*
             * 3 authors
             */

            // Geddes, Czapor, Labahn
            'FIRSTUPPER COMMA FIRSTUPPER COMMA FIRSTUPPER' => [
                $val[0],
                $val[2],
                $val[4],
            ],

            // Andrzej Suchcitz, Ludwik Maik, Wojciech Rojek
            // [[Arnaud Bédat]], Gilles Bouleau et Bernard Nicolas
            'FIRSTUPPER FIRSTUPPER COMMA FIRSTUPPER FIRSTUPPER COMMA MIXED FIRSTUPPER',
            'FIRSTUPPER FIRSTUPPER COMMA FIRSTUPPER FIRSTUPPER COMMA FIRSTUPPER FIRSTUPPER',
            'FIRSTUPPER FIRSTUPPER COMMA FIRSTUPPER FIRSTUPPER AND FIRSTUPPER FIRSTUPPER',
            'FIRSTUPPER FIRSTUPPER COMMA MIXED FIRSTUPPER COMMA FIRSTUPPER FIRSTUPPER' => [
                self::joinTokens($val, 0, 1),
                self::joinTokens($val, 3, 4),
                self::joinTokens($val, 6, 7),
            ],

            /*
             * 4 authors
             */
            'FIRSTUPPER FIRSTUPPER COMMA MIXED FIRSTUPPER COMMA MIXED FIRSTUPPER COMMA FIRSTUPPER FIRSTUPPER',
            'FIRSTUPPER FIRSTUPPER COMMA FIRSTUPPER FIRSTUPPER COMMA FIRSTUPPER FIRSTUPPER COMMA FIRSTUPPER FIRSTUPPER' => [
                self::joinTokens($val, 0, 1),
                self::joinTokens($val, 3, 4),
                self::joinTokens($val, 6, 7),
                self::joinTokens($val, 9, 10),
            ],

            // Unknown shape: report no authors instead of keeping a previous result.
            default => [],
        };
    }

    /**
     * Builds one author name from the tokens found at the given positions.
     *
     * @param array $tokens       token list of the analysed string
     * @param int   ...$positions indexes of the tokens to keep, in output order
     *
     * @return string the selected tokens separated by single spaces
     */
    private static function joinTokens(array $tokens, int ...$positions): string
    {
        $parts = [];
        foreach ($positions as $position) {
            $parts[] = $tokens[$position];
        }

        return implode(' ', $parts);
    }

    /**
     * From underTwoAuthors() by MartinS@Wikipedia (with the result inverted).
     * Return true when the string looks like it names several authors; false when it
     * looks like zero or one author.
     *
     * The string counts as "many authors" when, after WikiTextUtil::unWikify() and
     * trimming, it contains an ampersand, a semicolon, at least three spaces or more
     * than one comma. All tested characters are ASCII, so the byte-based count_chars()
     * is safe on UTF-8 input.
     *
     * @param string $author raw author text, possibly containing wiki markup
     */
    public static function hasManyAuthors(string $author): bool
    {
        $author = WikiTextUtil::unWikify($author);
        $chars = count_chars(trim($author));

        // todo : "et" + "and" ?
        return $chars[ord('&')] > 0 || $chars[ord(';')] > 0 || $chars[ord(' ')] >= 3 || $chars[ord(',')] > 1;
    }
}
222