genderizeStudents::remove_accents()   B
last analyzed

Complexity

Conditions 1
Paths 1

Size

Total Lines 332
Code Lines 311

Duplication

Lines 0
Ratio 0 %

Importance

Changes 1
Bugs 0 Features 0
Metric Value
cc 1
eloc 311
c 1
b 0
f 0
nc 1
nop 1
dl 0
loc 332
rs 8

How to fix   Long Method   

Long Method

Small methods make your code easier to understand, in particular if combined with a good name. Besides, if your method is small, finding a good name is usually much easier.

For example, if you find yourself adding comments to a method's body, this is usually a good sign to extract the commented part to a new method, and use the comment as a starting point when coming up with a good name for this new method.

Commonly applied refactorings include Extract Method.

1
<?php
2
3
namespace App\Console\Commands;
4
5
use App\Models\Student;
6
use Illuminate\Console\Command;
7
use Illuminate\Support\Facades\Http;
8
9
/**
 * Console command that guesses the gender of students whose gender_id is still
 * null, by sending the first word of their firstname to the genderize.io API.
 *
 * Only guesses that exceed both the probability and the sample-count thresholds
 * below are stored; every other student is left untouched.
 */
class genderizeStudents extends Command
{
    /** A guess is only stored when the reported probability is strictly greater than this. */
    private const MIN_PROBABILITY = 0.9;

    /** A guess is only stored when the reported sample count is strictly greater than this. */
    private const MIN_COUNT = 10;

    /** gender_id value written for a "female" guess. */
    private const GENDER_ID_FEMALE = 1;

    /** gender_id value written for a "male" guess. */
    private const GENDER_ID_MALE = 2;

    /**
     * Accented / special character => ASCII replacement table.
     *
     * wordpress implementation, from https://github.com/WordPress/WordPress/blob/a2693fd8602e3263b5925b9d799ddd577202167d/wp-includes/formatting.php#L1528
     *
     * Kept as a constant so the table is not rebuilt on every call.
     */
    private const ACCENT_MAP = [
        // Decompositions for Latin-1 Supplement
        'ª' => 'a',
        'º' => 'o',
        'À' => 'A',
        'Á' => 'A',
        'Â' => 'A',
        'Ã' => 'A',
        'Ä' => 'A',
        'Å' => 'A',
        'Æ' => 'AE',
        'Ç' => 'C',
        'È' => 'E',
        'É' => 'E',
        'Ê' => 'E',
        'Ë' => 'E',
        'Ì' => 'I',
        'Í' => 'I',
        'Î' => 'I',
        'Ï' => 'I',
        'Ð' => 'D',
        'Ñ' => 'N',
        'Ò' => 'O',
        'Ó' => 'O',
        'Ô' => 'O',
        'Õ' => 'O',
        'Ö' => 'O',
        'Ù' => 'U',
        'Ú' => 'U',
        'Û' => 'U',
        'Ü' => 'U',
        'Ý' => 'Y',
        'Þ' => 'TH',
        'ß' => 's',
        'à' => 'a',
        'á' => 'a',
        'â' => 'a',
        'ã' => 'a',
        'ä' => 'a',
        'å' => 'a',
        'æ' => 'ae',
        'ç' => 'c',
        'è' => 'e',
        'é' => 'e',
        'ê' => 'e',
        'ë' => 'e',
        'ì' => 'i',
        'í' => 'i',
        'î' => 'i',
        'ï' => 'i',
        'ð' => 'd',
        'ñ' => 'n',
        'ò' => 'o',
        'ó' => 'o',
        'ô' => 'o',
        'õ' => 'o',
        'ö' => 'o',
        'ø' => 'o',
        'ù' => 'u',
        'ú' => 'u',
        'û' => 'u',
        'ü' => 'u',
        'ý' => 'y',
        'þ' => 'th',
        'ÿ' => 'y',
        'Ø' => 'O',
        // Decompositions for Latin Extended-A
        'Ā' => 'A',
        'ā' => 'a',
        'Ă' => 'A',
        'ă' => 'a',
        'Ą' => 'A',
        'ą' => 'a',
        'Ć' => 'C',
        'ć' => 'c',
        'Ĉ' => 'C',
        'ĉ' => 'c',
        'Ċ' => 'C',
        'ċ' => 'c',
        'Č' => 'C',
        'č' => 'c',
        'Ď' => 'D',
        'ď' => 'd',
        'Đ' => 'D',
        'đ' => 'd',
        'Ē' => 'E',
        'ē' => 'e',
        'Ĕ' => 'E',
        'ĕ' => 'e',
        'Ė' => 'E',
        'ė' => 'e',
        'Ę' => 'E',
        'ę' => 'e',
        'Ě' => 'E',
        'ě' => 'e',
        'Ĝ' => 'G',
        'ĝ' => 'g',
        'Ğ' => 'G',
        'ğ' => 'g',
        'Ġ' => 'G',
        'ġ' => 'g',
        'Ģ' => 'G',
        'ģ' => 'g',
        'Ĥ' => 'H',
        'ĥ' => 'h',
        'Ħ' => 'H',
        'ħ' => 'h',
        'Ĩ' => 'I',
        'ĩ' => 'i',
        'Ī' => 'I',
        'ī' => 'i',
        'Ĭ' => 'I',
        'ĭ' => 'i',
        'Į' => 'I',
        'į' => 'i',
        'İ' => 'I',
        'ı' => 'i',
        // The next two keys are the single-code-point IJ ligatures. They are
        // written as escapes so that Unicode normalisation of this file cannot
        // silently turn them into the plain two-letter strings "IJ" / "ij".
        "\u{0132}" => 'IJ',
        "\u{0133}" => 'ij',
        'Ĵ' => 'J',
        'ĵ' => 'j',
        'Ķ' => 'K',
        'ķ' => 'k',
        'ĸ' => 'k',
        'Ĺ' => 'L',
        'ĺ' => 'l',
        'Ļ' => 'L',
        'ļ' => 'l',
        'Ľ' => 'L',
        'ľ' => 'l',
        'Ŀ' => 'L',
        'ŀ' => 'l',
        'Ł' => 'L',
        'ł' => 'l',
        'Ń' => 'N',
        'ń' => 'n',
        'Ņ' => 'N',
        'ņ' => 'n',
        'Ň' => 'N',
        'ň' => 'n',
        // U+0149 (n preceded by apostrophe), escaped for the same reason as the ligatures above.
        "\u{0149}" => 'n',
        'Ŋ' => 'N',
        'ŋ' => 'n',
        'Ō' => 'O',
        'ō' => 'o',
        'Ŏ' => 'O',
        'ŏ' => 'o',
        'Ő' => 'O',
        'ő' => 'o',
        'Œ' => 'OE',
        'œ' => 'oe',
        'Ŕ' => 'R',
        'ŕ' => 'r',
        'Ŗ' => 'R',
        'ŗ' => 'r',
        'Ř' => 'R',
        'ř' => 'r',
        'Ś' => 'S',
        'ś' => 's',
        'Ŝ' => 'S',
        'ŝ' => 's',
        'Ş' => 'S',
        'ş' => 's',
        'Š' => 'S',
        'š' => 's',
        'Ţ' => 'T',
        'ţ' => 't',
        'Ť' => 'T',
        'ť' => 't',
        'Ŧ' => 'T',
        'ŧ' => 't',
        'Ũ' => 'U',
        'ũ' => 'u',
        'Ū' => 'U',
        'ū' => 'u',
        'Ŭ' => 'U',
        'ŭ' => 'u',
        'Ů' => 'U',
        'ů' => 'u',
        'Ű' => 'U',
        'ű' => 'u',
        'Ų' => 'U',
        'ų' => 'u',
        'Ŵ' => 'W',
        'ŵ' => 'w',
        'Ŷ' => 'Y',
        'ŷ' => 'y',
        'Ÿ' => 'Y',
        'Ź' => 'Z',
        'ź' => 'z',
        'Ż' => 'Z',
        'ż' => 'z',
        'Ž' => 'Z',
        'ž' => 'z',
        'ſ' => 's',
        // Decompositions for Latin Extended-B
        'Ș' => 'S',
        'ș' => 's',
        'Ț' => 'T',
        'ț' => 't',
        // Euro Sign
        '€' => 'E',
        // GBP (Pound) Sign
        '£' => '',
        // Vowels with diacritic (Vietnamese)
        // unmarked
        'Ơ' => 'O',
        'ơ' => 'o',
        'Ư' => 'U',
        'ư' => 'u',
        // grave accent
        'Ầ' => 'A',
        'ầ' => 'a',
        'Ằ' => 'A',
        'ằ' => 'a',
        'Ề' => 'E',
        'ề' => 'e',
        'Ồ' => 'O',
        'ồ' => 'o',
        'Ờ' => 'O',
        'ờ' => 'o',
        'Ừ' => 'U',
        'ừ' => 'u',
        'Ỳ' => 'Y',
        'ỳ' => 'y',
        // hook
        'Ả' => 'A',
        'ả' => 'a',
        'Ẩ' => 'A',
        'ẩ' => 'a',
        'Ẳ' => 'A',
        'ẳ' => 'a',
        'Ẻ' => 'E',
        'ẻ' => 'e',
        'Ể' => 'E',
        'ể' => 'e',
        'Ỉ' => 'I',
        'ỉ' => 'i',
        'Ỏ' => 'O',
        'ỏ' => 'o',
        'Ổ' => 'O',
        'ổ' => 'o',
        'Ở' => 'O',
        'ở' => 'o',
        'Ủ' => 'U',
        'ủ' => 'u',
        'Ử' => 'U',
        'ử' => 'u',
        'Ỷ' => 'Y',
        'ỷ' => 'y',
        // tilde
        'Ẫ' => 'A',
        'ẫ' => 'a',
        'Ẵ' => 'A',
        'ẵ' => 'a',
        'Ẽ' => 'E',
        'ẽ' => 'e',
        'Ễ' => 'E',
        'ễ' => 'e',
        'Ỗ' => 'O',
        'ỗ' => 'o',
        'Ỡ' => 'O',
        'ỡ' => 'o',
        'Ữ' => 'U',
        'ữ' => 'u',
        'Ỹ' => 'Y',
        'ỹ' => 'y',
        // acute accent
        'Ấ' => 'A',
        'ấ' => 'a',
        'Ắ' => 'A',
        'ắ' => 'a',
        'Ế' => 'E',
        'ế' => 'e',
        'Ố' => 'O',
        'ố' => 'o',
        'Ớ' => 'O',
        'ớ' => 'o',
        'Ứ' => 'U',
        'ứ' => 'u',
        // dot below
        'Ạ' => 'A',
        'ạ' => 'a',
        'Ậ' => 'A',
        'ậ' => 'a',
        'Ặ' => 'A',
        'ặ' => 'a',
        'Ẹ' => 'E',
        'ẹ' => 'e',
        'Ệ' => 'E',
        'ệ' => 'e',
        'Ị' => 'I',
        'ị' => 'i',
        'Ọ' => 'O',
        'ọ' => 'o',
        'Ộ' => 'O',
        'ộ' => 'o',
        'Ợ' => 'O',
        'ợ' => 'o',
        'Ụ' => 'U',
        'ụ' => 'u',
        'Ự' => 'U',
        'ự' => 'u',
        'Ỵ' => 'Y',
        'ỵ' => 'y',
        // Vowels with diacritic (Chinese, Hanyu Pinyin)
        'ɑ' => 'a',
        // macron
        'Ǖ' => 'U',
        'ǖ' => 'u',
        // acute accent
        'Ǘ' => 'U',
        'ǘ' => 'u',
        // caron
        'Ǎ' => 'A',
        'ǎ' => 'a',
        'Ǐ' => 'I',
        'ǐ' => 'i',
        'Ǒ' => 'O',
        'ǒ' => 'o',
        'Ǔ' => 'U',
        'ǔ' => 'u',
        'Ǚ' => 'U',
        'ǚ' => 'u',
        // grave accent
        'Ǜ' => 'U',
        'ǜ' => 'u',
    ];

    protected $signature = 'academico:genderize-students';

    protected $description = 'Guess gender for existing students, based on their firstname';

    /**
     * Replace every character listed in ACCENT_MAP with its ASCII replacement.
     *
     * Characters that are not in the table are returned unchanged.
     *
     * @param  string  $string  Text to transliterate; null/false are treated as an empty string.
     * @return string
     */
    private function remove_accents($string)
    {
        return strtr((string) $string, self::ACCENT_MAP);
    }

    /**
     * Extract the first space-separated word of a firstname and strip its accents.
     *
     * explode() is used instead of strtok() because strtok() returns false for
     * blank input and keeps hidden global tokenizer state between calls.
     *
     * @param  string|null  $firstname
     * @return string  Empty string when the firstname is null, empty or blank.
     */
    private function firstNameToken($firstname)
    {
        $firstWord = explode(' ', trim((string) $firstname), 2)[0];

        return $this->remove_accents($firstWord);
    }

    /**
     * Look up a gender guess for every student without a gender_id, ten students
     * per API request, and store the guess when it is confident enough.
     *
     * Processing stops at the first failed API response so that a quota or
     * outage error does not trigger one useless request per remaining chunk.
     */
    public function handle()
    {
        Student::whereNull('gender_id')->chunkById(10, function ($students) {
            // student id => accent-free first word of the firstname; blank names are dropped.
            $names = $students->pluck('firstname', 'id')
                ->map(fn ($firstname) => $this->firstNameToken($firstname))
                ->filter(fn ($name) => $name !== '');

            if ($names->isEmpty()) {
                // Nothing to ask the API for in this chunk; move on to the next one.
                return;
            }

            // Each distinct name is sent once, URL-encoded so that characters such
            // as "&", "#" or untransliterated non-ASCII letters cannot break the query.
            $queryString = $names->unique()
                ->map(fn ($name) => 'name[]='.rawurlencode($name))
                ->implode('&');

            $response = Http::get('https://api.genderize.io/?'.$queryString);

            if (! $response->successful()) {
                $this->error('genderize.io request failed with HTTP status '.$response->status().'; stopping.');

                // Returning false tells chunkById() not to process further chunks.
                return false;
            }

            // name => gender, restricted to guesses that pass both thresholds.
            $genders = $response->collect()
                ->where('probability', '>', self::MIN_PROBABILITY)
                ->where('count', '>', self::MIN_COUNT)
                ->pluck('gender', 'name');

            foreach ($students as $student) {
                $name = $names->get($student->id);
                $gender = $name === null ? null : $genders->get($name);

                $genderId = match ($gender) {
                    'male' => self::GENDER_ID_MALE,
                    'female' => self::GENDER_ID_FEMALE,
                    default => null,
                };

                // gender_id is already null for these students, so there is
                // nothing to write when no confident guess is available.
                if ($genderId !== null) {
                    $student->update(['gender_id' => $genderId]);
                }
            }
        });
    }
}
371