Passed
Push — master ( dc389c...49a2c9 )
by Thomas
11:35
created

genderizeStudents::remove_accents()   B

Complexity

Conditions 1
Paths 1

Size

Total Lines 179
Code Lines 158

Duplication

Lines 0
Ratio 0 %

Importance

Changes 1
Bugs 0 Features 0
Metric Value
cc 1
eloc 158
c 1
b 0
f 0
nc 1
nop 1
dl 0
loc 179
rs 8

How to fix   Long Method   

Long Method

Small methods make your code easier to understand, in particular if combined with a good name. Besides, if your method is small, finding a good name is usually much easier.

For example, if you find yourself adding comments to a method's body, this is usually a good sign to extract the commented part to a new method, and use the comment as a starting point when coming up with a good name for this new method.

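Commonly applied refactorings include Extract Method and, when many parameters or temporary variables are involved, Replace Temp with Query, Introduce Parameter Object, and Preserve Whole Object.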

1
<?php
2
3
namespace App\Console\Commands;
4
5
use App\Models\Student;
6
use Illuminate\Console\Command;
7
use Illuminate\Support\Facades\Http;
8
9
/**
 * Console command that fills in the missing `gender_id` of students by asking
 * the genderize.io API to guess a gender from each student's first name.
 *
 * Only students whose `gender_id` is currently NULL are considered, and a
 * prediction is stored only when it passes the probability and sample-count
 * thresholds defined below.
 */
class genderizeStudents extends Command
{
    protected $signature = 'academico:genderize-students';

    protected $description = 'Guess gender for existing students, based on their firstname';

    /** Endpoint queried with one `name[]` parameter per first name. */
    private const API_URL = 'https://api.genderize.io/';

    /** Number of students loaded (and therefore names sent) per request. */
    private const BATCH_SIZE = 10;

    /** A prediction is kept only if its probability is strictly above this value. */
    private const MIN_PROBABILITY = 0.9;

    /** A prediction is kept only if it is based on strictly more samples than this. */
    private const MIN_COUNT = 10;

    /** `gender_id` value stored for a "female" prediction. */
    private const GENDER_ID_FEMALE = 1;

    /** `gender_id` value stored for a "male" prediction. */
    private const GENDER_ID_MALE = 2;

    /**
     * Accented character => ASCII replacement table.
     *
     * Taken from the WordPress implementation:
     * https://github.com/WordPress/WordPress/blob/a2693fd8602e3263b5925b9d799ddd577202167d/wp-includes/formatting.php#L1528
     *
     * The three keys written as "\u{...}" escapes are single code points
     * (IJ ligatures and n-preceded-by-apostrophe); the escapes keep them from
     * being silently decomposed into plain ASCII by text normalisation.
     */
    private const ACCENT_MAP = [
        // Decompositions for Latin-1 Supplement
        'ª' => 'a', 'º' => 'o',
        'À' => 'A', 'Á' => 'A',
        'Â' => 'A', 'Ã' => 'A',
        'Ä' => 'A', 'Å' => 'A',
        'Æ' => 'AE', 'Ç' => 'C',
        'È' => 'E', 'É' => 'E',
        'Ê' => 'E', 'Ë' => 'E',
        'Ì' => 'I', 'Í' => 'I',
        'Î' => 'I', 'Ï' => 'I',
        'Ð' => 'D', 'Ñ' => 'N',
        'Ò' => 'O', 'Ó' => 'O',
        'Ô' => 'O', 'Õ' => 'O',
        'Ö' => 'O', 'Ù' => 'U',
        'Ú' => 'U', 'Û' => 'U',
        'Ü' => 'U', 'Ý' => 'Y',
        'Þ' => 'TH', 'ß' => 's',
        'à' => 'a', 'á' => 'a',
        'â' => 'a', 'ã' => 'a',
        'ä' => 'a', 'å' => 'a',
        'æ' => 'ae', 'ç' => 'c',
        'è' => 'e', 'é' => 'e',
        'ê' => 'e', 'ë' => 'e',
        'ì' => 'i', 'í' => 'i',
        'î' => 'i', 'ï' => 'i',
        'ð' => 'd', 'ñ' => 'n',
        'ò' => 'o', 'ó' => 'o',
        'ô' => 'o', 'õ' => 'o',
        'ö' => 'o', 'ø' => 'o',
        'ù' => 'u', 'ú' => 'u',
        'û' => 'u', 'ü' => 'u',
        'ý' => 'y', 'þ' => 'th',
        'ÿ' => 'y', 'Ø' => 'O',
        // Decompositions for Latin Extended-A
        'Ā' => 'A', 'ā' => 'a',
        'Ă' => 'A', 'ă' => 'a',
        'Ą' => 'A', 'ą' => 'a',
        'Ć' => 'C', 'ć' => 'c',
        'Ĉ' => 'C', 'ĉ' => 'c',
        'Ċ' => 'C', 'ċ' => 'c',
        'Č' => 'C', 'č' => 'c',
        'Ď' => 'D', 'ď' => 'd',
        'Đ' => 'D', 'đ' => 'd',
        'Ē' => 'E', 'ē' => 'e',
        'Ĕ' => 'E', 'ĕ' => 'e',
        'Ė' => 'E', 'ė' => 'e',
        'Ę' => 'E', 'ę' => 'e',
        'Ě' => 'E', 'ě' => 'e',
        'Ĝ' => 'G', 'ĝ' => 'g',
        'Ğ' => 'G', 'ğ' => 'g',
        'Ġ' => 'G', 'ġ' => 'g',
        'Ģ' => 'G', 'ģ' => 'g',
        'Ĥ' => 'H', 'ĥ' => 'h',
        'Ħ' => 'H', 'ħ' => 'h',
        'Ĩ' => 'I', 'ĩ' => 'i',
        'Ī' => 'I', 'ī' => 'i',
        'Ĭ' => 'I', 'ĭ' => 'i',
        'Į' => 'I', 'į' => 'i',
        'İ' => 'I', 'ı' => 'i',
        "\u{0132}" => 'IJ', "\u{0133}" => 'ij',
        'Ĵ' => 'J', 'ĵ' => 'j',
        'Ķ' => 'K', 'ķ' => 'k',
        'ĸ' => 'k', 'Ĺ' => 'L',
        'ĺ' => 'l', 'Ļ' => 'L',
        'ļ' => 'l', 'Ľ' => 'L',
        'ľ' => 'l', 'Ŀ' => 'L',
        'ŀ' => 'l', 'Ł' => 'L',
        'ł' => 'l', 'Ń' => 'N',
        'ń' => 'n', 'Ņ' => 'N',
        'ņ' => 'n', 'Ň' => 'N',
        'ň' => 'n', "\u{0149}" => 'n',
        'Ŋ' => 'N', 'ŋ' => 'n',
        'Ō' => 'O', 'ō' => 'o',
        'Ŏ' => 'O', 'ŏ' => 'o',
        'Ő' => 'O', 'ő' => 'o',
        'Œ' => 'OE', 'œ' => 'oe',
        'Ŕ' => 'R', 'ŕ' => 'r',
        'Ŗ' => 'R', 'ŗ' => 'r',
        'Ř' => 'R', 'ř' => 'r',
        'Ś' => 'S', 'ś' => 's',
        'Ŝ' => 'S', 'ŝ' => 's',
        'Ş' => 'S', 'ş' => 's',
        'Š' => 'S', 'š' => 's',
        'Ţ' => 'T', 'ţ' => 't',
        'Ť' => 'T', 'ť' => 't',
        'Ŧ' => 'T', 'ŧ' => 't',
        'Ũ' => 'U', 'ũ' => 'u',
        'Ū' => 'U', 'ū' => 'u',
        'Ŭ' => 'U', 'ŭ' => 'u',
        'Ů' => 'U', 'ů' => 'u',
        'Ű' => 'U', 'ű' => 'u',
        'Ų' => 'U', 'ų' => 'u',
        'Ŵ' => 'W', 'ŵ' => 'w',
        'Ŷ' => 'Y', 'ŷ' => 'y',
        'Ÿ' => 'Y', 'Ź' => 'Z',
        'ź' => 'z', 'Ż' => 'Z',
        'ż' => 'z', 'Ž' => 'Z',
        'ž' => 'z', 'ſ' => 's',
        // Decompositions for Latin Extended-B
        'Ș' => 'S', 'ș' => 's',
        'Ț' => 'T', 'ț' => 't',
        // Euro Sign
        '€' => 'E',
        // GBP (Pound) Sign
        '£' => '',
        // Vowels with diacritic (Vietnamese)
        // unmarked
        'Ơ' => 'O', 'ơ' => 'o',
        'Ư' => 'U', 'ư' => 'u',
        // grave accent
        'Ầ' => 'A', 'ầ' => 'a',
        'Ằ' => 'A', 'ằ' => 'a',
        'Ề' => 'E', 'ề' => 'e',
        'Ồ' => 'O', 'ồ' => 'o',
        'Ờ' => 'O', 'ờ' => 'o',
        'Ừ' => 'U', 'ừ' => 'u',
        'Ỳ' => 'Y', 'ỳ' => 'y',
        // hook
        'Ả' => 'A', 'ả' => 'a',
        'Ẩ' => 'A', 'ẩ' => 'a',
        'Ẳ' => 'A', 'ẳ' => 'a',
        'Ẻ' => 'E', 'ẻ' => 'e',
        'Ể' => 'E', 'ể' => 'e',
        'Ỉ' => 'I', 'ỉ' => 'i',
        'Ỏ' => 'O', 'ỏ' => 'o',
        'Ổ' => 'O', 'ổ' => 'o',
        'Ở' => 'O', 'ở' => 'o',
        'Ủ' => 'U', 'ủ' => 'u',
        'Ử' => 'U', 'ử' => 'u',
        'Ỷ' => 'Y', 'ỷ' => 'y',
        // tilde
        'Ẫ' => 'A', 'ẫ' => 'a',
        'Ẵ' => 'A', 'ẵ' => 'a',
        'Ẽ' => 'E', 'ẽ' => 'e',
        'Ễ' => 'E', 'ễ' => 'e',
        'Ỗ' => 'O', 'ỗ' => 'o',
        'Ỡ' => 'O', 'ỡ' => 'o',
        'Ữ' => 'U', 'ữ' => 'u',
        'Ỹ' => 'Y', 'ỹ' => 'y',
        // acute accent
        'Ấ' => 'A', 'ấ' => 'a',
        'Ắ' => 'A', 'ắ' => 'a',
        'Ế' => 'E', 'ế' => 'e',
        'Ố' => 'O', 'ố' => 'o',
        'Ớ' => 'O', 'ớ' => 'o',
        'Ứ' => 'U', 'ứ' => 'u',
        // dot below
        'Ạ' => 'A', 'ạ' => 'a',
        'Ậ' => 'A', 'ậ' => 'a',
        'Ặ' => 'A', 'ặ' => 'a',
        'Ẹ' => 'E', 'ẹ' => 'e',
        'Ệ' => 'E', 'ệ' => 'e',
        'Ị' => 'I', 'ị' => 'i',
        'Ọ' => 'O', 'ọ' => 'o',
        'Ộ' => 'O', 'ộ' => 'o',
        'Ợ' => 'O', 'ợ' => 'o',
        'Ụ' => 'U', 'ụ' => 'u',
        'Ự' => 'U', 'ự' => 'u',
        'Ỵ' => 'Y', 'ỵ' => 'y',
        // Vowels with diacritic (Chinese, Hanyu Pinyin)
        'ɑ' => 'a',
        // macron
        'Ǖ' => 'U', 'ǖ' => 'u',
        // acute accent
        'Ǘ' => 'U', 'ǘ' => 'u',
        // caron
        'Ǎ' => 'A', 'ǎ' => 'a',
        'Ǐ' => 'I', 'ǐ' => 'i',
        'Ǒ' => 'O', 'ǒ' => 'o',
        'Ǔ' => 'U', 'ǔ' => 'u',
        'Ǚ' => 'U', 'ǚ' => 'u',
        // grave accent
        'Ǜ' => 'U', 'ǜ' => 'u',
    ];

    /**
     * Replace accented characters with their ASCII equivalents.
     *
     * @param mixed $string Text to transliterate; it is cast to string first.
     * @return string The input with every key of ACCENT_MAP replaced by its value.
     */
    private function remove_accents($string)
    {
        return strtr((string) $string, self::ACCENT_MAP);
    }

    /**
     * Return the first space-delimited word of a first name.
     *
     * Leading spaces are skipped. An empty or NULL name yields an empty
     * string instead of the `false` that strtok() would return.
     *
     * @param mixed $firstname Raw first name as stored on the student.
     * @return string First word of the name, or '' when there is none.
     */
    private function first_word($firstname)
    {
        return explode(' ', ltrim((string) $firstname, ' '), 2)[0];
    }

    /**
     * Guess and store the gender of every student that has none yet.
     *
     * Students are processed in batches; each batch triggers one API request.
     * Processing stops at the first failed request so that an unavailable or
     * rate-limited API is not called again for every remaining batch.
     */
    public function handle()
    {
        Student::whereNull('gender_id')->chunkById(self::BATCH_SIZE, function ($students) {
            // student id => accent-free first word of the first name (blank names dropped)
            $names = $students
                ->mapWithKeys(fn ($student) => [
                    $student->id => $this->remove_accents($this->first_word($student->firstname)),
                ])
                ->filter(fn ($name) => $name !== '');

            if ($names->isEmpty()) {
                return;
            }

            // Encode every name so characters such as "&" or "#" cannot break the query string.
            $queryString = $names
                ->unique()
                ->map(fn ($name) => 'name[]=' . rawurlencode($name))
                ->implode('&');

            $response = Http::get(self::API_URL . '?' . $queryString);

            if ($response->failed()) {
                $this->error('genderize.io request failed with HTTP status ' . $response->status() . '; stopping.');

                // Returning false from the callback ends the chunked iteration.
                return false;
            }

            // Keep only confident predictions; built once per batch, not once per student.
            $predictions = $response->collect()
                ->where('probability', '>', self::MIN_PROBABILITY)
                ->where('count', '>', self::MIN_COUNT);

            foreach ($students as $student) {
                $firstname = $names->get($student->id);

                if ($firstname === null) {
                    continue;
                }

                $genderId = match ($predictions->firstWhere('name', $firstname)['gender'] ?? null) {
                    'male' => self::GENDER_ID_MALE,
                    'female' => self::GENDER_ID_FEMALE,
                    default => null,
                };

                // gender_id is already NULL for these students, so only confident matches need a write.
                if ($genderId !== null) {
                    $student->update(['gender_id' => $genderId]);
                }
            }
        });
    }
}
220