URLify::filter()   A
last analyzed

Complexity

Conditions 5
Paths 16

Size

Total Lines 29
Code Lines 12

Duplication

Lines 0
Ratio 0 %

Importance

Changes 1
Bugs 0 Features 1
Metric Value
cc 5
eloc 12
c 1
b 0
f 1
nc 16
nop 7
dl 0
loc 29
rs 9.5555
1
<?php
2
3
namespace Yaro\Jarboe\Helpers;
4
5
/* port of https://github.com/jbroadway/urlify */
6
7
/*
8
Copyright (c) Django Software Foundation and individual contributors.
9
All rights reserved.
10
11
Redistribution and use in source and binary forms, with or without modification,
12
are permitted provided that the following conditions are met:
13
14
    1. Redistributions of source code must retain the above copyright notice,
15
       this list of conditions and the following disclaimer.
16
17
    2. Redistributions in binary form must reproduce the above copyright
18
       notice, this list of conditions and the following disclaimer in the
19
       documentation and/or other materials provided with the distribution.
20
21
    3. Neither the name of Django nor the names of its contributors may be used
22
       to endorse or promote products derived from this software without
23
       specific prior written permission.
24
25
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
26
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
27
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
28
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
29
ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
30
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
31
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
32
ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
33
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
34
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
35
© 2019 GitHub, Inc.
36
*/
37
38
/**
39
 * A PHP port of URLify.js from the Django project
40
 * (https://github.com/django/django/blob/master/django/contrib/admin/static/admin/js/urlify.js).
41
 * Handles symbols from Latin languages, German, Greek, Turkish, Bulgarian,
 * Russian, Ukrainian, Kazakh, Czech, Polish, Romanian, Latvian, Lithuanian,
 * Vietnamese, Arabic, Persian, Serbian, Azerbaijani and Slovak. Symbols that
 * cannot be transliterated are simply omitted by filter().
45
 *
46
 * Usage:
47
 *
48
 *     echo URLify::filter (' J\'étudie le français ');
49
 *     // "jetudie-le-francais"
50
 *
51
 *     echo URLify::filter ('Lo siento, no hablo español.');
52
 *     // "lo-siento-no-hablo-espanol"
53
 */
54
class URLify
55
{
56
    public static $maps = [
57
        'de' => [ /* German */
58
            'Ä' => 'Ae',
59
            'Ö' => 'Oe',
60
            'Ü' => 'Ue',
61
            'ä' => 'ae',
62
            'ö' => 'oe',
63
            'ü' => 'ue',
64
            'ß' => 'ss',
65
            'ẞ' => 'SS',
66
        ],
67
        'latin' => [
68
            'À' => 'A',
69
            'Á' => 'A',
70
            'Â' => 'A',
71
            'Ã' => 'A',
72
            'Ä' => 'A',
73
            'Å' => 'A',
74
            'Ă' => 'A',
75
            'Æ' => 'AE',
76
            'Ç' => 'C',
77
            'È' => 'E',
78
            'É' => 'E',
79
            'Ê' => 'E',
80
            'Ë' => 'E',
81
            'Ì' => 'I',
82
            'Í' => 'I',
83
            'Î' => 'I',
84
            'Ï' => 'I',
85
            'Ð' => 'D',
86
            'Ñ' => 'N',
87
            'Ò' => 'O',
88
            'Ó' => 'O',
89
            'Ô' => 'O',
90
            'Õ' => 'O',
91
            'Ö' => 'O',
92
            'Ő' => 'O',
93
            'Ø' => 'O',
94
            'Œ' => 'OE',
95
            'Ș' => 'S',
96
            'Ț' => 'T',
97
            'Ù' => 'U',
98
            'Ú' => 'U',
99
            'Û' => 'U',
100
            'Ü' => 'U',
101
            'Ű' => 'U',
102
            'Ý' => 'Y',
103
            'Þ' => 'TH',
104
            'ß' => 'ss',
105
            'à' => 'a',
106
            'á' => 'a',
107
            'â' => 'a',
108
            'ã' => 'a',
109
            'ä' => 'a',
110
            'å' => 'a',
111
            'ă' => 'a',
112
            'æ' => 'ae',
113
            'ç' => 'c',
114
            'è' => 'e',
115
            'é' => 'e',
116
            'ê' => 'e',
117
            'ë' => 'e',
118
            'ì' => 'i',
119
            'í' => 'i',
120
            'î' => 'i',
121
            'ï' => 'i',
122
            'ð' => 'd',
123
            'ñ' => 'n',
124
            'ò' => 'o',
125
            'ó' => 'o',
126
            'ô' => 'o',
127
            'õ' => 'o',
128
            'ö' => 'o',
129
            'ő' => 'o',
130
            'ø' => 'o',
131
            'œ' => 'oe',
132
            'ș' => 's',
133
            'ț' => 't',
134
            'ù' => 'u',
135
            'ú' => 'u',
136
            'û' => 'u',
137
            'ü' => 'u',
138
            'ű' => 'u',
139
            'ý' => 'y',
140
            'þ' => 'th',
141
            'ÿ' => 'y',
142
        ],
143
        'latin_symbols' => [
144
            '©' => '(c)',
145
        ],
146
        'el' => [ /* Greek */
147
            'α' => 'a',
148
            'β' => 'b',
149
            'γ' => 'g',
150
            'δ' => 'd',
151
            'ε' => 'e',
152
            'ζ' => 'z',
153
            'η' => 'h',
154
            'θ' => '8',
155
            'ι' => 'i',
156
            'κ' => 'k',
157
            'λ' => 'l',
158
            'μ' => 'm',
159
            'ν' => 'n',
160
            'ξ' => '3',
161
            'ο' => 'o',
162
            'π' => 'p',
163
            'ρ' => 'r',
164
            'σ' => 's',
165
            'τ' => 't',
166
            'υ' => 'y',
167
            'φ' => 'f',
168
            'χ' => 'x',
169
            'ψ' => 'ps',
170
            'ω' => 'w',
171
            'ά' => 'a',
172
            'έ' => 'e',
173
            'ί' => 'i',
174
            'ό' => 'o',
175
            'ύ' => 'y',
176
            'ή' => 'h',
177
            'ώ' => 'w',
178
            'ς' => 's',
179
            'ϊ' => 'i',
180
            'ΰ' => 'y',
181
            'ϋ' => 'y',
182
            'ΐ' => 'i',
183
            'Α' => 'A',
184
            'Β' => 'B',
185
            'Γ' => 'G',
186
            'Δ' => 'D',
187
            'Ε' => 'E',
188
            'Ζ' => 'Z',
189
            'Η' => 'H',
190
            'Θ' => '8',
191
            'Ι' => 'I',
192
            'Κ' => 'K',
193
            'Λ' => 'L',
194
            'Μ' => 'M',
195
            'Ν' => 'N',
196
            'Ξ' => '3',
197
            'Ο' => 'O',
198
            'Π' => 'P',
199
            'Ρ' => 'R',
200
            'Σ' => 'S',
201
            'Τ' => 'T',
202
            'Υ' => 'Y',
203
            'Φ' => 'F',
204
            'Χ' => 'X',
205
            'Ψ' => 'PS',
206
            'Ω' => 'W',
207
            'Ά' => 'A',
208
            'Έ' => 'E',
209
            'Ί' => 'I',
210
            'Ό' => 'O',
211
            'Ύ' => 'Y',
212
            'Ή' => 'H',
213
            'Ώ' => 'W',
214
            'Ϊ' => 'I',
215
            'Ϋ' => 'Y',
216
        ],
217
        'tr' => [ /* Turkish */
218
            'ş' => 's',
219
            'Ş' => 'S',
220
            'ı' => 'i',
221
            'İ' => 'I',
222
            'ç' => 'c',
223
            'Ç' => 'C',
224
            'ü' => 'u',
225
            'Ü' => 'U',
226
            'ö' => 'o',
227
            'Ö' => 'O',
228
            'ğ' => 'g',
229
            'Ğ' => 'G',
230
        ],
231
        'bg' => [ /* Bulgarian */
232
            'Щ' => 'Sht',
233
            'Ш' => 'Sh',
234
            'Ч' => 'Ch',
235
            'Ц' => 'C',
236
            'Ю' => 'Yu',
237
            'Я' => 'Ya',
238
            'Ж' => 'J',
239
            'А' => 'A',
240
            'Б' => 'B',
241
            'В' => 'V',
242
            'Г' => 'G',
243
            'Д' => 'D',
244
            'Е' => 'E',
245
            'З' => 'Z',
246
            'И' => 'I',
247
            'Й' => 'Y',
248
            'К' => 'K',
249
            'Л' => 'L',
250
            'М' => 'M',
251
            'Н' => 'N',
252
            'О' => 'O',
253
            'П' => 'P',
254
            'Р' => 'R',
255
            'С' => 'S',
256
            'Т' => 'T',
257
            'У' => 'U',
258
            'Ф' => 'F',
259
            'Х' => 'H',
260
            'Ь' => '',
261
            'Ъ' => 'A',
262
            'щ' => 'sht',
263
            'ш' => 'sh',
264
            'ч' => 'ch',
265
            'ц' => 'c',
266
            'ю' => 'yu',
267
            'я' => 'ya',
268
            'ж' => 'j',
269
            'а' => 'a',
270
            'б' => 'b',
271
            'в' => 'v',
272
            'г' => 'g',
273
            'д' => 'd',
274
            'е' => 'e',
275
            'з' => 'z',
276
            'и' => 'i',
277
            'й' => 'y',
278
            'к' => 'k',
279
            'л' => 'l',
280
            'м' => 'm',
281
            'н' => 'n',
282
            'о' => 'o',
283
            'п' => 'p',
284
            'р' => 'r',
285
            'с' => 's',
286
            'т' => 't',
287
            'у' => 'u',
288
            'ф' => 'f',
289
            'х' => 'h',
290
            'ь' => '',
291
            'ъ' => 'a',
292
        ],
293
        'ru' => [ /* Russian */
294
            'а' => 'a',
295
            'б' => 'b',
296
            'в' => 'v',
297
            'г' => 'g',
298
            'д' => 'd',
299
            'е' => 'e',
300
            'ё' => 'yo',
301
            'ж' => 'zh',
302
            'з' => 'z',
303
            'и' => 'i',
304
            'й' => 'i',
305
            'к' => 'k',
306
            'л' => 'l',
307
            'м' => 'm',
308
            'н' => 'n',
309
            'о' => 'o',
310
            'п' => 'p',
311
            'р' => 'r',
312
            'с' => 's',
313
            'т' => 't',
314
            'у' => 'u',
315
            'ф' => 'f',
316
            'х' => 'h',
317
            'ц' => 'c',
318
            'ч' => 'ch',
319
            'ш' => 'sh',
320
            'щ' => 'sh',
321
            'ъ' => '',
322
            'ы' => 'y',
323
            'ь' => '',
324
            'э' => 'e',
325
            'ю' => 'yu',
326
            'я' => 'ya',
327
            'А' => 'A',
328
            'Б' => 'B',
329
            'В' => 'V',
330
            'Г' => 'G',
331
            'Д' => 'D',
332
            'Е' => 'E',
333
            'Ё' => 'Yo',
334
            'Ж' => 'Zh',
335
            'З' => 'Z',
336
            'И' => 'I',
337
            'Й' => 'I',
338
            'К' => 'K',
339
            'Л' => 'L',
340
            'М' => 'M',
341
            'Н' => 'N',
342
            'О' => 'O',
343
            'П' => 'P',
344
            'Р' => 'R',
345
            'С' => 'S',
346
            'Т' => 'T',
347
            'У' => 'U',
348
            'Ф' => 'F',
349
            'Х' => 'H',
350
            'Ц' => 'C',
351
            'Ч' => 'Ch',
352
            'Ш' => 'Sh',
353
            'Щ' => 'Sh',
354
            'Ъ' => '',
355
            'Ы' => 'Y',
356
            'Ь' => '',
357
            'Э' => 'E',
358
            'Ю' => 'Yu',
359
            'Я' => 'Ya',
360
            '№' => '',
361
        ],
362
        'uk' => [ /* Ukrainian */
363
            'Є' => 'Ye',
364
            'І' => 'I',
365
            'Ї' => 'Yi',
366
            'Ґ' => 'G',
367
            'є' => 'ye',
368
            'і' => 'i',
369
            'ї' => 'yi',
370
            'ґ' => 'g',
371
        ],
372
        'kk' => [ /* Kazakh */
373
            'Ә' => 'A',
374
            'Ғ' => 'G',
375
            'Қ' => 'Q',
376
            'Ң' => 'N',
377
            'Ө' => 'O',
378
            'Ұ' => 'U',
379
            'Ү' => 'U',
380
            'Һ' => 'H',
381
            'ә' => 'a',
382
            'ғ' => 'g',
383
            'қ' => 'q',
384
            'ң' => 'n',
385
            'ө' => 'o',
386
            'ұ' => 'u',
387
            'ү' => 'u',
388
            'һ' => 'h',
389
        ],
390
        'cs' => [ /* Czech */
391
            'č' => 'c',
392
            'ď' => 'd',
393
            'ě' => 'e',
394
            'ň' => 'n',
395
            'ř' => 'r',
396
            'š' => 's',
397
            'ť' => 't',
398
            'ů' => 'u',
399
            'ž' => 'z',
400
            'Č' => 'C',
401
            'Ď' => 'D',
402
            'Ě' => 'E',
403
            'Ň' => 'N',
404
            'Ř' => 'R',
405
            'Š' => 'S',
406
            'Ť' => 'T',
407
            'Ů' => 'U',
408
            'Ž' => 'Z',
409
        ],
410
        'pl' => [ /* Polish: lower-case letters first, then their upper-case forms */
            'ą' => 'a',
            'ć' => 'c',
            'ę' => 'e',
            'ł' => 'l',
            'ń' => 'n',
            'ó' => 'o',
            'ś' => 's',
            'ź' => 'z',
            'ż' => 'z',
            'Ą' => 'A',
            'Ć' => 'C',
            'Ę' => 'E', // upper-case input must stay upper-case (was 'e')
            'Ł' => 'L',
            'Ń' => 'N',
            'Ó' => 'O',
            'Ś' => 'S',
            'Ź' => 'Z',
            'Ż' => 'Z',
        ],
430
        'ro' => [ /* Romanian */
431
            'ă' => 'a',
432
            'â' => 'a',
433
            'î' => 'i',
434
            'ș' => 's',
435
            'ț' => 't',
436
            'Ţ' => 'T',
437
            'ţ' => 't',
438
        ],
439
        'lv' => [ /* Latvian: lower-case letters first, then their upper-case forms */
            'ā' => 'a',
            'č' => 'c',
            'ē' => 'e',
            'ģ' => 'g',
            'ī' => 'i',
            'ķ' => 'k',
            'ļ' => 'l',
            'ņ' => 'n',
            'š' => 's',
            'ū' => 'u',
            'ž' => 'z',
            'Ā' => 'A',
            'Č' => 'C',
            'Ē' => 'E',
            'Ģ' => 'G',
            'Ī' => 'I', // upper-case input must stay upper-case (was 'i')
            'Ķ' => 'K', // upper-case input must stay upper-case (was 'k')
            'Ļ' => 'L',
            'Ņ' => 'N',
            'Š' => 'S',
            'Ū' => 'U', // upper-case input must stay upper-case (was 'u')
            'Ž' => 'Z',
        ],
463
        'lt' => [ /* Lithuanian */
464
            'ą' => 'a',
465
            'č' => 'c',
466
            'ę' => 'e',
467
            'ė' => 'e',
468
            'į' => 'i',
469
            'š' => 's',
470
            'ų' => 'u',
471
            'ū' => 'u',
472
            'ž' => 'z',
473
            'Ą' => 'A',
474
            'Č' => 'C',
475
            'Ę' => 'E',
476
            'Ė' => 'E',
477
            'Į' => 'I',
478
            'Š' => 'S',
479
            'Ų' => 'U',
480
            'Ū' => 'U',
481
            'Ž' => 'Z',
482
        ],
483
        'vi' => [ /* Vietnamese */
484
            'Á' => 'A',
485
            'À' => 'A',
486
            'Ả' => 'A',
487
            'Ã' => 'A',
488
            'Ạ' => 'A',
489
            'Ă' => 'A',
490
            'Ắ' => 'A',
491
            'Ằ' => 'A',
492
            'Ẳ' => 'A',
493
            'Ẵ' => 'A',
494
            'Ặ' => 'A',
495
            'Â' => 'A',
496
            'Ấ' => 'A',
497
            'Ầ' => 'A',
498
            'Ẩ' => 'A',
499
            'Ẫ' => 'A',
500
            'Ậ' => 'A',
501
            'á' => 'a',
502
            'à' => 'a',
503
            'ả' => 'a',
504
            'ã' => 'a',
505
            'ạ' => 'a',
506
            'ă' => 'a',
507
            'ắ' => 'a',
508
            'ằ' => 'a',
509
            'ẳ' => 'a',
510
            'ẵ' => 'a',
511
            'ặ' => 'a',
512
            'â' => 'a',
513
            'ấ' => 'a',
514
            'ầ' => 'a',
515
            'ẩ' => 'a',
516
            'ẫ' => 'a',
517
            'ậ' => 'a',
518
            'É' => 'E',
519
            'È' => 'E',
520
            'Ẻ' => 'E',
521
            'Ẽ' => 'E',
522
            'Ẹ' => 'E',
523
            'Ê' => 'E',
524
            'Ế' => 'E',
525
            'Ề' => 'E',
526
            'Ể' => 'E',
527
            'Ễ' => 'E',
528
            'Ệ' => 'E',
529
            'é' => 'e',
530
            'è' => 'e',
531
            'ẻ' => 'e',
532
            'ẽ' => 'e',
533
            'ẹ' => 'e',
534
            'ê' => 'e',
535
            'ế' => 'e',
536
            'ề' => 'e',
537
            'ể' => 'e',
538
            'ễ' => 'e',
539
            'ệ' => 'e',
540
            'Í' => 'I',
541
            'Ì' => 'I',
542
            'Ỉ' => 'I',
543
            'Ĩ' => 'I',
544
            'Ị' => 'I',
545
            'í' => 'i',
546
            'ì' => 'i',
547
            'ỉ' => 'i',
548
            'ĩ' => 'i',
549
            'ị' => 'i',
550
            'Ó' => 'O',
551
            'Ò' => 'O',
552
            'Ỏ' => 'O',
553
            'Õ' => 'O',
554
            'Ọ' => 'O',
555
            'Ô' => 'O',
556
            'Ố' => 'O',
557
            'Ồ' => 'O',
558
            'Ổ' => 'O',
559
            'Ỗ' => 'O',
560
            'Ộ' => 'O',
561
            'Ơ' => 'O',
562
            'Ớ' => 'O',
563
            'Ờ' => 'O',
564
            'Ở' => 'O',
565
            'Ỡ' => 'O',
566
            'Ợ' => 'O',
567
            'ó' => 'o',
568
            'ò' => 'o',
569
            'ỏ' => 'o',
570
            'õ' => 'o',
571
            'ọ' => 'o',
572
            'ô' => 'o',
573
            'ố' => 'o',
574
            'ồ' => 'o',
575
            'ổ' => 'o',
576
            'ỗ' => 'o',
577
            'ộ' => 'o',
578
            'ơ' => 'o',
579
            'ớ' => 'o',
580
            'ờ' => 'o',
581
            'ở' => 'o',
582
            'ỡ' => 'o',
583
            'ợ' => 'o',
584
            'Ú' => 'U',
585
            'Ù' => 'U',
586
            'Ủ' => 'U',
587
            'Ũ' => 'U',
588
            'Ụ' => 'U',
589
            'Ư' => 'U',
590
            'Ứ' => 'U',
591
            'Ừ' => 'U',
592
            'Ử' => 'U',
593
            'Ữ' => 'U',
594
            'Ự' => 'U',
595
            'ú' => 'u',
596
            'ù' => 'u',
597
            'ủ' => 'u',
598
            'ũ' => 'u',
599
            'ụ' => 'u',
600
            'ư' => 'u',
601
            'ứ' => 'u',
602
            'ừ' => 'u',
603
            'ử' => 'u',
604
            'ữ' => 'u',
605
            'ự' => 'u',
606
            'Ý' => 'Y',
607
            'Ỳ' => 'Y',
608
            'Ỷ' => 'Y',
609
            'Ỹ' => 'Y',
610
            'Ỵ' => 'Y',
611
            'ý' => 'y',
612
            'ỳ' => 'y',
613
            'ỷ' => 'y',
614
            'ỹ' => 'y',
615
            'ỵ' => 'y',
616
            'Đ' => 'D',
617
            'đ' => 'd',
618
        ],
619
        'ar' => [ /* Arabic */
620
            'أ' => 'a',
621
            'ب' => 'b',
622
            'ت' => 't',
623
            'ث' => 'th',
624
            'ج' => 'g',
625
            'ح' => 'h',
626
            'خ' => 'kh',
627
            'د' => 'd',
628
            'ذ' => 'th',
629
            'ر' => 'r',
630
            'ز' => 'z',
631
            'س' => 's',
632
            'ش' => 'sh',
633
            'ص' => 's',
634
            'ض' => 'd',
635
            'ط' => 't',
636
            'ظ' => 'th',
637
            'ع' => 'aa',
638
            'غ' => 'gh',
639
            'ف' => 'f',
640
            'ق' => 'k',
641
            'ك' => 'k',
642
            'ل' => 'l',
643
            'م' => 'm',
644
            'ن' => 'n',
645
            'ه' => 'h',
646
            'و' => 'o',
647
            'ي' => 'y',
648
            'ا' => 'a',
649
            'إ' => 'a',
650
            'آ' => 'a',
651
            'ؤ' => 'o',
652
            'ئ' => 'y',
653
            'ء' => 'aa',
654
            '٠' => '0',
655
            '١' => '1',
656
            '٢' => '2',
657
            '٣' => '3',
658
            '٤' => '4',
659
            '٥' => '5',
660
            '٦' => '6',
661
            '٧' => '7',
662
            '٨' => '8',
663
            '٩' => '9',
664
        ],
665
        'fa' => [ /* Persian */
666
            'گ' => 'g',
667
            'ژ' => 'j',
668
            'پ' => 'p',
669
            'چ' => 'ch',
670
            'ی' => 'y',
671
            'ک' => 'k',
672
            '۰' => '0',
673
            '۱' => '1',
674
            '۲' => '2',
675
            '۳' => '3',
676
            '۴' => '4',
677
            '۵' => '5',
678
            '۶' => '6',
679
            '۷' => '7',
680
            '۸' => '8',
681
            '۹' => '9',
682
        ],
683
        'sr' => [ /* Serbian: lower-case letters first, then their upper-case forms */
            'ђ' => 'dj',
            'ј' => 'j',
            'љ' => 'lj',
            'њ' => 'nj',
            'ћ' => 'c',
            'џ' => 'dz',
            'đ' => 'dj',
            'Ђ' => 'Dj',
            'Ј' => 'J', // upper-case input must stay upper-case (was 'j')
            'Љ' => 'Lj',
            'Њ' => 'Nj',
            'Ћ' => 'C',
            'Џ' => 'Dz',
            'Đ' => 'Dj',
        ],
699
        'az' => [ /* Azerbaijani */
700
            'ç' => 'c',
701
            'ə' => 'e',
702
            'ğ' => 'g',
703
            'ı' => 'i',
704
            'ö' => 'o',
705
            'ş' => 's',
706
            'ü' => 'u',
707
            'Ç' => 'C',
708
            'Ə' => 'E',
709
            'Ğ' => 'G',
710
            'İ' => 'I',
711
            'Ö' => 'O',
712
            'Ş' => 'S',
713
            'Ü' => 'U',
714
        ],
715
        'sk' => [ /* Slovak */
716
            'ĺ' => 'l',
717
            'ľ' => 'l',
718
            'ŕ' => 'r',
719
        ],
720
        'other' => [
721
            'Ÿ' => 'Y',
722
            'µ' => 'u',
723
            '¥' => 'Y',
724
            'Ĉ' => 'C',
725
            'ĉ' => 'c',
726
            'Ċ' => 'C',
727
            'ċ' => 'c',
728
            'Ĝ' => 'G',
729
            'ĝ' => 'g',
730
            'Ġ' => 'G',
731
            'ġ' => 'g',
732
            'Ĥ' => 'H',
733
            'ĥ' => 'h',
734
            'Ħ' => 'H',
735
            'ħ' => 'h',
736
            'Ĕ' => 'E',
737
            'ĕ' => 'e',
738
            'Ĭ' => 'I',
739
            'ĭ' => 'i',
740
            'Ĵ' => 'J',
741
            'ĵ' => 'j',
742
            'Ĺ' => 'L',
743
            'ĺ' => 'l',
744
            'Ľ' => 'L',
745
            'ľ' => 'l',
746
            'Ŀ' => 'L',
747
            'ŀ' => 'l',
748
            'ʼn' => 'n',
749
            'Ō' => 'O',
750
            'ō' => 'o',
751
            'Ŏ' => 'O',
752
            'ŏ' => 'o',
753
            'Ŕ' => 'R',
754
            'ŕ' => 'r',
755
            'Ŗ' => 'R',
756
            'ŗ' => 'r',
757
            'Ŝ' => 'S',
758
            'ŝ' => 's',
759
            'Ŧ' => 'T',
760
            'ŧ' => 't',
761
            'Ŭ' => 'U',
762
            'ŭ' => 'u',
763
            'Ŵ' => 'W',
764
            'ŵ' => 'w',
765
            'Ŷ' => 'Y',
766
            'ŷ' => 'y',
767
            'ſ' => 'i',
768
            'ƒ' => 'f',
769
            'O' => 'O',
770
            'o' => 'o',
771
            'U' => 'U',
772
            'u' => 'u',
773
            'Ǎ' => 'A',
774
            'ǎ' => 'a',
775
            'Ǐ' => 'I',
776
            'ǐ' => 'i',
777
            'Ǒ' => 'O',
778
            'ǒ' => 'o',
779
            'Ǔ' => 'U',
780
            'ǔ' => 'u',
781
            'Ǖ' => 'U',
782
            'ǖ' => 'u',
783
            'Ǘ' => 'U',
784
            'ǘ' => 'u',
785
            'Ǚ' => 'U',
786
            'ǚ' => 'u',
787
            'Ǜ' => 'U',
788
            'ǜ' => 'u',
789
            'Ǻ' => 'A',
790
            'ǻ' => 'a',
791
            'Ǿ' => 'O',
792
            'ǿ' => 'o',
793
            'Ǽ' => 'Ae',
794
            'ǽ' => 'ae',
795
            'IJ' => 'IJ',
796
            'ij' => 'ij',
797
            'J' => 'J',
798
            'ĸ' => 'k',
799
            'Ŋ' => 'N',
800
            'ŋ' => 'n',
801
            'Ẁ' => 'W',
802
            'ẁ' => 'w',
803
            'Ẃ' => 'W',
804
            'ẃ' => 'w',
805
            'Ẅ' => 'W',
806
            'ẅ' => 'w',
807
        ],
808
    ];
809
810
    /**
811
     * List of words to remove from URLs.
812
     */
813
    public static $remove_list = [
814
        'a',
815
        'an',
816
        'as',
817
        'at',
818
        'before',
819
        'but',
820
        'by',
821
        'for',
822
        'from',
823
        'is',
824
        'in',
825
        'into',
826
        'like',
827
        'of',
828
        'off',
829
        'on',
830
        'onto',
831
        'per',
832
        'since',
833
        'than',
834
        'the',
835
        'this',
836
        'that',
837
        'to',
838
        'up',
839
        'via',
840
        'with',
841
    ];
842
843
    /**
844
     * The character map.
845
     */
846
    private static $map = [];
847
848
    /**
849
     * The character list as a string.
850
     */
851
    private static $chars = '';
852
853
    /**
854
     * The character list as a regular expression.
855
     */
856
    private static $regex = '';
857
858
    /**
859
     * The current language
860
     */
861
    private static $language = '';
862
863
    /**
864
     * Initializes the character map.
865
     * @param string $language
866
     */
867
    private static function init($language = "")
868
    {
869
        if (count(self::$map) > 0 && (($language == "") || ($language == self::$language))) {
870
            return;
871
        }
872
873
        /* Is a specific map associated with $language ? */
874
        if (isset(self::$maps[$language]) && is_array(self::$maps[$language])) {
875
            /* Move this map to end. This means it will have priority over others */
876
            $m = self::$maps[$language];
877
            unset(self::$maps[$language]);
878
            self::$maps[$language] = $m;
879
        }
880
        /* Reset static vars */
881
        self::$language = $language;
882
        self::$map = [];
883
        self::$chars = '';
884
885
        foreach (self::$maps as $map) {
886
            foreach ($map as $orig => $conv) {
887
                self::$map[$orig] = $conv;
888
                self::$chars .= $orig;
889
            }
890
        }
891
892
        self::$regex = '/[' . preg_quote(self::$chars, '/') . ']/u';
893
    }
894
895
    /**
     * Registers an additional character map.
     *
     * The map is appended to self::$maps and the merged lookup table is
     * cleared, which makes init() rebuild it on the next call.
     *
     * @param array $map Hash of original character => replacement
     * @throws \LogicException When $map is not an array
     */
    public static function add_chars($map)
    {
        if (is_array($map)) {
            self::$maps[] = $map;
            self::$chars = '';
            self::$map = [];

            return;
        }

        throw new \LogicException('$map must be an associative array.');
    }
908
909
    /**
     * Adds one word, or an array of words, to the remove list.
     * Duplicate entries are dropped from the resulting list.
     *
     * @param mixed $words A single word or an array of words
     */
    public static function remove_words($words)
    {
        if (!is_array($words)) {
            $words = [$words];
        }

        $combined = array_merge(self::$remove_list, $words);
        self::$remove_list = array_unique($combined);
    }
919
920
    /**
     * Transliterates characters to their ASCII equivalents.
     *
     * $language specifies a priority for a specific language, which is useful
     * when languages have different rules for the same character.
     *
     * The text is converted in a single pass: every mapped character is
     * replaced exactly once and replacement output is never scanned again.
     *
     * @param string $text
     * @param string $language
     * @return string The transliterated text, or $text unchanged if matching fails
     */
    public static function downcode($text, $language = "")
    {
        self::init($language);

        $map = self::$map;
        $converted = preg_replace_callback(
            self::$regex,
            function ($match) use ($map) {
                $char = $match[0];

                // The character class may match characters that are only part
                // of a multi-character map key; leave those untouched.
                return isset($map[$char]) ? $map[$char] : $char;
            },
            $text
        );

        // preg_replace_callback() returns null on failure (e.g. malformed UTF-8 input)
        return $converted === null ? $text : $converted;
    }
942
943
    /**
     * Filters a string, e.g., "Petty theft" to "petty-theft"
     *
     * @param string $text The text to return filtered
     * @param int $length The length (after filtering) of the string to be returned
     * @param string $language The transliteration language, passed down to downcode()
     * @param bool $file_name Whether there should be an additional filter considering this is a filename
     * @param bool $use_remove_list Whether you want to remove specific elements previously set in self::$remove_list
     * @param bool $lower_case Whether you want the filter to maintain casing or lowercase everything (default)
     * @param bool $treat_underscore_as_space Treat underscore as space, so it will be replaced with "-"
     * @return string
     */
    public static function filter(
        $text,
        $length = 60,
        $language = "",
        $file_name = false,
        $use_remove_list = true,
        $lower_case = true,
        $treat_underscore_as_space = true
    ) {
        $text = self::downcode($text, $language);

        if ($use_remove_list && count(self::$remove_list) > 0) {
            // Quote every word so that entries added through remove_words()
            // are matched literally and cannot break or alter the pattern.
            $quoted = [];
            foreach (self::$remove_list as $word) {
                $quoted[] = preg_quote($word, '/');
            }
            // remove all these words from the string before urlifying
            $text = preg_replace('/\b(' . implode('|', $quoted) . ')\b/i', '', $text);
        }

        // if downcode doesn't hit, the char will be stripped here
        // (file names additionally keep dots)
        $remove_pattern = ($file_name) ? '/[^_\-.\-a-zA-Z0-9\s]/u' : '/[^\s_\-a-zA-Z0-9]/u';
        // preg_replace() returns null on malformed UTF-8; normalise that to an empty string
        $text = (string) preg_replace($remove_pattern, '', $text); // remove unneeded chars
        if ($treat_underscore_as_space) {
            $text = str_replace('_', ' ', $text);             // treat underscores as spaces
        }
        $text = preg_replace('/^\s+|\s+$/u', '', $text);  // trim leading/trailing spaces
        $text = preg_replace('/[-\s]+/u', '-', $text);    // convert spaces to hyphens
        if ($lower_case) {
            $text = strtolower($text);                        // convert to lowercase
        }

        // cut to the first $length chars, then drop any hyphen left at either end
        return trim(substr($text, 0, $length), '-');
    }
984
985
    /**
     * Alias of `URLify::downcode()`.
     *
     * @param string $text
     * @param string $language Optional language priority, forwarded to downcode()
     * @return string
     */
    public static function transliterate($text, $language = "")
    {
        return self::downcode($text, $language);
    }
992
}
993