Conditions | 14 |
Paths | 12 |
Total Lines | 79 |
Code Lines | 51 |
Lines | 0 |
Ratio | 0 % |
Changes | 0 |
Small methods make your code easier to understand, in particular if combined with a good name. Besides, if your method is small, finding a good name is usually much easier.
For example, if you find yourself adding comments to a method's body, this is usually a good sign to extract the commented part to a new method, and use the comment as a starting point when coming up with a good name for this new method.
Commonly applied refactorings include: Extract Method.
If many parameters/temporary variables are present: Replace Temp with Query, Introduce Parameter Object, Preserve Whole Object.
1 | <?php |
||
34 | public function typoPatternFromAuthor(string $text): array |
||
35 | { |
||
36 | $res = []; |
||
37 | $res['string'] = $text; |
||
38 | $modText = TextUtil::replaceNonBreakingSpaces($text); |
||
39 | |||
40 | // unWikify or not ? remove wikilinks and bold/italic wikicode |
||
41 | $modText = WikiTextUtil::unWikify($modText); |
||
42 | |||
43 | /* |
||
44 | * Pre-process : add spaces between relevant typographic items |
||
45 | */ |
||
46 | $this->tokenValue = []; |
||
47 | $modText = $this->preprocessTypoPattern($modText); |
||
48 | |||
49 | // PUNCTUATION conversion |
||
50 | $punctuationColl = array_filter( |
||
51 | TextUtil::ALL_PUNCTUATION, |
||
52 | function ($value) { |
||
53 | // skip punctuation chars from mixed names (example : "Pierre-Marie L'Anglois") |
||
54 | return !in_array($value, ["'", '-', '-']); |
||
55 | } |
||
56 | ); |
||
57 | // don't use str_split() which cuts on 1 byte length (≠ multibytes chars) |
||
58 | $modText = str_replace($punctuationColl, ' PATTERNPUNCTUATION ', $modText); |
||
59 | |||
60 | // "BUBBLES COMMA DROPS COMMA AND PARTICLES" |
||
61 | |||
62 | // Split the string |
||
63 | $tokens = preg_split('#[ ]+#', $modText); |
||
64 | $res['pattern'] = ''; |
||
65 | foreach ($tokens as $tok) { |
||
66 | if (empty($tok)) { |
||
67 | continue; |
||
68 | } |
||
69 | if (preg_match('#^(PATTERNINITIAL|PATTERNURL|PATTERNAND|PATTERNCOMMA|PATTERNBIBABREV|PATTERNPUNCTUATION)$#', (string) $tok, $matches) > 0) { |
||
70 | |||
71 | $shortpattern = str_replace('PATTERN','', (string) $tok); |
||
72 | $res['pattern'] .= ' '.$shortpattern; // PATTERNAND -> AND |
||
73 | if (in_array($matches[1], ['PATTERNCOMMA', 'PATTERNPUNCTUATION']) || empty($matches[1])) { |
||
74 | $res['value'][] = '*'; |
||
75 | } else { |
||
76 | $res['value'][] = current($this->tokenValue[$shortpattern]); |
||
77 | next($this->tokenValue[$shortpattern]); |
||
78 | } |
||
79 | //"J. R . R." => INITIAL (1 seule fois) |
||
80 | // $res = str_replace('INITIAL INITIAL', 'INITIAL', $res); |
||
81 | } elseif (preg_match('#^\d+$#', (string) $tok) > 0) { |
||
82 | $res['pattern'] .= ' ALLNUMBER'; |
||
83 | $res['value'][] = $tok; |
||
84 | } elseif (preg_match('#^[0-9\-]+$#', (string) $tok) > 0) { |
||
85 | $res['pattern'] .= ' DASHNUMBER'; |
||
86 | $res['value'][] = $tok; |
||
87 | } elseif (preg_match('#\d#', (string) $tok) > 0) { |
||
88 | $res['pattern'] .= ' WITHNUMBER'; |
||
89 | $res['value'][] = $tok; |
||
90 | } elseif (mb_strtolower((string) $tok, 'UTF-8') === $tok) { |
||
91 | $res['pattern'] .= ' ALLLOWER'; |
||
92 | $res['value'][] = $tok; |
||
93 | } elseif (mb_strtoupper((string) $tok, 'UTF-8') === $tok) { |
||
94 | $res['pattern'] .= ' ALLUPPER'; |
||
95 | $res['value'][] = $tok; |
||
96 | } elseif (mb_strtoupper(substr((string) $tok, 0, 1), 'UTF-8') === substr((string) $tok, 0, 1) |
||
97 | && mb_strtolower(substr((string) $tok, 1), 'UTF-8') === substr((string) $tok, 1) |
||
98 | ) { |
||
99 | $res['pattern'] .= ' FIRSTUPPER'; |
||
100 | $res['value'][] = $tok; |
||
101 | } elseif (preg_match('#[a-zA-Zàéù]#', (string) $tok) > 0) { |
||
102 | $res['pattern'] .= ' MIXED'; |
||
103 | $res['value'][] = $tok; |
||
104 | } else { |
||
105 | $res['pattern'] .= ' UNKNOW'; |
||
106 | $res['value'][] = $tok; |
||
107 | } |
||
108 | } |
||
109 | |||
110 | $res['pattern'] = trim($res['pattern']); |
||
111 | |||
112 | return $res; |
||
113 | } |
||
164 |