Conditions | 14 |
Paths | 12 |
Total Lines | 79 |
Code Lines | 51 |
Lines | 0 |
Ratio | 0 % |
Changes | 0 |
Small methods make your code easier to understand, in particular if combined with a good name. Besides, if your method is small, finding a good name is usually much easier.
For example, if you find yourself adding comments to a method's body, this is usually a good sign to extract the commented part to a new method, and use the comment as a starting point when coming up with a good name for this new method.
Commonly applied refactorings include:
If many parameters/temporary variables are present:
1 | <?php |
||
/**
 * Build a typographic pattern describing an author string.
 *
 * The text is normalised (non-breaking spaces replaced, wiki markup removed),
 * pre-processed by preprocessTypoPattern(), then split on spaces. Each token
 * is mapped to a symbolic type (INITIAL, AND, COMMA, PUNCTUATION, ALLNUMBER,
 * FIRSTUPPER, ...).
 *
 * Side effect: $this->tokenValue is reset before pre-processing and its
 * per-type internal array pointers are advanced while values are consumed.
 *
 * @param string $text Raw author text.
 *
 * @return array Map with keys:
 *               - 'string'  : the untouched input text;
 *               - 'pattern' : space-separated token types, e.g.
 *                             "BUBBLES COMMA DROPS COMMA AND PARTICLES";
 *               - 'value'   : list of token values aligned with 'pattern'
 *                             ('*' for COMMA/PUNCTUATION). This key is absent
 *                             when the text yields no token.
 */
public function typoPatternFromAuthor(string $text): array
{
    $res = [];
    $res['string'] = $text;
    $modText = TextUtil::replaceNonBreakingSpaces($text);

    // unWikify or not ? remove wikilinks and bold/italic wikicode
    $modText = WikiTextUtil::unWikify($modText);

    /*
     * Pre-process : add spaces between relevant typographic items.
     * NOTE(review): preprocessTypoPattern() presumably fills $this->tokenValue
     * with the original values keyed by short pattern name — confirm.
     */
    $this->tokenValue = [];
    $modText = $this->preprocessTypoPattern($modText);

    // PUNCTUATION conversion
    $punctuationColl = array_filter(
        TextUtil::ALL_PUNCTUATION,
        function ($value) {
            // skip punctuation chars from mixed names (example : "Pierre-Marie L'Anglois")
            // NOTE(review): '-' is listed twice; the second entry may originally have been
            // another dash character lost in transcription — confirm against the repository.
            return !in_array($value, ["'", '-', '-']);
        }
    );
    // don't use str_split() which cuts on 1 byte length (≠ multibytes chars)
    $modText = str_replace($punctuationColl, ' PATTERNPUNCTUATION ', $modText);

    // Split the string on runs of spaces. PREG_SPLIT_NO_EMPTY drops only the empty
    // pieces, so a literal "0" token is kept (empty("0") is true and used to skip it).
    $tokens = preg_split('#[ ]+#', $modText, -1, PREG_SPLIT_NO_EMPTY) ?: [];

    $res['pattern'] = '';
    foreach ($tokens as $tok) {
        if (preg_match('#^(PATTERNINITIAL|PATTERNURL|PATTERNAND|PATTERNCOMMA|PATTERNBIBABREV|PATTERNPUNCTUATION)$#', $tok, $matches) > 0) {
            $shortpattern = str_replace('PATTERN', '', $tok);
            $res['pattern'] .= ' '.$shortpattern; // PATTERNAND -> AND

            if (in_array($matches[1], ['PATTERNCOMMA', 'PATTERNPUNCTUATION'], true)) {
                // Separators carry no meaningful value.
                $res['value'][] = '*';
            } else {
                // Consume the next stored value for this token type.
                $res['value'][] = current($this->tokenValue[$shortpattern]);
                next($this->tokenValue[$shortpattern]);
            }
            // Idea noted by the original author, not implemented:
            // "J. R . R." => a single INITIAL (collapse 'INITIAL INITIAL' into 'INITIAL').

            continue;
        }

        $res['pattern'] .= ' '.$this->typoPatternTypeOfPlainToken($tok);
        $res['value'][] = $tok;
    }

    $res['pattern'] = trim($res['pattern']);

    return $res;
}

/**
 * Classify a plain (non-placeholder) token by its digits and letter case.
 *
 * @param string $tok Non-empty token.
 *
 * @return string One of ALLNUMBER, DASHNUMBER, WITHNUMBER, ALLLOWER, ALLUPPER,
 *                FIRSTUPPER, MIXED or UNKNOW.
 */
private function typoPatternTypeOfPlainToken(string $tok): string
{
    if (preg_match('#^\d+$#', $tok) > 0) {
        return 'ALLNUMBER';
    }
    if (preg_match('#^[0-9\-]+$#', $tok) > 0) {
        return 'DASHNUMBER';
    }
    if (preg_match('#\d#', $tok) > 0) {
        return 'WITHNUMBER';
    }
    if (mb_strtolower($tok, 'UTF-8') === $tok) {
        return 'ALLLOWER';
    }
    if (mb_strtoupper($tok, 'UTF-8') === $tok) {
        return 'ALLUPPER';
    }

    // Multibyte-safe split: byte-wise substr() would cut a leading accented
    // capital ("Émile") in the middle of its UTF-8 sequence.
    $first = mb_substr($tok, 0, 1, 'UTF-8');
    $rest = mb_substr($tok, 1, null, 'UTF-8');
    if (mb_strtoupper($first, 'UTF-8') === $first && mb_strtolower($rest, 'UTF-8') === $rest) {
        return 'FIRSTUPPER';
    }

    if (preg_match('#[a-zA-Zàéù]#', $tok) > 0) {
        return 'MIXED';
    }

    return 'UNKNOW';
}
||
170 |