| Conditions | 14 |
| Paths | 12 |
| Total Lines | 79 |
| Code Lines | 51 |
| Lines | 0 |
| Ratio | 0 % |
| Changes | 0 | ||
Small methods make your code easier to understand, in particular if combined with a good name. Besides, if your method is small, finding a good name is usually much easier.
For example, if you find yourself adding comments to a method's body, this is usually a good sign to extract the commented part to a new method, and use the comment as a starting point when coming up with a good name for this new method.
Commonly applied refactorings include:
- Extract Method

If many parameters/temporary variables are present:
- Replace Temp with Query
- Introduce Parameter Object
- Preserve Whole Object
| 1 | <?php |
||
/**
 * Build a typographic pattern describing an author string.
 *
 * The text is normalised (non-breaking spaces replaced, wikicode removed),
 * pre-processed by preprocessTypoPattern(), split on spaces, and each token
 * is mapped to a label (INITIAL, AND, COMMA, PUNCTUATION, ALLNUMBER,
 * DASHNUMBER, WITHNUMBER, ALLLOWER, ALLUPPER, FIRSTUPPER, MIXED, UNKNOW...).
 *
 * Side effect: $this->tokenValue is reset before pre-processing and its
 * per-label internal pointers are advanced while values are consumed.
 *
 * @param string $text raw author text
 *
 * @return array{string: string, pattern: string, value: list<string>}
 *               'string'  => the original $text,
 *               'pattern' => space-separated labels, one per token,
 *               'value'   => the value of each token, in the same order
 *                            ('*' for COMMA and PUNCTUATION tokens)
 */
public function typoPatternFromAuthor(string $text): array
{
    $res = [];
    $res['string'] = $text;
    $modText = TextUtil::replaceNonBreakingSpaces($text);

    // unWikify or not ? remove wikilinks and bold/italic wikicode
    $modText = WikiTextUtil::unWikify($modText);

    // Pre-process : add spaces between relevant typographic items.
    // Presumably preprocessTypoPattern() also fills $this->tokenValue with the
    // original values of the PATTERN* placeholders it inserts — confirm there.
    $this->tokenValue = [];
    $modText = $this->preprocessTypoPattern($modText);

    // PUNCTUATION conversion
    $punctuationColl = array_filter(
        TextUtil::ALL_PUNCTUATION,
        static function ($value) {
            // skip punctuation chars from mixed names (example : "Pierre-Marie L'Anglois")
            // NOTE(review): the two dash entries look identical here; check whether
            // one of them was meant to be a different dash character.
            return !in_array($value, ["'", '-', '-'], true);
        }
    );
    // don't use str_split() which cuts on 1 byte length (≠ multibytes chars)
    $modText = str_replace($punctuationColl, ' PATTERNPUNCTUATION ', $modText);

    // "BUBBLES COMMA DROPS COMMA AND PARTICLES"

    // Split the string. PREG_SPLIT_NO_EMPTY drops empty pieces only, so a
    // literal "0" token is kept (empty('0') is true and used to discard it).
    $tokens = preg_split('#[ ]+#', $modText, -1, PREG_SPLIT_NO_EMPTY);
    if ($tokens === false) {
        $tokens = [];
    }

    $res['pattern'] = '';
    // Always expose the 'value' key, even when there is no token at all.
    $res['value'] = [];

    foreach ($tokens as $tok) {
        $tok = (string) $tok;

        // Placeholders inserted by the pre-processing steps above.
        if (preg_match('#^(PATTERNINITIAL|PATTERNURL|PATTERNAND|PATTERNCOMMA|PATTERNBIBABREV|PATTERNPUNCTUATION)$#', $tok, $matches) > 0) {
            $shortpattern = str_replace('PATTERN', '', $tok); // PATTERNAND -> AND
            $res['pattern'] .= ' '.$shortpattern;

            if (in_array($matches[1], ['PATTERNCOMMA', 'PATTERNPUNCTUATION'], true)) {
                // No stored value for commas and generic punctuation.
                $res['value'][] = '*';
            } else {
                // Consume the stored values in order of appearance.
                $res['value'][] = current($this->tokenValue[$shortpattern]);
                next($this->tokenValue[$shortpattern]);
            }

            continue;
        }

        // Multibyte-safe split, so that an accented initial ("Émile") is
        // compared as a whole character and not as its first byte.
        $firstChar = mb_substr($tok, 0, 1, 'UTF-8');
        $remainder = mb_substr($tok, 1, null, 'UTF-8');

        if (preg_match('#^\d+$#', $tok) > 0) {
            $label = 'ALLNUMBER';
        } elseif (preg_match('#^[0-9\-]+$#', $tok) > 0) {
            $label = 'DASHNUMBER';
        } elseif (preg_match('#\d#', $tok) > 0) {
            $label = 'WITHNUMBER';
        } elseif (mb_strtolower($tok, 'UTF-8') === $tok) {
            $label = 'ALLLOWER';
        } elseif (mb_strtoupper($tok, 'UTF-8') === $tok) {
            $label = 'ALLUPPER';
        } elseif (mb_strtoupper($firstChar, 'UTF-8') === $firstChar
            && mb_strtolower($remainder, 'UTF-8') === $remainder
        ) {
            $label = 'FIRSTUPPER';
        } elseif (preg_match('#[a-zA-Zàéù]#', $tok) > 0) {
            $label = 'MIXED';
        } else {
            $label = 'UNKNOW';
        }

        $res['pattern'] .= ' '.$label;
        $res['value'][] = $tok;
    }

    $res['pattern'] = trim($res['pattern']);

    return $res;
}
||
| 164 |