| Conditions | 14 |
| Paths | 12 |
| Total Lines | 79 |
| Code Lines | 51 |
| Lines | 0 |
| Ratio | 0 % |
| Changes | 0 | ||
Small methods make your code easier to understand, in particular if combined with a good name. Besides, if your method is small, finding a good name is usually much easier.
For example, if you find yourself adding comments to a method's body, this is usually a good sign to extract the commented part to a new method, and use the comment as a starting point when coming up with a good name for this new method.
Commonly applied refactorings include:
- Extract Method
If many parameters/temporary variables are present:
- Replace Temp with Query
- Introduce Parameter Object
- Preserve Whole Object
| 1 | <?php |
||
/**
 * Build a typographic pattern describing an author string.
 *
 * The text is cleaned (non-breaking spaces replaced, wikicode removed),
 * pre-processed by preprocessTypoPattern(), stripped of most punctuation,
 * then split on spaces. Every token is mapped to a class name
 * (INITIAL, URL, AND, COMMA, BIBABREV, PUNCTUATION, ALLNUMBER, DASHNUMBER,
 * WITHNUMBER, ALLLOWER, ALLUPPER, FIRSTUPPER, MIXED or UNKNOW).
 *
 * Side effects: $this->tokenValue is reset at the start of the call and the
 * internal array pointers of its entries are advanced while values are read.
 *
 * @param string $text raw author text
 *
 * @return array 'string'  => the original $text,
 *               'pattern' => space-separated class names, one per token,
 *               'value'   => list of token values in the same order as the
 *                            pattern ('*' for COMMA and PUNCTUATION); this
 *                            key is absent when the text yields no token
 */
public function typoPatternFromAuthor(string $text): array
{
    $res = [];
    $res['string'] = $text;
    $modText = TextUtil::replaceNonBreakingSpaces($text);

    // Remove wikilinks and bold/italic wikicode.
    $modText = WikiTextUtil::unWikify($modText);

    // Pre-process: add spaces between relevant typographic items.
    // NOTE(review): presumably preprocessTypoPattern() also inserts the
    // PATTERN* placeholders and records the replaced values in
    // $this->tokenValue, keyed by the short name (e.g. 'INITIAL') — confirm.
    $this->tokenValue = [];
    $modText = $this->preprocessTypoPattern($modText);

    // PUNCTUATION conversion.
    $punctuationColl = array_filter(
        TextUtil::ALL_PUNCTUATION,
        function ($value) {
            // Skip punctuation chars found in compound names
            // (example: "Pierre-Marie L'Anglois").
            // NOTE(review): the two dash entries look identical here — confirm
            // whether one of them was meant to be a different dash character.
            return !in_array($value, ["'", '-', '-']);
        }
    );
    // str_replace() on whole strings is used on purpose: str_split() cuts
    // every single byte and would break multibyte characters.
    $modText = str_replace($punctuationColl, ' PATTERNPUNCTUATION ', $modText);

    // At this point the text looks like "BUBBLES COMMA DROPS COMMA AND PARTICLES".

    // Split the string on runs of spaces.
    $tokens = preg_split('#[ ]+#', $modText);
    $res['pattern'] = '';
    foreach ($tokens as $tok) {
        // Strict comparison on purpose: empty('0') is true and would
        // silently drop a token consisting of the single digit "0".
        if ($tok === '') {
            continue;
        }

        if (preg_match('#^(PATTERNINITIAL|PATTERNURL|PATTERNAND|PATTERNCOMMA|PATTERNBIBABREV|PATTERNPUNCTUATION)$#', $tok, $matches) > 0) {
            $shortpattern = str_replace('PATTERN', '', $tok);
            $res['pattern'] .= ' '.$shortpattern; // PATTERNAND -> AND
            if (in_array($matches[1], ['PATTERNCOMMA', 'PATTERNPUNCTUATION'], true)) {
                // No stored value for commas and punctuation.
                $res['value'][] = '*';
            } else {
                // Read the next stored value for this placeholder type.
                // NOTE(review): assumes $this->tokenValue[$shortpattern] was
                // filled during pre-processing — verify for input that
                // literally contains a PATTERN* word.
                $res['value'][] = current($this->tokenValue[$shortpattern]);
                next($this->tokenValue[$shortpattern]);
            }
            // Idea (not implemented): collapse consecutive initials such as
            // "J. R . R." into a single INITIAL.
        } elseif (preg_match('#^\d+$#', $tok) > 0) {
            $res['pattern'] .= ' ALLNUMBER';
            $res['value'][] = $tok;
        } elseif (preg_match('#^[0-9\-]+$#', $tok) > 0) {
            $res['pattern'] .= ' DASHNUMBER';
            $res['value'][] = $tok;
        } elseif (preg_match('#\d#', $tok) > 0) {
            $res['pattern'] .= ' WITHNUMBER';
            $res['value'][] = $tok;
        } elseif (mb_strtolower($tok, 'UTF-8') === $tok) {
            $res['pattern'] .= ' ALLLOWER';
            $res['value'][] = $tok;
        } elseif (mb_strtoupper($tok, 'UTF-8') === $tok) {
            $res['pattern'] .= ' ALLUPPER';
            $res['value'][] = $tok;
        } elseif (mb_strtoupper(mb_substr($tok, 0, 1, 'UTF-8'), 'UTF-8') === mb_substr($tok, 0, 1, 'UTF-8')
            && mb_strtolower(mb_substr($tok, 1, null, 'UTF-8'), 'UTF-8') === mb_substr($tok, 1, null, 'UTF-8')
        ) {
            // mb_substr() keeps a multibyte first letter intact, so names
            // such as "Émile" are recognised as capitalised words.
            $res['pattern'] .= ' FIRSTUPPER';
            $res['value'][] = $tok;
        } elseif (preg_match('#[a-zA-Zàéù]#', $tok) > 0) {
            // NOTE(review): without the "u" modifier the accented characters
            // of this class are matched byte by byte — confirm this is the
            // intended set before tightening it.
            $res['pattern'] .= ' MIXED';
            $res['value'][] = $tok;
        } else {
            $res['pattern'] .= ' UNKNOW';
            $res['value'][] = $tok;
        }
    }

    $res['pattern'] = trim($res['pattern']);

    return $res;
}
||
| 170 |