Complex classes like PhoneNumberMatcher often do a lot of different things. To break such a class down, we need to identify a cohesive component within that class. A common approach to finding such a component is to look for fields/methods that share the same prefixes or suffixes. You can also have a look at the cohesion graph to spot any unconnected or weakly connected components.
Once you have determined the fields that belong together, you can apply the Extract Class refactoring. If the component makes sense as a sub-class, Extract Subclass is also a candidate, and is often faster.
While breaking up the class, it is a good idea to analyze how other classes use PhoneNumberMatcher, and based on these observations, apply Extract Interface, too.
1 | <?php |
||
16 | class PhoneNumberMatcher implements \Iterator |
||
17 | { |
||
18 | protected static $initialized = false; |
||
19 | |||
20 | /** |
||
21 | * The phone number pattern used by $this->find(), similar to |
||
22 | * PhoneNumberUtil::VALID_PHONE_NUMBER, but with the following differences: |
||
23 | * <ul> |
||
24 | * <li>All captures are limited in order to place an upper bound to the text matched by the |
||
25 | * pattern. |
||
26 | * <ul> |
||
27 | * <li>Leading punctuation / plus signs are limited. |
||
28 | * <li>Consecutive occurrences of punctuation are limited. |
||
29 | * <li>Number of digits is limited. |
||
30 | * </ul> |
||
31 | * <li>No whitespace is allowed at the start or end. |
||
32 | * <li>No alpha digits (vanity numbers such as 1-800-SIX-FLAGS) are currently supported. |
||
33 | * </ul> |
||
34 | * |
||
35 | * @var string |
||
36 | */ |
||
37 | protected static $pattern; |
||
38 | |||
39 | /** |
||
40 | * Matches strings that look like publication pages. Example: |
||
41 | * <pre>Computing Complete Answers to Queries in the Presence of Limited Access Patterns. |
||
42 | * Chen Li. VLDB J. 12(3): 211-227 (2003).</pre> |
||
43 | * |
||
44 | * The string "211-227 (2003)" is not a telephone number. |
||
45 | * |
||
46 | * @var string |
||
47 | */ |
||
48 | protected static $pubPages = "\\d{1,5}-+\\d{1,5}\\s{0,4}\\(\\d{1,4}"; |
||
49 | |||
50 | /** |
||
51 | * Matches strings that look like dates using "/" as a separator. Examples 3/10/2011, 31/10/2011 or |
||
52 | * 08/31/95. |
||
53 | * |
||
54 | * @var string |
||
55 | */ |
||
56 | protected static $slashSeparatedDates = "(?:(?:[0-3]?\\d/[01]?\\d)|(?:[01]?\\d/[0-3]?\\d))/(?:[12]\\d)?\\d{2}"; |
||
57 | |||
58 | /** |
||
59 | * Matches timestamps. Examples: "2012-01-02 08:00". Note that the reg-ex does not include the |
||
60 | * trailing ":\d\d" -- that is covered by timeStampsSuffix. |
||
61 | * |
||
62 | * @var string |
||
63 | */ |
||
64 | protected static $timeStamps = "[12]\\d{3}[-/]?[01]\\d[-/]?[0-3]\\d +[0-2]\\d$"; |
||
65 | protected static $timeStampsSuffix = ":[0-5]\\d"; |
||
66 | |||
67 | /** |
||
68 | * Pattern to check that brackets match. Opening brackets should be closed within a phone number. |
||
69 | * This also checks that there is something inside the brackets. Having no brackets at all is also |
||
70 | * fine. |
||
71 | * |
||
72 | * @var string |
||
73 | */ |
||
74 | protected static $matchingBrackets; |
||
75 | |||
76 | /** |
||
77 | * Patterns used to extract phone numbers from a larger phone-number-like pattern. These are |
||
78 | * ordered according to specificity. For example, white-space is last since that is frequently |
||
79 | * used in numbers, not just to separate two numbers. We have separate patterns since we don't |
||
80 | * want to break up the phone-number-like text on more than one different kind of symbol at one |
||
81 | * time, although symbols of the same type (e.g. space) can be safely grouped together. |
||
82 | * |
||
83 | * Note that if there is a match, we will always check any text found up to the first match as |
||
84 | * well. |
||
85 | * |
||
86 | * @var string[] |
||
87 | */ |
||
88 | protected static $innerMatches = array(); |
||
89 | |||
90 | /** |
||
91 | * Punctuation that may be at the start of a phone number - brackets and plus signs. |
||
92 | * |
||
93 | * @var string |
||
94 | */ |
||
95 | protected static $leadClass; |
||
96 | |||
97 | /** |
||
98 | * Path prefix of the alternate-formats metadata files |
||
99 | * @var string |
||
100 | */ |
||
101 | protected static $alternateFormatsFilePrefix; |
||
102 | const META_DATA_FILE_PREFIX = 'PhoneNumberAlternateFormats'; |
||
103 | |||
/**
 * Builds the static regular-expression fragments used by the matcher and
 * stores them in the class's static properties.
 *
 * Populates $alternateFormatsFilePrefix, $innerMatches, $matchingBrackets,
 * $leadClass and $pattern, asks PhoneNumberUtil to initialise its extension
 * patterns, and finally sets $initialized to true.
 *
 * @return void
 */
protected static function init()
{
    // __DIR__ yields the same value as dirname(__FILE__).
    static::$alternateFormatsFilePrefix = __DIR__ . '/data/' . static::META_DATA_FILE_PREFIX;

    static::$innerMatches = array(
        // Breaks on the slash - e.g. "651-234-2345/332-445-1234"
        "/+(.*)",
        // Note that the bracket here is inside the capturing group, since we consider it part of the
        // phone number. Will match a pattern like "(650) 223 3345 (754) 223 3321".
        "(\\([^(]*)",
        // Breaks on a hyphen - e.g. "12345 - 332-445-1234 is my number."
        // We require a space on either side of the hyphen for it to be considered a separator.
        "(?:\\p{Z}-|-\\p{Z})\\p{Z}*(.+)",
        // Various types of wide hyphens. Note we have decided not to enforce a space here, since it's
        // possible that it's supposed to be used to break two numbers without spaces, and we haven't
        // seen many instances of it used within a number.
        // The class is U+2012..U+2015 plus U+FF0D (fullwidth hyphen-minus), written as UTF-8 byte
        // escapes so the code points stay unambiguous. A plain ASCII "-" must NOT appear here,
        // otherwise the space requirement of the previous pattern would be bypassed.
        "[\xE2\x80\x92-\xE2\x80\x95\xEF\xBC\x8D]\\p{Z}*(.+)",
        // Breaks on a full stop - e.g. "12345. 332-445-1234 is my number."
        "\\.+\\p{Z}*([^.]+)",
        // Breaks on space - e.g. "3324451234 8002341234"
        "\\p{Z}+(\\P{Z}+)"
    );

    /*
     * Builds the matchingBrackets and pattern regular expressions. The building blocks exist
     * to make the pattern more easily understood.
     */

    // ASCII "(" and "[" followed by their fullwidth forms U+FF08 and U+FF3B.
    $openingParens = "(\\[\xEF\xBC\x88\xEF\xBC\xBB";
    // ASCII ")" and "]" followed by their fullwidth forms U+FF09 and U+FF3D.
    $closingParens = ")\\]\xEF\xBC\x89\xEF\xBC\xBD";
    $nonParens = "[^" . $openingParens . $closingParens . "]";

    // Limit on the number of pairs of brackets in a phone number.
    $bracketPairLimit = static::limit(0, 3);

    /*
     * An opening bracket at the beginning may not be closed, but subsequent ones should be. It's
     * also possible that the leading bracket was dropped, so we shouldn't be surprised if we see a
     * closing bracket first. We limit the sets of brackets in a phone number to four
     * (the optional leading set plus up to three complete pairs).
     */
    static::$matchingBrackets =
        "(?:[" . $openingParens . "])?" . "(?:" . $nonParens . "+" . "[" . $closingParens . "])?"
        . $nonParens . "+"
        . "(?:[" . $openingParens . "]" . $nonParens . "+[" . $closingParens . "])" . $bracketPairLimit
        . $nonParens . "*";

    // Limit on the number of leading (plus) characters.
    $leadLimit = static::limit(0, 2);

    // Limit on the number of consecutive punctuation characters.
    $punctuationLimit = static::limit(0, 4);

    /*
     * The maximum number of digits allowed in a digit-separated block. As we allow all digits in a
     * single block, set high enough to accommodate the entire national number and the international
     * country code
     */
    $digitBlockLimit = PhoneNumberUtil::MAX_LENGTH_FOR_NSN + PhoneNumberUtil::MAX_LENGTH_COUNTRY_CODE;

    /*
     * Limit on the number of blocks separated by the punctuation. Uses digitBlockLimit since some
     * formats use spaces to separate each digit
     */
    $blockLimit = static::limit(0, $digitBlockLimit);

    // A punctuation sequence allowing white space
    $punctuation = '[' . PhoneNumberUtil::VALID_PUNCTUATION . ']' . $punctuationLimit;

    // A digits block without punctuation.
    $digitSequence = "\\p{Nd}" . static::limit(1, $digitBlockLimit);

    // Characters allowed to lead a number: opening brackets and plus signs.
    $leadClassChars = $openingParens . PhoneNumberUtil::PLUS_CHARS;
    $leadClass = '[' . $leadClassChars . ']';
    static::$leadClass = $leadClass;

    // Init extension patterns from PhoneNumberUtil
    PhoneNumberUtil::initCapturingExtnDigits();
    PhoneNumberUtil::initExtnPatterns();

    // Phone number pattern allowing optional punctuation.
    static::$pattern = "(?:" . $leadClass . $punctuation . ")" . $leadLimit
        . $digitSequence . "(?:" . $punctuation . $digitSequence . ")" . $blockLimit
        . "(?:" . PhoneNumberUtil::$EXTN_PATTERNS_FOR_MATCHING . ")?";

    static::$initialized = true;
}
|
192 | |||
193 | /** |
||
194 | * Helper function to generate regular expression with an upper and lower limit. |
||
195 | * |
||
196 | * @param int $lower |
||
197 | * @param int $upper |
||
198 | * @return string |
||
199 | */ |
||
200 | 1 | protected static function limit($lower, $upper) |
|
208 | |||
209 | /** |
||
210 | * The phone number utility. |
||
211 | * @var PhoneNumberUtil |
||
212 | */ |
||
213 | protected $phoneUtil; |
||
214 | |||
215 | /** |
||
216 | * The text searched for phone numbers. |
||
217 | * @var string |
||
218 | */ |
||
219 | protected $text; |
||
220 | |||
221 | /** |
||
222 | * The region (country) to assume for phone numbers without an international prefix, possibly |
||
223 | * null. |
||
224 | * @var string |
||
225 | */ |
||
226 | protected $preferredRegion; |
||
227 | |||
228 | /** |
||
229 | * The degrees of validation requested. |
||
230 | * @var AbstractLeniency |
||
231 | */ |
||
232 | protected $leniency; |
||
233 | |||
234 | /** |
||
235 | * The maximum number of retries after matching an invalid number. |
||
236 | * @var int |
||
237 | */ |
||
238 | protected $maxTries; |
||
239 | |||
240 | /** |
||
241 | * One of: |
||
242 | * - NOT_READY |
||
243 | * - READY |
||
244 | * - DONE |
||
245 | * @var string |
||
246 | */ |
||
247 | protected $state = 'NOT_READY'; |
||
248 | |||
249 | /** |
||
250 | * The last successful match, null unless $this->state = READY |
||
251 | * @var PhoneNumberMatch |
||
252 | */ |
||
253 | protected $lastMatch; |
||
254 | |||
255 | /** |
||
256 | * The next index to start searching at. Undefined when $this->state = DONE |
||
257 | * @var int |
||
258 | */ |
||
259 | protected $searchIndex = 0; |
||
260 | |||
261 | /** |
||
262 | * Creates a new instance. See the factory methods in PhoneNumberUtil on how to obtain a new instance. |
||
263 | * |
||
264 | * |
||
265 | * @param PhoneNumberUtil $util The Phone Number Util to use |
||
266 | * @param string|null $text The text that we will search, null for no text |
||
267 | * @param string|null $country The country to assume for phone numbers not written in international format. |
||
268 | * (with a leading plus, or with the international dialling prefix of the specified region). |
||
269 | * May be null, or "ZZ" if only numbers with a leading plus should be considered. |
||
270 | * @param AbstractLeniency $leniency The leniency to use when evaluating candidate phone numbers |
||
271 | * @param int $maxTries The maximum number of invalid numbers to try before giving up on the text. |
||
272 | * This is to cover degenerate cases where the text has a lot of false positives in it. Must be >= 0 |
||
273 | * @throws \NullPointerException |
||
274 | * @throws \InvalidArgumentException |
||
275 | */ |
||
276 | 205 | public function __construct(PhoneNumberUtil $util, $text, $country, AbstractLeniency $leniency, $maxTries) |
|
292 | |||
293 | /** |
||
294 | * Attempts to find the next subsequence in the searched sequence on or after {@code searchIndex} |
||
295 | * that represents a phone number. Returns the next match, null if none was found. |
||
296 | * |
||
297 | * @param int $index The search index to start searching at |
||
298 | * @return PhoneNumberMatch|null The Phone Number Match found, null if none can be found |
||
299 | */ |
||
300 | 199 | protected function find($index) |
|
324 | |||
325 | /** |
||
326 | * Trims away any characters after the first match of $pattern in $candidate, |
||
327 | * returning the trimmed version. |
||
328 | * |
||
329 | * @param string $pattern |
||
330 | * @param string $candidate |
||
331 | * @return string |
||
332 | */ |
||
333 | 198 | protected static function trimAfterFirstMatch($pattern, $candidate) |
|
342 | |||
343 | /** |
||
344 | * Helper method to determine if a character is a Latin-script letter or not. For our purposes, |
||
345 | * combining marks should also return true since we assume they have been added to a preceding |
||
346 | * Latin character. |
||
347 | * |
||
348 | * @param string $letter |
||
349 | * @return bool |
||
350 | * @internal |
||
351 | */ |
||
352 | 58 | public static function isLatinLetter($letter) |
|
362 | |||
363 | /** |
||
364 | * @param string $character |
||
365 | * @return bool |
||
366 | */ |
||
367 | 47 | protected static function isInvalidPunctuationSymbol($character) |
|
371 | |||
372 | /** |
||
373 | * Attempts to extract a match from a $candidate. |
||
374 | * |
||
375 | * @param string $candidate The candidate text that might contain a phone number |
||
376 | * @param int $offset The offset of $candidate within $this->text |
||
377 | * @return PhoneNumberMatch|null The match found, null if none can be found |
||
378 | */ |
||
379 | 198 | protected function extractMatch($candidate, $offset) |
|
407 | |||
408 | /** |
||
409 | * Attempts to extract a match from $candidate if the whole candidate does not qualify as a |
||
410 | * match. |
||
411 | * |
||
412 | * @param string $candidate The candidate text that might contain a phone number |
||
413 | * @param int $offset The current offset of $candidate within $this->text |
||
414 | * @return PhoneNumberMatch|null The match found, null if none can be found |
||
415 | */ |
||
416 | 74 | protected function extractInnerMatch($candidate, $offset) |
|
446 | |||
447 | /** |
||
448 | * Parses a phone number from the $candidate using PhoneNumberUtil::parse() and |
||
449 | * verifies it matches the requested leniency. If parsing and verification succeed, a |
||
450 | * corresponding PhoneNumberMatch is returned, otherwise this method returns null. |
||
451 | * |
||
452 | * @param string $candidate The candidate match |
||
453 | * @param int $offset The offset of $candidate within $this->text |
||
454 | * @return PhoneNumberMatch|null The parsed and validated phone number match, or null |
||
455 | */ |
||
456 | 178 | protected function parseAndVerify($candidate, $offset) |
|
525 | |||
526 | /** |
||
527 | * @param PhoneNumberUtil $util |
||
528 | * @param PhoneNumber $number |
||
529 | * @param string $normalizedCandidate |
||
530 | * @param string[] $formattedNumberGroups |
||
531 | * @return bool |
||
532 | */ |
||
533 | 26 | public static function allNumberGroupsRemainGrouped( |
|
590 | |||
591 | /** |
||
592 | * @param PhoneNumberUtil $util |
||
593 | * @param PhoneNumber $number |
||
594 | * @param string $normalizedCandidate |
||
595 | * @param string[] $formattedNumberGroups |
||
596 | * @return bool |
||
597 | */ |
||
598 | 26 | public static function allNumberGroupsAreExactlyPresent( |
|
635 | |||
636 | /** |
||
637 | * Helper method to get the national-number part of a number, formatted without any national |
||
638 | * prefix, and return it as a set of digit blocks that would be formatted together. |
||
639 | * |
||
640 | * @param PhoneNumberUtil $util |
||
641 | * @param PhoneNumber $number |
||
642 | * @param NumberFormat $formattingPattern |
||
643 | * @return string[] |
||
644 | */ |
||
645 | 52 | protected static function getNationalNumberGroups( |
|
670 | |||
671 | /** |
||
672 | * @param PhoneNumber $number |
||
673 | * @param string $candidate |
||
674 | * @param PhoneNumberUtil $util |
||
675 | * @param \Closure $checker |
||
676 | * @return bool |
||
677 | */ |
||
678 | 52 | public static function checkNumberGroupingIsValid( |
|
705 | |||
706 | /** |
||
707 | * @param PhoneNumber $number |
||
708 | * @param string $candidate |
||
709 | * @return bool |
||
710 | */ |
||
711 | 53 | public static function containsMoreThanOneSlashInNationalNumber(PhoneNumber $number, $candidate) |
|
741 | |||
742 | /** |
||
743 | * @param PhoneNumber $number |
||
744 | * @param string $candidate |
||
745 | * @param PhoneNumberUtil $util |
||
746 | * @return bool |
||
747 | */ |
||
748 | 97 | public static function containsOnlyValidXChars(PhoneNumber $number, $candidate, PhoneNumberUtil $util) |
|
780 | |||
781 | /** |
||
782 | * @param PhoneNumber $number |
||
783 | * @param PhoneNumberUtil $util |
||
784 | * @return bool |
||
785 | */ |
||
786 | 97 | public static function isNationalPrefixPresentIfRequired(PhoneNumber $number, PhoneNumberUtil $util) |
|
827 | |||
828 | |||
829 | /** |
||
830 | * Storage for Alternate Formats |
||
831 | * @var PhoneMetadata[] |
||
832 | */ |
||
833 | protected static $callingCodeToAlternateFormatsMap = array(); |
||
834 | |||
835 | /** |
||
836 | * @param $countryCallingCode |
||
837 | * @return PhoneMetadata|null |
||
838 | */ |
||
839 | 13 | protected static function getAlternateFormatsForCountry($countryCallingCode) |
|
853 | |||
854 | /** |
||
855 | * @param string $countryCallingCode |
||
856 | * @throws \Exception |
||
857 | */ |
||
858 | 2 | protected static function loadAlternateFormatsMetadataFromFile($countryCallingCode) |
|
872 | |||
873 | |||
874 | /** |
||
875 | * Return the current element |
||
876 | * @link http://php.net/manual/en/iterator.current.php |
||
877 | * @return PhoneNumberMatch|null |
||
878 | */ |
||
879 | 197 | public function current() |
|
883 | |||
884 | /** |
||
885 | * Move forward to next element |
||
886 | * @link http://php.net/manual/en/iterator.next.php |
||
887 | * @return void Any returned value is ignored. |
||
888 | */ |
||
889 | 199 | public function next() |
|
902 | |||
903 | /** |
||
904 | * Return the key of the current element |
||
905 | * @link http://php.net/manual/en/iterator.key.php |
||
906 | * @return mixed scalar on success, or null on failure. |
||
907 | * @since 5.0.0 |
||
908 | */ |
||
909 | public function key() |
||
913 | |||
914 | /** |
||
915 | * Checks if current position is valid |
||
916 | * @link http://php.net/manual/en/iterator.valid.php |
||
917 | * @return boolean The return value will be casted to boolean and then evaluated. |
||
918 | * Returns true on success or false on failure. |
||
919 | * @since 5.0.0 |
||
920 | */ |
||
921 | 29 | public function valid() |
|
925 | |||
926 | /** |
||
927 | * Rewind the Iterator to the first element |
||
928 | * @link http://php.net/manual/en/iterator.rewind.php |
||
929 | * @return void Any returned value is ignored. |
||
930 | * @since 5.0.0 |
||
931 | */ |
||
932 | 18 | public function rewind() |
|
937 | } |
||
938 |