Complex classes like PhoneNumberMatcher often do a lot of different things. To break such a class down, we need to identify a cohesive component within that class. A common approach to find such a component is to look for fields/methods that share the same prefixes, or suffixes. You can also have a look at the cohesion graph to spot any un-connected, or weakly-connected components.
Once you have determined the fields that belong together, you can apply the Extract Class refactoring. If the component makes sense as a sub-class, Extract Subclass is also a candidate, and is often faster.
While breaking up the class, it is a good idea to analyze how other classes use PhoneNumberMatcher, and based on these observations, apply Extract Interface, too.
| 1 | <?php |
||
| 16 | class PhoneNumberMatcher implements \Iterator |
||
| 17 | { |
||
| 18 | protected static $initialized = false; |
||
| 19 | |||
| 20 | /** |
||
| 21 | * The phone number pattern used by $this->find(), similar to |
||
| 22 | * PhoneNumberUtil::VALID_PHONE_NUMBER, but with the following differences: |
||
| 23 | * <ul> |
||
| 24 | * <li>All captures are limited in order to place an upper bound to the text matched by the |
||
| 25 | * pattern. |
||
| 26 | * <ul> |
||
| 27 | * <li>Leading punctuation / plus signs are limited. |
||
| 28 | * <li>Consecutive occurrences of punctuation are limited. |
||
| 29 | * <li>Number of digits is limited. |
||
| 30 | * </ul> |
||
| 31 | * <li>No whitespace is allowed at the start or end. |
||
| 32 | * <li>No alpha digits (vanity numbers such as 1-800-SIX-FLAGS) are currently supported. |
||
| 33 | * </ul> |
||
| 34 | * |
||
| 35 | * @var string |
||
| 36 | */ |
||
| 37 | protected static $pattern; |
||
| 38 | |||
| 39 | /** |
||
| 40 | * Matches strings that look like publication pages. Example: |
||
| 41 | * <pre>Computing Complete Answers to Queries in the Presence of Limited Access Patterns. |
||
| 42 | * Chen Li. VLDB J. 12(3): 211-227 (2003).</pre> |
||
| 43 | * |
||
| 44 | * The string "211-227 (2003)" is not a telephone number. |
||
| 45 | * |
||
| 46 | * @var string |
||
| 47 | */ |
||
| 48 | protected static $pubPages = "\\d{1,5}-+\\d{1,5}\\s{0,4}\\(\\d{1,4}"; |
||
| 49 | |||
| 50 | /** |
||
| 51 | * Matches strings that look like dates using "/" as a separator. Examples 3/10/2011, 31/10/2011 or |
||
| 52 | * 08/31/95. |
||
| 53 | * |
||
| 54 | * @var string |
||
| 55 | */ |
||
| 56 | protected static $slashSeparatedDates = "(?:(?:[0-3]?\\d/[01]?\\d)|(?:[01]?\\d/[0-3]?\\d))/(?:[12]\\d)?\\d{2}"; |
||
| 57 | |||
| 58 | /** |
||
| 59 | * Matches timestamps. Examples: "2012-01-02 08:00". Note that the reg-ex does not include the |
||
| 60 | * trailing ":\d\d" -- that is covered by timeStampsSuffix. |
||
| 61 | * |
||
| 62 | * @var string |
||
| 63 | */ |
||
| 64 | protected static $timeStamps = "[12]\\d{3}[-/]?[01]\\d[-/]?[0-3]\\d +[0-2]\\d$"; |
||
| 65 | protected static $timeStampsSuffix = ":[0-5]\\d"; |
||
| 66 | |||
| 67 | /** |
||
| 68 | * Pattern to check that brackets match. Opening brackets should be closed within a phone number. |
||
| 69 | * This also checks that there is something inside the brackets. Having no brackets at all is also |
||
| 70 | * fine. |
||
| 71 | * |
||
| 72 | * @var string |
||
| 73 | */ |
||
| 74 | protected static $matchingBrackets; |
||
| 75 | |||
| 76 | /** |
||
| 77 | * Patterns used to extract phone numbers from a larger phone-number-like pattern. These are |
||
| 78 | * ordered according to specificity. For example, white-space is last since that is frequently |
||
| 79 | * used in numbers, not just to separate two numbers. We have separate patterns since we don't |
||
| 80 | * want to break up the phone-number-like text on more than one different kind of symbol at one |
||
| 81 | * time, although symbols of the same type (e.g. space) can be safely grouped together. |
||
| 82 | * |
||
| 83 | * Note that if there is a match, we will always check any text found up to the first match as |
||
| 84 | * well. |
||
| 85 | * |
||
| 86 | * @var string[] |
||
| 87 | */ |
||
| 88 | protected static $innerMatches = array(); |
||
| 89 | |||
| 90 | /** |
||
| 91 | * Punctuation that may be at the start of a phone number - brackets and plus signs. |
||
| 92 | * |
||
| 93 | * @var string |
||
| 94 | */ |
||
| 95 | protected static $leadClass; |
||
| 96 | |||
| 97 | /** |
||
| 98 | * Prefix of the files |
||
| 99 | * @var string |
||
| 100 | */ |
||
| 101 | protected static $alternateFormatsFilePrefix; |
||
| 102 | const META_DATA_FILE_PREFIX = 'PhoneNumberAlternateFormats'; |
||
| 103 | |||
| 104 | 1 | protected static function init() |
|
| 192 | |||
| 193 | /** |
||
| 194 | * Helper function to generate regular expression with an upper and lower limit. |
||
| 195 | * |
||
| 196 | * @param int $lower |
||
| 197 | * @param int $upper |
||
| 198 | * @return string |
||
| 199 | */ |
||
| 200 | 1 | protected static function limit($lower, $upper) |
|
| 208 | |||
| 209 | /** |
||
| 210 | * The phone number utility. |
||
| 211 | * @var PhoneNumberUtil |
||
| 212 | */ |
||
| 213 | protected $phoneUtil; |
||
| 214 | |||
| 215 | /** |
||
| 216 | * The text searched for phone numbers. |
||
| 217 | * @var string |
||
| 218 | */ |
||
| 219 | protected $text; |
||
| 220 | |||
| 221 | /** |
||
| 222 | * The region (country) to assume for phone numbers without an international prefix, possibly |
||
| 223 | * null. |
||
| 224 | * @var string |
||
| 225 | */ |
||
| 226 | protected $preferredRegion; |
||
| 227 | |||
| 228 | /** |
||
| 229 | * The degrees of validation requested. |
||
| 230 | * @var AbstractLeniency |
||
| 231 | */ |
||
| 232 | protected $leniency; |
||
| 233 | |||
| 234 | /** |
||
| 235 | * The maximum number of retries after matching an invalid number. |
||
| 236 | * @var int |
||
| 237 | */ |
||
| 238 | protected $maxTries; |
||
| 239 | |||
| 240 | /** |
||
| 241 | * One of: |
||
| 242 | * - NOT_READY |
||
| 243 | * - READY |
||
| 244 | * - DONE |
||
| 245 | * @var string |
||
| 246 | */ |
||
| 247 | protected $state = 'NOT_READY'; |
||
| 248 | |||
| 249 | /** |
||
| 250 | * The last successful match, null unless $this->state = READY |
||
| 251 | * @var PhoneNumberMatch |
||
| 252 | */ |
||
| 253 | protected $lastMatch; |
||
| 254 | |||
| 255 | /** |
||
| 256 | * The next index to start searching at. Undefined when $this->state = DONE |
||
| 257 | * @var int |
||
| 258 | */ |
||
| 259 | protected $searchIndex = 0; |
||
| 260 | |||
| 261 | /** |
||
| 262 | * Creates a new instance. See the factory methods in PhoneNumberUtil on how to obtain a new instance. |
||
| 263 | * |
||
| 264 | * |
||
| 265 | * @param PhoneNumberUtil $util The Phone Number Util to use |
||
| 266 | * @param string|null $text The text that we will search, null for no text |
||
| 267 | * @param string|null $country The country to assume for phone numbers not written in international format. |
||
| 268 | * (with a leading plus, or with the international dialling prefix of the specified region). |
||
| 269 | * May be null, or "ZZ" if only numbers with a leading plus should be considered. |
||
| 270 | * @param AbstractLeniency $leniency The leniency to use when evaluating candidate phone numbers |
||
| 271 | * @param int $maxTries The maximum number of invalid numbers to try before giving up on the text. |
||
| 272 | * This is to cover degenerate cases where the text has a lot of false positives in it. Must be >= 0 |
||
| 273 | * @throws \NullPointerException |
||
| 274 | * @throws \InvalidArgumentException |
||
| 275 | */ |
||
| 276 | 205 | public function __construct(PhoneNumberUtil $util, $text, $country, AbstractLeniency $leniency, $maxTries) |
|
| 292 | |||
| 293 | /** |
||
| 294 | * Attempts to find the next subsequence in the searched sequence on or after {@code searchIndex} |
||
| 295 | * that represents a phone number. Returns the next match, null if none was found. |
||
| 296 | * |
||
| 297 | * @param int $index The search index to start searching at |
||
| 298 | * @return PhoneNumberMatch|null The Phone Number Match found, null if none can be found |
||
| 299 | */ |
||
| 300 | 199 | protected function find($index) |
|
| 324 | |||
| 325 | /** |
||
| 326 | * Trims away any characters after the first match of $pattern in $candidate, |
||
| 327 | * returning the trimmed version. |
||
| 328 | * |
||
| 329 | * @param string $pattern |
||
| 330 | * @param string $candidate |
||
| 331 | * @return string |
||
| 332 | */ |
||
| 333 | 198 | protected static function trimAfterFirstMatch($pattern, $candidate) |
|
| 342 | |||
| 343 | /** |
||
| 344 | * Helper method to determine if a character is a Latin-script letter or not. For our purposes, |
||
| 345 | * combining marks should also return true since we assume they have been added to a preceding |
||
| 346 | * Latin character. |
||
| 347 | * |
||
| 348 | * @param string $letter |
||
| 349 | * @return bool |
||
| 350 | * @internal |
||
| 351 | */ |
||
| 352 | 58 | public static function isLatinLetter($letter) |
|
| 362 | |||
| 363 | /** |
||
| 364 | * @param string $character |
||
| 365 | * @return bool |
||
| 366 | */ |
||
| 367 | 47 | protected static function isInvalidPunctuationSymbol($character) |
|
| 371 | |||
| 372 | /** |
||
| 373 | * Attempts to extract a match from a $candidate. |
||
| 374 | * |
||
| 375 | * @param string $candidate The candidate text that might contain a phone number |
||
| 376 | * @param int $offset The offset of $candidate within $this->text |
||
| 377 | * @return PhoneNumberMatch|null The match found, null if none can be found |
||
| 378 | */ |
||
| 379 | 198 | protected function extractMatch($candidate, $offset) |
|
| 408 | |||
| 409 | /** |
||
| 410 | * Attempts to extract a match from $candidate if the whole candidate does not qualify as a |
||
| 411 | * match. |
||
| 412 | * |
||
| 413 | * @param string $candidate The candidate text that might contain a phone number |
||
| 414 | * @param int $offset The current offset of $candidate within $this->text |
||
| 415 | * @return PhoneNumberMatch|null The match found, null if none can be found |
||
| 416 | */ |
||
| 417 | 74 | protected function extractInnerMatch($candidate, $offset) |
|
| 447 | |||
| 448 | /** |
||
| 449 | * Parses a phone number from the $candidate using PhoneNumberUtil::parse() and |
||
| 450 | * verifies it matches the requested leniency. If parsing and verification succeed, a |
||
| 451 | * corresponding PhoneNumberMatch is returned, otherwise this method returns null. |
||
| 452 | * |
||
| 453 | * @param string $candidate The candidate match |
||
| 454 | * @param int $offset The offset of $candidate within $this->text |
||
| 455 | * @return PhoneNumberMatch|null The parsed and validated phone number match, or null |
||
| 456 | */ |
||
| 457 | 178 | protected function parseAndVerify($candidate, $offset) |
|
| 526 | |||
/**
 * Checks that every digit group in $formattedNumberGroups appears, in order and unbroken,
 * inside $normalizedCandidate.
 *
 * The scan starts after the country code when the number's country code source is anything
 * other than FROM_DEFAULT_COUNTRY. If the first group is immediately followed by a digit
 * (i.e. no formatting symbol after it) and the region has a national prefix, the candidate is
 * only accepted when the text beginning at that first group starts with the complete national
 * significant number. If the number has an extension, the extension must still be found in
 * the text that follows the last matched group.
 *
 * @param PhoneNumberUtil $util
 * @param PhoneNumber $number
 * @param string $normalizedCandidate
 * @param string[] $formattedNumberGroups
 * @return bool
 */
public static function allNumberGroupsRemainGrouped(
    PhoneNumberUtil $util,
    PhoneNumber $number,
    $normalizedCandidate,
    $formattedNumberGroups
) {
    $fromIndex = 0;
    if ($number->getCountryCodeSource() !== CountryCodeSource::FROM_DEFAULT_COUNTRY) {
        // First skip the country code if the normalized candidate contained it.
        // The country code may not be a string, so convert it explicitly instead of relying
        // on the mb_* functions coercing it.
        $countryCode = (string) $number->getCountryCode();
        // mb_strpos() returns false when the needle is absent; the (int) cast keeps the
        // previous arithmetic result (false counted as 0) while making it explicit.
        $fromIndex = (int) mb_strpos($normalizedCandidate, $countryCode) + mb_strlen($countryCode);
    }

    // Check each group of consecutive digits are not broken into separate groupings in the
    // $normalizedCandidate string.
    $formattedNumberGroupsLength = count($formattedNumberGroups);
    for ($i = 0; $i < $formattedNumberGroupsLength; $i++) {
        $group = $formattedNumberGroups[$i];

        // Fails if the substring of $normalizedCandidate starting from $fromIndex
        // doesn't contain the consecutive digits in $formattedNumberGroups[$i].
        $fromIndex = mb_strpos($normalizedCandidate, $group, $fromIndex);
        if ($fromIndex === false) {
            return false;
        }

        // Moves $fromIndex forward.
        $fromIndex += mb_strlen($group);
        if ($i === 0 && $fromIndex < mb_strlen($normalizedCandidate)) {
            // We are at the position right after the NDC. We get the region used for formatting
            // information based on the country code in the phone number, rather than the number itself,
            // as we do not need to distinguish between different countries with the same country
            // calling code and this is faster.
            $region = $util->getRegionCodeForCountryCode($number->getCountryCode());

            // mb_substr() always returns a string, so the former is_int() test could never be
            // true and this branch was unreachable. Test for a decimal digit instead.
            $nextCharacter = mb_substr($normalizedCandidate, $fromIndex, 1);
            if ($util->getNddPrefixForRegion($region, true) !== null
                && preg_match('/^\p{Nd}$/u', $nextCharacter) === 1
            ) {
                // This means there is no formatting symbol after the NDC. In this case, we only
                // accept the number if there is no formatting symbol at all in the number, except
                // for extensions. This is only important for countries with national prefixes.
                $nationalSignificantNumber = $util->getNationalSignificantNumber($number);
                $fromFirstGroup = mb_substr($normalizedCandidate, $fromIndex - mb_strlen($group));

                // "Starts with" check: compare the leading characters (offset 0, length of the
                // national significant number), not the text that follows them.
                return mb_substr($fromFirstGroup, 0, mb_strlen($nationalSignificantNumber))
                    === $nationalSignificantNumber;
            }
        }
    }

    // The check here makes sure that we haven't mistakenly already used the extension to
    // match the last group of the subscriber number. Note the extension cannot have
    // formatting in-between digits
    if ($number->hasExtension()) {
        return mb_strpos(mb_substr($normalizedCandidate, $fromIndex), $number->getExtension()) !== false;
    }

    return true;
}
||
| 591 | |||
| 592 | /** |
||
| 593 | * @param PhoneNumberUtil $util |
||
| 594 | * @param PhoneNumber $number |
||
| 595 | * @param string $normalizedCandidate |
||
| 596 | * @param string[] $formattedNumberGroups |
||
| 597 | * @return bool |
||
| 598 | */ |
||
| 599 | 26 | public static function allNumberGroupsAreExactlyPresent( |
|
| 636 | |||
| 637 | /** |
||
| 638 | * Helper method to get the national-number part of a number, formatted without any national |
||
| 639 | * prefix, and return it as a set of digit blocks that would be formatted together. |
||
| 640 | * |
||
| 641 | * @param PhoneNumberUtil $util |
||
| 642 | * @param PhoneNumber $number |
||
| 643 | * @param NumberFormat $formattingPattern |
||
| 644 | * @return string[] |
||
| 645 | */ |
||
| 646 | 52 | protected static function getNationalNumberGroups( |
|
| 671 | |||
| 672 | /** |
||
| 673 | * @param PhoneNumber $number |
||
| 674 | * @param string $candidate |
||
| 675 | * @param PhoneNumberUtil $util |
||
| 676 | * @param \Closure $checker |
||
| 677 | * @return bool |
||
| 678 | */ |
||
| 679 | 52 | public static function checkNumberGroupingIsValid( |
|
| 706 | |||
| 707 | /** |
||
| 708 | * @param PhoneNumber $number |
||
| 709 | * @param string $candidate |
||
| 710 | * @return bool |
||
| 711 | */ |
||
| 712 | 53 | public static function containsMoreThanOneSlashInNationalNumber(PhoneNumber $number, $candidate) |
|
| 742 | |||
| 743 | /** |
||
| 744 | * @param PhoneNumber $number |
||
| 745 | * @param string $candidate |
||
| 746 | * @param PhoneNumberUtil $util |
||
| 747 | * @return bool |
||
| 748 | */ |
||
| 749 | 97 | public static function containsOnlyValidXChars(PhoneNumber $number, $candidate, PhoneNumberUtil $util) |
|
| 781 | |||
| 782 | /** |
||
| 783 | * @param PhoneNumber $number |
||
| 784 | * @param PhoneNumberUtil $util |
||
| 785 | * @return bool |
||
| 786 | */ |
||
| 787 | 97 | public static function isNationalPrefixPresentIfRequired(PhoneNumber $number, PhoneNumberUtil $util) |
|
| 828 | |||
| 829 | |||
| 830 | /** |
||
| 831 | * Storage for Alternate Formats |
||
| 832 | * @var PhoneMetadata[] |
||
| 833 | */ |
||
| 834 | protected static $callingCodeToAlternateFormatsMap = array(); |
||
| 835 | |||
| 836 | /** |
||
| 837 | * @param $countryCallingCode |
||
| 838 | * @return PhoneMetadata|null |
||
| 839 | */ |
||
| 840 | 13 | protected static function getAlternateFormatsForCountry($countryCallingCode) |
|
| 854 | |||
| 855 | /** |
||
| 856 | * @param string $countryCallingCode |
||
| 857 | * @throws \Exception |
||
| 858 | */ |
||
| 859 | 2 | protected static function loadAlternateFormatsMetadataFromFile($countryCallingCode) |
|
| 873 | |||
| 874 | |||
| 875 | /** |
||
| 876 | * Return the current element |
||
| 877 | * @link http://php.net/manual/en/iterator.current.php |
||
| 878 | * @return PhoneNumberMatch|null |
||
| 879 | */ |
||
| 880 | 197 | public function current() |
|
| 884 | |||
| 885 | /** |
||
| 886 | * Move forward to next element |
||
| 887 | * @link http://php.net/manual/en/iterator.next.php |
||
| 888 | * @return void Any returned value is ignored. |
||
| 889 | */ |
||
| 890 | 199 | public function next() |
|
| 903 | |||
| 904 | /** |
||
| 905 | * Return the key of the current element |
||
| 906 | * @link http://php.net/manual/en/iterator.key.php |
||
| 907 | * @return mixed scalar on success, or null on failure. |
||
| 908 | * @since 5.0.0 |
||
| 909 | */ |
||
| 910 | public function key() |
||
| 914 | |||
| 915 | /** |
||
| 916 | * Checks if current position is valid |
||
| 917 | * @link http://php.net/manual/en/iterator.valid.php |
||
| 918 | * @return boolean The return value will be cast to boolean and then evaluated. |
||
| 919 | * Returns true on success or false on failure. |
||
| 920 | * @since 5.0.0 |
||
| 921 | */ |
||
| 922 | 29 | public function valid() |
|
| 926 | |||
| 927 | /** |
||
| 928 | * Rewind the Iterator to the first element |
||
| 929 | * @link http://php.net/manual/en/iterator.rewind.php |
||
| 930 | * @return void Any returned value is ignored. |
||
| 931 | * @since 5.0.0 |
||
| 932 | */ |
||
| 933 | 18 | public function rewind() |
|
| 938 | } |
||
| 939 |