Complex classes like StringHelper often do a lot of different things. To break such a class down, we need to identify a cohesive component within that class. A common approach to find such a component is to look for fields/methods that share the same prefixes or suffixes. You can also have a look at the cohesion graph to spot any unconnected or weakly connected components.
Once you have determined the fields that belong together, you can apply the Extract Class refactoring. If the component makes sense as a sub-class, Extract Subclass is also a candidate, and is often faster.
While breaking up the class, it is a good idea to analyze how other classes use StringHelper, and based on these observations, apply Extract Interface, too.
1 | <?php |
||
7 | class StringHelper |
||
8 | { |
||
9 | /** Constants */ |
||
10 | /** Regular Expressions */ |
||
11 | // Fraction |
||
12 | const STRING_REGEXP_FRACTION = '(-?)(\d+)\s+(\d+\/\d+)'; |
||
13 | |||
14 | /** |
||
15 | * Control characters array. |
||
16 | * |
||
17 | * @var string[] |
||
18 | */ |
||
19 | private static $controlCharacters = []; |
||
20 | |||
21 | /** |
||
22 | * SYLK Characters array. |
||
23 | * |
||
24 | * @var array |
||
25 | */ |
||
26 | private static $SYLKCharacters = []; |
||
27 | |||
28 | /** |
||
29 | * Decimal separator. |
||
30 | * |
||
31 | * @var string |
||
32 | */ |
||
33 | private static $decimalSeparator; |
||
34 | |||
35 | /** |
||
36 | * Thousands separator. |
||
37 | * |
||
38 | * @var string |
||
39 | */ |
||
40 | private static $thousandsSeparator; |
||
41 | |||
42 | /** |
||
43 | * Currency code. |
||
44 | * |
||
45 | * @var string |
||
46 | */ |
||
47 | private static $currencyCode; |
||
48 | |||
49 | /** |
||
50 | * Is iconv extension available? |
||
51 | * |
||
52 | * @var bool |
||
53 | */ |
||
54 | private static $isIconvEnabled; |
||
55 | |||
56 | /** |
||
57 | * Build control characters array. |
||
58 | */ |
||
59 | 1 | private static function buildControlCharacters() |
|
69 | |||
70 | /** |
||
71 | * Build SYLK characters array. |
||
72 | */ |
||
73 | 1 | private static function buildSYLKCharacters() |
|
234 | |||
235 | /** |
||
236 | * Get whether iconv extension is available. |
||
237 | * |
||
238 | * @return bool |
||
239 | */ |
||
240 | 19 | public static function getIsIconvEnabled() |
|
280 | |||
281 | 3 | private static function buildCharacterSets() |
|
291 | |||
292 | /** |
||
293 | * Convert from OpenXML escaped control character to PHP control character. |
||
294 | * |
||
295 | * Excel 2007 team: |
||
296 | * ---------------- |
||
297 | * That's correct, control characters are stored directly in the shared-strings table. |
||
298 | * We do encode characters that cannot be represented in XML using the following escape sequence: |
||
299 | * _xHHHH_ where H represents a hexadecimal character in the character's value... |
||
300 | * So you could end up with something like _x0008_ in a string (either in a cell value (<v>) |
||
301 | * element or in the shared string <t> element. |
||
302 | * |
||
303 | * @param string $value Value to unescape |
||
304 | * |
||
305 | * @return string |
||
306 | */ |
||
307 | 1 | public static function controlCharacterOOXML2PHP($value) |
|
313 | |||
314 | /** |
||
315 | * Convert from PHP control character to OpenXML escaped control character. |
||
316 | * |
||
317 | * Excel 2007 team: |
||
318 | * ---------------- |
||
319 | * That's correct, control characters are stored directly in the shared-strings table. |
||
320 | * We do encode characters that cannot be represented in XML using the following escape sequence: |
||
321 | * _xHHHH_ where H represents a hexadecimal character in the character's value... |
||
322 | * So you could end up with something like _x0008_ in a string (either in a cell value (<v>) |
||
323 | * element or in the shared string <t> element. |
||
324 | * |
||
325 | * @param string $value Value to escape |
||
326 | * |
||
327 | * @return string |
||
328 | */ |
||
329 | 1 | public static function controlCharacterPHP2OOXML($value) |
|
335 | |||
336 | /** |
||
337 | * Try to sanitize UTF8, stripping invalid byte sequences. Not perfect. Does not surrogate characters. |
||
338 | * |
||
339 | * @param string $value |
||
340 | * |
||
341 | * @return string |
||
342 | */ |
||
343 | 18 | public static function sanitizeUTF8($value) |
|
355 | |||
356 | /** |
||
357 | * Check if a string contains UTF8 data. |
||
358 | * |
||
359 | * @param string $value |
||
360 | * |
||
361 | * @return bool |
||
362 | */ |
||
363 | public static function isUTF8($value) |
||
367 | |||
/**
 * Formats a numeric value as a string for output in the various writers, forcing
 * a point as decimal separator in case the locale is other than English.
 *
 * Floats are cast to string explicitly before the comma is replaced, so the call
 * does not rely on implicit float-to-string coercion (which is rejected when
 * strict_types is enabled). Any other value is simply cast to string, unchanged.
 *
 * @param mixed $value
 *
 * @return string
 */
public static function formatNumber($value)
{
    if (is_float($value)) {
        // Older PHP versions render floats with the locale's decimal separator,
        // so a comma may appear here; normalise it to a point.
        return str_replace(',', '.', (string) $value);
    }

    return (string) $value;
}
||
384 | |||
/**
 * Converts a UTF-8 string into BIFF8 Unicode string data, optionally with rich-text runs.
 *
 * Without runs the result is: 8-bit character count, option-flag byte 0x01, then the
 * text converted to UTF-16LE.
 * With runs the result is: 16-bit little-endian character count, option-flag byte 0x09,
 * 16-bit run count, the UTF-16LE text, then one pair of 16-bit values (strlen, fontidx)
 * per run.
 *
 * See OpenOffice.org's Documentation of the Microsoft Excel File Format, sect. 2.5.3.
 *
 * @param string $value UTF-8 encoded string
 * @param mixed[] $arrcRuns Details of rich text runs in $value; each entry is read
 *                          through its 'strlen' and 'fontidx' keys
 *
 * @return string
 */
public static function UTF8toBIFF8UnicodeShort($value, $arrcRuns = [])
{
    $charCount = self::countCharacters($value, 'UTF-8');
    $utf16Text = self::convertEncoding($value, 'UTF-16LE', 'UTF-8');

    // Plain string: short header followed directly by the characters.
    if (empty($arrcRuns)) {
        return pack('CC', $charCount, 0x0001) . $utf16Text;
    }

    // String with formatting runs: header, run count, characters, then the runs.
    $header = pack('vC', $charCount, 0x09) . pack('v', count($arrcRuns));

    $runData = '';
    foreach ($arrcRuns as $run) {
        $runData .= pack('v', $run['strlen']) . pack('v', $run['fontidx']);
    }

    return $header . $utf16Text . $runData;
}
||
419 | |||
/**
 * Converts a UTF-8 string into BIFF8 Unicode string data with a 16-bit string length.
 *
 * The result is: 16-bit little-endian character count, option-flag byte 0x01, then the
 * text converted to UTF-16LE. No rich-text runs are written.
 *
 * See OpenOffice.org's Documentation of the Microsoft Excel File Format, sect. 2.5.3.
 *
 * @param string $value UTF-8 encoded string
 *
 * @return string
 */
public static function UTF8toBIFF8UnicodeLong($value)
{
    // Header: character count (not byte count) plus the option flags.
    $header = pack('vC', self::countCharacters($value, 'UTF-8'), 0x0001);

    return $header . self::convertEncoding($value, 'UTF-16LE', 'UTF-8');
}
||
443 | |||
/**
 * Convert a string from one character encoding to another.
 *
 * iconv is tried first when getIsIconvEnabled() reports it as usable; if it is not
 * usable, or if iconv() returns false, mb_convert_encoding() performs the conversion.
 *
 * @param string $value
 * @param string $to Encoding to convert to, e.g. 'UTF-8'
 * @param string $from Encoding to convert from, e.g. 'UTF-16LE'
 *
 * @return string
 */
public static function convertEncoding($value, $to, $from)
{
    if (!self::getIsIconvEnabled()) {
        return mb_convert_encoding($value, $to, $from);
    }

    $converted = iconv($from, $to . '//IGNORE//TRANSLIT', $value);

    // iconv() signals failure with false; fall back to mbstring in that case.
    return (false !== $converted)
        ? $converted
        : mb_convert_encoding($value, $to, $from);
}
||
464 | |||
465 | /** |
||
466 | * Get character count. |
||
467 | * |
||
468 | * @param string $value |
||
469 | * @param string $enc Encoding |
||
470 | * |
||
471 | * @return int Character count |
||
472 | */ |
||
473 | 68 | public static function countCharacters($value, $enc = 'UTF-8') |
|
477 | |||
478 | /** |
||
479 | * Get a substring of a UTF-8 encoded string. |
||
480 | * |
||
481 | * @param string $pValue UTF-8 encoded string |
||
482 | * @param int $pStart Start offset |
||
483 | * @param int $pLength Maximum number of characters in substring |
||
484 | * |
||
485 | * @return string |
||
486 | */ |
||
487 | 44 | public static function substring($pValue, $pStart, $pLength = 0) |
|
491 | |||
492 | /** |
||
493 | * Convert a UTF-8 encoded string to upper case. |
||
494 | * |
||
495 | * @param string $pValue UTF-8 encoded string |
||
496 | * |
||
497 | * @return string |
||
498 | */ |
||
499 | 4 | public static function strToUpper($pValue) |
|
503 | |||
504 | /** |
||
505 | * Convert a UTF-8 encoded string to lower case. |
||
506 | * |
||
507 | * @param string $pValue UTF-8 encoded string |
||
508 | * |
||
509 | * @return string |
||
510 | */ |
||
511 | 4 | public static function strToLower($pValue) |
|
515 | |||
516 | /** |
||
517 | * Convert a UTF-8 encoded string to title/proper case |
||
518 | * (uppercase every first character in each word, lower case all other characters). |
||
519 | * |
||
520 | * @param string $pValue UTF-8 encoded string |
||
521 | * |
||
522 | * @return string |
||
523 | */ |
||
524 | 3 | public static function strToTitle($pValue) |
|
528 | |||
529 | 21 | public static function mbIsUpper($char) |
|
533 | |||
534 | 21 | public static function mbStrSplit($string) |
|
540 | |||
541 | /** |
||
542 | * Reverse the case of a string, so that all uppercase characters become lowercase |
||
543 | * and all lowercase characters become uppercase. |
||
544 | * |
||
545 | * @param string $pValue UTF-8 encoded string |
||
546 | * |
||
547 | * @return string |
||
548 | */ |
||
549 | 21 | public static function strCaseReverse($pValue) |
|
562 | |||
/**
 * Detect a mixed fraction such as "1 1/2" or "-2 3/4" and, if found, replace the
 * operand with its calculated numeric value.
 *
 * The operand is only modified when the whole string matches STRING_REGEXP_FRACTION.
 * The value is obtained by evaluating a small formula ("=+1+1/2", "=-2-3/4") through
 * the Calculation engine.
 *
 * @param string &$operand string value to test; overwritten with the result on a match
 *
 * @return bool true when the operand was a fraction and has been converted
 */
public static function convertToNumberIfFraction(&$operand)
{
    if (!preg_match('/^' . self::STRING_REGEXP_FRACTION . '$/i', $operand, $match)) {
        return false;
    }

    $wholePart = $match[2];
    $fractionPart = $match[3];
    // The sign applies to both the whole part and the fraction part.
    $sign = ($match[1] === '-') ? '-' : '+';

    $operand = Calculation::getInstance()->_calculateFormulaValue(
        '=' . $sign . $wholePart . $sign . $fractionPart
    );

    return true;
}
||
583 | |||
584 | // function convertToNumberIfFraction() |
||
585 | |||
586 | /** |
||
587 | * Get the decimal separator. If it has not yet been set explicitly, try to obtain number |
||
588 | * formatting information from locale. |
||
589 | * |
||
590 | * @return string |
||
591 | */ |
||
592 | 26 | public static function getDecimalSeparator() |
|
607 | |||
608 | /** |
||
609 | * Set the decimal separator. Only used by NumberFormat::toFormattedString() |
||
610 | * to format output by \PhpOffice\PhpSpreadsheet\Writer\Html and \PhpOffice\PhpSpreadsheet\Writer\Pdf. |
||
611 | * |
||
612 | * @param string $pValue Character for decimal separator |
||
613 | */ |
||
614 | 77 | public static function setDecimalSeparator($pValue) |
|
618 | |||
619 | /** |
||
620 | * Get the thousands separator. If it has not yet been set explicitly, try to obtain number |
||
621 | * formatting information from locale. |
||
622 | * |
||
623 | * @return string |
||
624 | */ |
||
625 | 34 | public static function getThousandsSeparator() |
|
640 | |||
641 | /** |
||
642 | * Set the thousands separator. Only used by NumberFormat::toFormattedString() |
||
643 | * to format output by \PhpOffice\PhpSpreadsheet\Writer\Html and \PhpOffice\PhpSpreadsheet\Writer\Pdf. |
||
644 | * |
||
645 | * @param string $pValue Character for thousands separator |
||
646 | */ |
||
647 | 77 | public static function setThousandsSeparator($pValue) |
|
651 | |||
652 | /** |
||
653 | * Get the currency code. If it has not yet been set explicitly, try to obtain the |
||
654 | * symbol information from locale. |
||
655 | * |
||
656 | * @return string |
||
657 | */ |
||
658 | 17 | public static function getCurrencyCode() |
|
678 | |||
679 | /** |
||
680 | * Set the currency code. Only used by NumberFormat::toFormattedString() |
||
681 | * to format output by \PhpOffice\PhpSpreadsheet\Writer\Html and \PhpOffice\PhpSpreadsheet\Writer\Pdf. |
||
682 | * |
||
683 | * @param string $pValue Character for currency code |
||
684 | */ |
||
685 | 41 | public static function setCurrencyCode($pValue) |
|
689 | |||
/**
 * Convert SYLK encoded string to UTF-8.
 *
 * A string that contains no ESC (0x1B) byte is returned untouched. Otherwise every
 * key of the SYLK character table is replaced, in table order, by its mapped value.
 *
 * @param string $pValue
 *
 * @return string UTF-8 encoded string
 */
public static function SYLKtoUTF8($pValue)
{
    self::buildCharacterSets();

    // If there is no escape character in the string there is nothing to do.
    // The needle is spelled as an explicit escape sequence: a raw ESC byte between
    // the quotes is invisible and easily lost, and an empty needle makes strpos()
    // either warn (PHP 7) or always report a match at offset 0 (PHP 8).
    if (strpos($pValue, "\x1B") === false) {
        return $pValue;
    }

    foreach (self::$SYLKCharacters as $sylkSequence => $utf8Character) {
        $pValue = str_replace($sylkSequence, $utf8Character, $pValue);
    }

    return $pValue;
}
||
712 | |||
713 | /** |
||
714 | * Retrieve any leading numeric part of a string, or return the full string if no leading numeric |
||
715 | * (handles basic integer or float, but not exponent or non decimal). |
||
716 | * |
||
717 | * @param string $value |
||
718 | * |
||
719 | * @return mixed string or only the leading numeric part of the string |
||
720 | */ |
||
721 | 82 | public static function testStringAsNumeric($value) |
|
730 | } |
||
731 |
Sometimes obsolete code just ends up commented out instead of removed. In this case it is better to remove the code once you have checked you do not need it.
The code might also have been commented out for debugging purposes. In this case it is vital that someone uncomments it again or your project may behave in very unexpected ways in production.
This check looks for comments that seem to be mostly valid code and reports them.