| Total Complexity | 62 |
| Total Lines | 395 |
| Duplicated Lines | 0 % |
| Changes | 3 | ||
| Bugs | 1 | Features | 0 |
Complex classes like Text often do a lot of different things. To break such a class down, we need to identify a cohesive component within that class. A common approach to find such a component is to look for fields/methods that share the same prefixes or suffixes.
Once you have determined the fields that belong together, you can apply the Extract Class refactoring. If the component makes sense as a sub-class, Extract Subclass is also a candidate, and is often faster.
While breaking up the class, it is a good idea to analyze how other classes use Text, and based on these observations, apply Extract Interface, too.
| 1 | <?php |
||
| 39 | class Text |
||
| 40 | { |
||
| 41 | // Constants |
||
| 42 | // ========================================================================= |
||
| 43 | |||
/**
 * Maps two-letter language codes to the English names of those languages.
 *
 * NOTE(review): presumably consulted when resolving stop words for a
 * language; the consumer is not visible in this excerpt — confirm.
 */
public const LANGUAGE_MAP = [
    'en' => 'English',
    'fr' => 'French',
    'de' => 'German',
    'it' => 'Italian',
    'no' => 'Norwegian',
    'es' => 'Spanish',
];
||
| 52 | |||
| 53 | // Public Static Methods |
||
| 54 | // ========================================================================= |
||
| 55 | |||
/**
 * Truncates the string to a given length. If $substring is provided, and
 * truncating occurs, the string is further truncated so that the substring
 * may be appended without exceeding the desired length.
 *
 * Before truncating, the string is run through HtmlPurifier with no allowed
 * HTML elements and its HTML entities are decoded (quotes are left as-is).
 *
 * Empty input (null, '', '0', 0, false, []) is returned without processing,
 * always as a string, so the declared return type is never violated.
 *
 * @param string $string    The string to truncate
 * @param int    $length    Desired length of the truncated string
 * @param string $substring The substring to append if it can fit
 *
 * @return string The resulting string after truncating
 */
public static function truncate($string, $length, $substring = '…'): string
{
    if (empty($string)) {
        // Returning null (or an array) here would raise a TypeError because
        // of the string return type, so normalise non-scalars to ''.
        return is_scalar($string) ? (string)$string : '';
    }

    $string = HtmlPurifier::process($string, ['HTML.Allowed' => '']);
    $string = html_entity_decode($string, ENT_NOQUOTES, 'UTF-8');

    return (string)Stringy::create($string)->truncate($length, $substring);
}
||
| 79 | |||
/**
 * Truncates the string to a given length, while ensuring that it does not
 * split words. If $substring is provided, and truncating occurs, the
 * string is further truncated so that the substring may be appended without
 * exceeding the desired length.
 *
 * Before truncating, the string is run through HtmlPurifier with no allowed
 * HTML elements and its HTML entities are decoded (quotes are left as-is).
 *
 * Empty input (null, '', '0', 0, false, []) is returned without processing,
 * always as a string, so the declared return type is never violated.
 *
 * @param string $string    The string to truncate
 * @param int    $length    Desired length of the truncated string
 * @param string $substring The substring to append if it can fit
 *
 * @return string The resulting string after truncating
 */
public static function truncateOnWord($string, $length, $substring = '…'): string
{
    if (empty($string)) {
        // Returning null (or an array) here would raise a TypeError because
        // of the string return type, so normalise non-scalars to ''.
        return is_scalar($string) ? (string)$string : '';
    }

    $string = HtmlPurifier::process($string, ['HTML.Allowed' => '']);
    $string = html_entity_decode($string, ENT_NOQUOTES, 'UTF-8');

    return (string)Stringy::create($string)->safeTruncate($length, $substring);
}
||
| 104 | |||
/**
 * Extract plain old text from a field
 *
 * Dispatches on the type of $field:
 *  - an EntryQuery, or an array-like whose element 0 is an Entry, goes to
 *    extractTextFromMatrix()
 *  - a NeoBlockQuery, or an array-like whose element 0 is a NeoBlock, goes
 *    to extractTextFromNeo()
 *  - a TagQuery, or an array-like whose element 0 is a Tag, goes to
 *    extractTextFromTags()
 *  - a DoxterData value has its raw Markdown parsed, then tags stripped
 *  - anything else is cast to a string (element 0 for array-likes) and has
 *    its tags stripped
 *
 * The result is always passed through sanitizeUserInput().
 *
 * @param mixed $field The field value to extract text from
 *
 * @return string
 */
public static function extractTextFromField($field): string
{
    if (empty($field)) {
        return '';
    }

    // Read element 0 once, and safely: empty() is never true for objects, so
    // an empty Collection (or an array without key 0) reaches this point and
    // an unguarded $field[0] would raise an undefined-offset warning.
    $isArrayLike = self::isArrayLike($field);
    $first = $isArrayLike ? ($field[0] ?? null) : null;

    if ($field instanceof EntryQuery || $first instanceof Entry) {
        $result = self::extractTextFromMatrix($field);
    } elseif ($field instanceof NeoBlockQuery || $first instanceof NeoBlock) {
        $result = self::extractTextFromNeo($field);
    } elseif ($field instanceof TagQuery || $first instanceof Tag) {
        $result = self::extractTextFromTags($field);
    } elseif ($field instanceof DoxterData) {
        $result = self::smartStripTags(Doxter::$plugin->getService()->parseMarkdown($field->getRaw()));
    } else {
        $result = self::smartStripTags((string)($isArrayLike ? $first : $field));
    }

    return self::sanitizeUserInput($result);
}
||
| 139 | |||
| 140 | /** |
||
| 141 | * Extract concatenated text from all of the tags in the $tagElement and |
||
| 142 | * return as a comma-delimited string |
||
| 143 | * |
||
| 144 | * @param TagQuery|Tag[]|array $tags |
||
| 145 | * |
||
| 146 | * @return string |
||
| 147 | */ |
||
| 148 | public static function extractTextFromTags($tags): string |
||
| 164 | } |
||
| 165 | |||
/**
 * Concatenate the text of every text field found in the given matrix blocks.
 *
 * Each block's entry type is asked for its custom fields; every field that is
 * an instance of one of the classes listed under
 * FieldHelper::FIELD_CLASSES[FieldHelper::TEXT_FIELD_CLASS_KEY] has its value
 * run through extractTextFromField() and appended, followed by a space.
 * Blocks whose entry type cannot be resolved are skipped.
 *
 * NOTE(review): a field that is an instance of more than one listed class is
 * appended once per matching class — confirm this is intended.
 *
 * @param EntryQuery|Entry[]|array $blocks      The blocks (or a query for them)
 * @param string                   $fieldHandle If non-empty, only fields with
 *                                              this handle are extracted
 *
 * @return string
 */
public static function extractTextFromMatrix($blocks, $fieldHandle = ''): string
{
    if (empty($blocks)) {
        return '';
    }
    // A query has to be executed to get at the actual blocks
    if ($blocks instanceof EntryQuery) {
        $blocks = $blocks->all();
    }

    $text = '';
    foreach ($blocks as $block) {
        try {
            $entryType = $block->getType();
        } catch (InvalidConfigException $e) {
            $entryType = null;
        }
        if (!$entryType) {
            continue;
        }

        /** @var array $textFieldClasses */
        $textFieldClasses = FieldHelper::FIELD_CLASSES[FieldHelper::TEXT_FIELD_CLASS_KEY];
        foreach ($entryType->getCustomFields() as $customField) {
            foreach ($textFieldClasses as $textFieldClass) {
                if (!($customField instanceof $textFieldClass)) {
                    continue;
                }
                if (empty($fieldHandle) || $customField->handle === $fieldHandle) {
                    $text .= self::extractTextFromField($block[$customField->handle]) . ' ';
                }
            }
        }
    }

    return $text;
}
||
| 211 | |||
/**
 * Concatenate the text of every text field found in the given Neo blocks.
 *
 * Each block's field layout is asked for its custom field elements; every
 * field that is an instance of one of the classes listed under
 * FieldHelper::FIELD_CLASSES[FieldHelper::TEXT_FIELD_CLASS_KEY] has its value
 * run through extractTextFromField() and appended, followed by a space.
 * Blocks without a field layout are skipped.
 *
 * NOTE(review): a field that is an instance of more than one listed class is
 * appended once per matching class — confirm this is intended.
 *
 * @param NeoBlockQuery|NeoBlock[]|array $blocks      The blocks (or a query for them)
 * @param string                         $fieldHandle If non-empty, only fields
 *                                                    with this handle are extracted
 *
 * @return string
 */
public static function extractTextFromNeo($blocks, $fieldHandle = ''): string
{
    if (empty($blocks)) {
        return '';
    }
    // A query has to be executed to get at the actual blocks
    if ($blocks instanceof NeoBlockQuery) {
        $blocks = $blocks->all();
    }

    $text = '';
    foreach ($blocks as $block) {
        $fieldLayout = $block->getFieldLayout();
        if (!$fieldLayout) {
            continue;
        }

        /** @var array $textFieldClasses */
        $textFieldClasses = FieldHelper::FIELD_CLASSES[FieldHelper::TEXT_FIELD_CLASS_KEY];
        foreach ($fieldLayout->getCustomFieldElements() as $layoutElement) {
            $customField = $layoutElement->getField();
            foreach ($textFieldClasses as $textFieldClass) {
                if (!($customField instanceof $textFieldClass)) {
                    continue;
                }
                if (empty($fieldHandle) || $customField->handle === $fieldHandle) {
                    $text .= self::extractTextFromField($block[$customField->handle]) . ' ';
                }
            }
        }
    }

    return $text;
}
||
| 253 | |||
/**
 * Return the most important keywords extracted from the text as a comma-
 * delimited string.
 *
 * The text is cleaned with cleanupText() and ranked by TextRankFacade. When
 * no keywords come back, the original, unmodified $text is returned instead.
 *
 * @param string $text         The text to extract keywords from
 * @param int    $limit        Maximum number of keywords to return
 * @param bool   $useStopWords Whether to configure stop words based on the
 *                             first two characters of Seomatic::$language
 *
 * @return string
 */
public static function extractKeywords($text, $limit = 15, $useStopWords = true): string
{
    if (empty($text)) {
        return '';
    }

    $textRank = new TextRankFacade();
    if ($useStopWords) {
        // Only the leading two-character language code is used for the lookup
        $languageCode = strtolower(substr(Seomatic::$language, 0, 2));
        $stopWords = self::stopWordsForLanguage($languageCode);
        if ($stopWords !== null) {
            $textRank->setStopWords($stopWords);
        }
    }

    // The keywords are the array keys of the ranking result
    $rankedKeywords = $textRank->getOnlyKeyWords(self::cleanupText($text));
    if (empty($rankedKeywords)) {
        // Nothing could be extracted; hand back the text as it was passed in
        return $text;
    }

    $topKeywords = array_slice(array_keys($rankedKeywords), 0, $limit);

    return self::sanitizeUserInput(implode(', ', $topKeywords));
}
||
| 290 | |||
/**
 * Extract a summary made up of the most important sentences from the text,
 * joined by single spaces.
 *
 * The text is cleaned with cleanupText() and the sentences are chosen by
 * TextRankFacade::getHighlights(). When no sentences come back, the
 * original, unmodified $text is returned instead.
 *
 * NOTE(review): the previous documentation stated that three sentences are
 * returned; the count is decided by the library — confirm.
 *
 * @param string $text         The text to summarize
 * @param bool   $useStopWords Whether to configure stop words based on the
 *                             first two characters of Seomatic::$language
 *
 * @return string
 */
public static function extractSummary($text, $useStopWords = true): string
{
    if (empty($text)) {
        return '';
    }

    $textRank = new TextRankFacade();
    if ($useStopWords) {
        // Only the leading two-character language code is used for the lookup
        $languageCode = strtolower(substr(Seomatic::$language, 0, 2));
        $stopWords = self::stopWordsForLanguage($languageCode);
        if ($stopWords !== null) {
            $textRank->setStopWords($stopWords);
        }
    }

    // Array of the most important sentences
    $highlights = $textRank->getHighlights(self::cleanupText($text));
    if (empty($highlights)) {
        // Nothing could be extracted; hand back the text as it was passed in
        return $text;
    }

    return self::sanitizeUserInput(implode(' ', $highlights));
}
||
| 326 | |||
| 327 | |||
/**
 * Sanitize user input by decoding any HTML Entities, URL decoding the text,
 * then removing any newlines, stripping tags, stripping Twig tags, and changing
 * single {}'s into ()'s
 *
 * Null input is treated as an empty string. A value such as '0' is preserved
 * rather than being collapsed to an empty string.
 *
 * @param string|null $str The text to sanitize
 *
 * @return string
 */
public static function sanitizeUserInput($str): string
{
    // Passing null to the string functions below is deprecated as of PHP 8.1
    $str = (string)$str;
    // Do some general cleanup
    $str = html_entity_decode($str, ENT_NOQUOTES, 'UTF-8');
    $str = rawurldecode($str);
    // Remove any linebreaks
    $str = str_replace(["\r", "\n"], '', $str);
    // Strip all HTML, then decode whatever entities the purifier emitted
    $str = HtmlPurifier::process($str, ['HTML.Allowed' => '']);
    $str = html_entity_decode($str, ENT_NOQUOTES, 'UTF-8');
    // Remove any embedded Twig code; preg_replace() returns null on a PCRE
    // error, in which case the text is kept as it was rather than discarded
    $str = preg_replace('/{{.*?}}/', '', $str) ?? $str;
    $str = preg_replace('/{%.*?%}/', '', $str) ?? $str;
    // Change single brackets to parenthesis
    $str = str_replace(['{', '}'], ['(', ')'], $str);

    return $str;
}
||
| 357 | |||
| 358 | /** |
||
| 359 | * Strip HTML tags, but replace them with a space rather than just eliminating them |
||
| 360 | * |
||
| 361 | * @param $str |
||
| 362 | * @return string |
||
| 363 | */ |
||
| 364 | public static function smartStripTags($str) |
||
| 372 | } |
||
| 373 | |||
/**
 * Clean up the passed in text by converting it to UTF-8, stripping tags,
 * removing whitespace, and decoding HTML entities
 *
 * Note that entities are decoded twice: once with ENT_NOQUOTES right after
 * the tags are stripped, and once more with the default flags at the end.
 *
 * @param string $text The text to clean up
 *
 * @return string The cleaned text, or '' when $text is empty
 */
public static function cleanupText($text): string
{
    if (empty($text)) {
        return '';
    }
    // Convert to UTF-8
    $text = StringHelper::convertToUtf8($text);
    // Strip HTML tags
    $text = HtmlPurifier::process($text, ['HTML.Allowed' => '']);
    $text = html_entity_decode($text, ENT_NOQUOTES, 'UTF-8');
    // Remove excess whitespace. With the /u modifier preg_replace() returns
    // null on malformed UTF-8; keep the text as it was instead of losing it.
    $text = preg_replace('/\s{2,}/u', ' ', $text) ?? $text;
    // Decode any HTML entities
    $text = html_entity_decode($text);

    return $text;
}
||
| 399 | |||
/**
 * Report whether $var can be treated like an array: either a native PHP
 * array or an instance of Collection.
 *
 * @param mixed $var The value to inspect
 *
 * @return bool
 */
public static function isArrayLike($var): bool
{
    if (is_array($var)) {
        return true;
    }

    return $var instanceof Collection;
}
||
| 410 | |||
| 411 | // Protected Static Methods |
||
| 412 | // ========================================================================= |
||
| 413 | |||
| 414 | /** |
||
| 415 | * @param string $language |
||
| 416 | * |
||
| 417 | * @return null|StopWordsAbstract |
||
| 418 | */ |
||
| 419 | protected static function stopWordsForLanguage(string $language) |
||
| 434 | } |
||
| 435 | } |
||
| 436 |
The issue could also be caused by a filter entry in the build configuration. If the path has been excluded in your configuration, e.g.
excluded_paths: ["lib/*"], you can move it to the dependency path list as follows: dependency_paths: ["lib/*"]. For further information see https://scrutinizer-ci.com/docs/tools/php/php-scrutinizer/#list-dependency-paths