nystudio107 /
craft-seomatic
| 1 | <?php |
||||
| 2 | /** |
||||
| 3 | * SEOmatic plugin for Craft CMS |
||||
| 4 | * |
||||
| 5 | * A turnkey SEO implementation for Craft CMS that is comprehensive, powerful, |
||||
| 6 | * and flexible |
||||
| 7 | * |
||||
| 8 | * @link https://nystudio107.com |
||||
| 9 | * @copyright Copyright (c) 2017 nystudio107 |
||||
| 10 | */ |
||||
| 11 | |||||
| 12 | namespace nystudio107\seomatic\helpers; |
||||
| 13 | |||||
| 14 | use benf\neo\elements\Block as NeoBlock; |
||||
| 15 | use benf\neo\elements\db\BlockQuery as NeoBlockQuery; |
||||
| 16 | use craft\elements\db\EntryQuery; |
||||
| 17 | use craft\elements\db\TagQuery; |
||||
| 18 | use craft\elements\Entry; |
||||
|
0 ignored issues
–
show
|
|||||
| 19 | use craft\elements\Tag; |
||||
| 20 | use craft\helpers\HtmlPurifier; |
||||
| 21 | use craft\helpers\StringHelper; |
||||
| 22 | use Illuminate\Support\Collection; |
||||
| 23 | use nystudio107\seomatic\helpers\Field as FieldHelper; |
||||
| 24 | use nystudio107\seomatic\Seomatic; |
||||
| 25 | use PhpScience\TextRank\TextRankFacade; |
||||
| 26 | use PhpScience\TextRank\Tool\StopWords\StopWordsAbstract; |
||||
| 27 | use Stringy\Stringy; |
||||
| 28 | use verbb\doxter\Doxter; |
||||
| 29 | use verbb\doxter\fields\data\DoxterData; |
||||
| 30 | use yii\base\InvalidConfigException; |
||||
| 31 | use function array_slice; |
||||
| 32 | use function is_array; |
||||
| 33 | |||||
/**
 * Static helpers for extracting plain text from Craft CMS field values and
 * for cleaning, truncating, and summarizing that text.
 *
 * @author    nystudio107
 * @package   Seomatic
 * @since     3.0.0
 */
class Text
{
    // Constants
    // =========================================================================

    /**
     * Maps a lowercase two-letter language code to the name of the matching
     * stop words class in the PhpScience\TextRank\Tool\StopWords namespace
     */
    public const LANGUAGE_MAP = [
        'en' => 'English',
        'fr' => 'French',
        'de' => 'German',
        'it' => 'Italian',
        'no' => 'Norwegian',
        'es' => 'Spanish',
    ];

    // Public Static Methods
    // =========================================================================

    /**
     * Truncates the string to a given length. If $substring is provided, and
     * truncating occurs, the string is further truncated so that the substring
     * may be appended without exceeding the desired length.
     *
     * HTML tags are stripped and HTML entities decoded before truncating.
     *
     * @param string $string    The string to truncate
     * @param int    $length    Desired length of the truncated string
     * @param string $substring The substring to append if it can fit
     *
     * @return string the resulting string after truncating; empty input is
     *                returned as-is ('' for null or other non-scalar empties)
     */
    public static function truncate($string, $length, $substring = '…'): string
    {
        if (empty($string)) {
            // Returning a raw null here would violate the string return type
            return is_scalar($string) ? (string)$string : '';
        }

        return (string)Stringy::create(self::stripHtml($string))->truncate($length, $substring);
    }

    /**
     * Truncates the string to a given length, while ensuring that it does not
     * split words. If $substring is provided, and truncating occurs, the
     * string is further truncated so that the substring may be appended without
     * exceeding the desired length.
     *
     * HTML tags are stripped and HTML entities decoded before truncating.
     *
     * @param string $string    The string to truncate
     * @param int    $length    Desired length of the truncated string
     * @param string $substring The substring to append if it can fit
     *
     * @return string the resulting string after truncating; empty input is
     *                returned as-is ('' for null or other non-scalar empties)
     */
    public static function truncateOnWord($string, $length, $substring = '…'): string
    {
        if (empty($string)) {
            // Returning a raw null here would violate the string return type
            return is_scalar($string) ? (string)$string : '';
        }

        return (string)Stringy::create(self::stripHtml($string))->safeTruncate($length, $substring);
    }

    /**
     * Extract plain old text from a field value.
     *
     * Entry (Matrix), Neo block and Tag values -- either as an element query
     * or as an array/Collection of elements -- are delegated to the matching
     * extractTextFrom*() method. Doxter values are rendered from Markdown.
     * Anything else is cast to a string. The result is always passed through
     * sanitizeUserInput().
     *
     * @param mixed $field
     *
     * @return string
     */
    public static function extractTextFromField($field): string
    {
        if (empty($field)) {
            return '';
        }
        // Peek at the first item of array-like values. The `??` guards against
        // an undefined-offset error for an empty Collection (which is not
        // empty() because it is an object) or a list that has no 0 key.
        $first = self::isArrayLike($field) ? ($field[0] ?? null) : null;

        if ($field instanceof EntryQuery || $first instanceof Entry) {
            $result = self::extractTextFromMatrix($field);
        } elseif ($field instanceof NeoBlockQuery || $first instanceof NeoBlock) {
            $result = self::extractTextFromNeo($field);
        } elseif ($field instanceof TagQuery || $first instanceof Tag) {
            $result = self::extractTextFromTags($field);
        } elseif ($field instanceof DoxterData) {
            // NOTE(review): static analysis flags Doxter::$plugin as possibly
            // null here; presumably it is always set whenever a DoxterData
            // value exists -- confirm.
            $result = self::smartStripTags(Doxter::$plugin->getService()->parseMarkdown($field->getRaw()));
        } elseif (self::isArrayLike($field)) {
            $result = self::smartStripTags((string)$first);
        } else {
            $result = self::smartStripTags((string)$field);
        }

        return self::sanitizeUserInput($result);
    }

    /**
     * Concatenate the titles of all of the passed in tags and return them as
     * a comma-delimited string
     *
     * @param TagQuery|Tag[]|array $tags
     *
     * @return string
     */
    public static function extractTextFromTags($tags): string
    {
        if (empty($tags)) {
            return '';
        }
        // Resolve an element query into its tags
        if ($tags instanceof TagQuery) {
            $tags = $tags->all();
        }
        $titles = [];
        foreach ($tags as $tag) {
            $titles[] = $tag->title;
        }

        // rtrim() also removes trailing commas/spaces left behind by empty or
        // comma-terminated titles
        return rtrim(implode(', ', $titles), ', ');
    }

    /**
     * Extract text from all of the blocks in a matrix field, concatenating it
     * together.
     *
     * @param EntryQuery|Entry[]|array $blocks
     * @param string                   $fieldHandle If not empty, only the field
     *                                              with this handle is used
     *
     * @return string
     */
    public static function extractTextFromMatrix($blocks, $fieldHandle = ''): string
    {
        if (empty($blocks)) {
            return '';
        }
        // Resolve an element query into its blocks
        if ($blocks instanceof EntryQuery) {
            $blocks = $blocks->all();
        }
        $result = '';
        foreach ($blocks as $block) {
            try {
                $entryType = $block->getType();
            } catch (InvalidConfigException $e) {
                // A block whose entry type can't be resolved contributes no text
                continue;
            }
            if (!$entryType) {
                continue;
            }
            // Collect the text of every text field inside of the matrix block
            foreach ($entryType->getCustomFields() as $field) {
                $result .= self::extractTextFromBlockField($block, $field, $fieldHandle);
            }
        }

        return $result;
    }

    /**
     * Extract text from all of the blocks in a Neo field, concatenating it
     * together.
     *
     * @param NeoBlockQuery|NeoBlock[]|array $blocks
     * @param string                         $fieldHandle If not empty, only the
     *                                                    field with this handle
     *                                                    is used
     *
     * @return string
     */
    public static function extractTextFromNeo($blocks, $fieldHandle = ''): string
    {
        if (empty($blocks)) {
            return '';
        }
        // Resolve an element query into its blocks
        if ($blocks instanceof NeoBlockQuery) {
            $blocks = $blocks->all();
        }
        $result = '';
        foreach ($blocks as $block) {
            $layout = $block->getFieldLayout();
            if (!$layout) {
                continue;
            }
            // Collect the text of every text field inside of the Neo block
            foreach ($layout->getCustomFieldElements() as $fieldElement) {
                $result .= self::extractTextFromBlockField($block, $fieldElement->getField(), $fieldHandle);
            }
        }

        return $result;
    }

    /**
     * Return the most important keywords extracted from the text as a comma-
     * delimited string
     *
     * @param string $text
     * @param int    $limit        Maximum number of keywords to return
     * @param bool   $useStopWords Whether stop words for the current SEOmatic
     *                             language should be ignored
     *
     * @return string the sanitized keywords, or $text unchanged if no keywords
     *                could be extracted
     */
    public static function extractKeywords($text, $limit = 15, $useStopWords = true): string
    {
        if (empty($text)) {
            return '';
        }
        $api = self::createTextRank($useStopWords);
        // Array keyed by the most important keywords
        $keywords = $api->getOnlyKeyWords(self::cleanupText($text));

        // If it's empty, just return the text
        if (empty($keywords)) {
            return $text;
        }

        $result = implode(', ', array_slice(array_keys($keywords), 0, $limit));

        return self::sanitizeUserInput($result);
    }

    /**
     * Extract a summary consisting of the most important sentences from the
     * text, as returned by TextRankFacade::getHighlights()
     *
     * @param string $text
     * @param bool   $useStopWords Whether stop words for the current SEOmatic
     *                             language should be ignored
     *
     * @return string the sanitized summary, or $text unchanged if no sentences
     *                could be extracted
     */
    public static function extractSummary($text, $useStopWords = true): string
    {
        if (empty($text)) {
            return '';
        }
        $api = self::createTextRank($useStopWords);
        // Array of the most important sentences
        $sentences = $api->getHighlights(self::cleanupText($text));

        // If it's empty, just return the text
        if (empty($sentences)) {
            return $text;
        }

        $result = implode(' ', $sentences);

        return self::sanitizeUserInput($result);
    }

    /**
     * Sanitize user input by decoding any HTML Entities, URL decoding the text,
     * then removing any newlines, stripping tags, stripping Twig tags, and changing
     * single {}'s into ()'s
     *
     * @param mixed $str
     *
     * @return string
     */
    public static function sanitizeUserInput($str): string
    {
        // Do some general cleanup
        $str = html_entity_decode((string)$str, ENT_NOQUOTES, 'UTF-8');
        $str = rawurldecode($str);
        // Remove any linebreaks
        $str = (string)preg_replace("/\r|\n/", "", $str);
        $str = self::stripHtml($str);
        // Remove any embedded Twig code; the cast turns a null returned on a
        // regex failure into an empty string
        $str = (string)preg_replace(['/{{.*?}}/', '/{%.*?%}/'], '', $str);

        // Change single brackets to parenthesis. No empty() check here: it
        // would wrongly turn the legitimate text "0" into an empty string.
        return strtr($str, ['{' => '(', '}' => ')']);
    }

    /**
     * Strip HTML tags, but replace them with a space rather than just eliminating them
     *
     * @param string $str
     *
     * @return string
     */
    public static function smartStripTags($str)
    {
        // Put a space in front of every tag so that the text on either side of
        // a stripped tag doesn't run together
        $str = str_replace('<', ' <', $str);
        $str = self::stripHtml($str);

        // Collapse the runs of spaces introduced above into a single space
        return preg_replace('/ {2,}/', ' ', $str) ?? $str;
    }

    /**
     * Clean up the passed in text by converting it to UTF-8, stripping tags,
     * removing whitespace, and decoding HTML entities
     *
     * @param string $text
     *
     * @return string
     */
    public static function cleanupText($text): string
    {
        if (empty($text)) {
            return '';
        }
        // Convert to UTF-8
        $text = StringHelper::convertToUtf8($text);
        // Strip HTML tags
        $text = self::stripHtml($text);
        // Remove excess whitespace; keep the text as-is should the regex fail
        $text = preg_replace('/\s{2,}/u', ' ', $text) ?? $text;

        // Decode any HTML entities
        return html_entity_decode($text);
    }

    /**
     * Is $var an array or array-like object?
     *
     * @param mixed $var
     *
     * @return bool true for PHP arrays and Collection instances
     */
    public static function isArrayLike($var): bool
    {
        if (is_array($var)) {
            return true;
        }

        return $var instanceof Collection;
    }

    // Protected Static Methods
    // =========================================================================

    /**
     * Return a stop words instance for the passed in two-letter language code,
     * using English for any language that isn't in LANGUAGE_MAP
     *
     * @param string $language
     *
     * @return null|StopWordsAbstract null if the stop words class doesn't exist
     */
    protected static function stopWordsForLanguage(string $language)
    {
        $languageName = self::LANGUAGE_MAP[$language] ?? 'English';
        $className = 'PhpScience\\TextRank\\Tool\\StopWords\\' . ucfirst($languageName);

        return class_exists($className) ? new $className() : null;
    }

    // Private Static Methods
    // =========================================================================

    /**
     * Remove all HTML tags via HtmlPurifier, then decode HTML entities
     *
     * @param mixed $str
     *
     * @return string
     */
    private static function stripHtml($str): string
    {
        $str = HtmlPurifier::process($str, ['HTML.Allowed' => '']);

        return html_entity_decode($str, ENT_NOQUOTES, 'UTF-8');
    }

    /**
     * Create a TextRankFacade, optionally configured with the stop words for
     * the current SEOmatic language
     *
     * @param bool $useStopWords
     *
     * @return TextRankFacade
     */
    private static function createTextRank($useStopWords): TextRankFacade
    {
        $api = new TextRankFacade();
        // Set the stop words that should be ignored
        if ($useStopWords) {
            $language = strtolower(substr(Seomatic::$language, 0, 2));
            $stopWords = self::stopWordsForLanguage($language);
            if ($stopWords !== null) {
                $api->setStopWords($stopWords);
            }
        }

        return $api;
    }

    /**
     * Extract the text of a single field of a Matrix/Neo block, if the field
     * is one of the text field classes and matches the optional handle filter
     *
     * @param mixed  $block       The block element; read via $block[$handle]
     * @param mixed  $field       The field to check
     * @param string $fieldHandle If not empty, only a field with this handle
     *                            is used
     *
     * @return string the extracted text followed by a space, or ''
     */
    private static function extractTextFromBlockField($block, $field, $fieldHandle): string
    {
        $result = '';
        /** @var array $fieldClasses */
        $fieldClasses = FieldHelper::FIELD_CLASSES[FieldHelper::TEXT_FIELD_CLASS_KEY];
        foreach ($fieldClasses as $fieldClass) {
            if (!($field instanceof $fieldClass)) {
                continue;
            }
            if (empty($fieldHandle) || $field->handle === $fieldHandle) {
                $result .= self::extractTextFromField($block[$field->handle]) . ' ';
            }
        }

        return $result;
    }
}
||||
| 436 |
The issue could also be caused by a filter entry in the build configuration. If the path has been excluded in your configuration, e.g.
excluded_paths: ["lib/*"], you can move it to the dependency path list instead. For further information see https://scrutinizer-ci.com/docs/tools/php/php-scrutinizer/#list-dependency-paths