1 | <?php |
||||
2 | /** |
||||
3 | * SEOmatic plugin for Craft CMS |
||||
4 | * |
||||
5 | * A turnkey SEO implementation for Craft CMS that is comprehensive, powerful, |
||||
6 | * and flexible |
||||
7 | * |
||||
8 | * @link https://nystudio107.com |
||||
9 | * @copyright Copyright (c) 2017 nystudio107 |
||||
10 | */ |
||||
11 | |||||
12 | namespace nystudio107\seomatic\helpers; |
||||
13 | |||||
14 | use benf\neo\elements\Block as NeoBlock; |
||||
15 | use benf\neo\elements\db\BlockQuery as NeoBlockQuery; |
||||
16 | use craft\elements\db\EntryQuery; |
||||
17 | use craft\elements\db\TagQuery; |
||||
18 | use craft\elements\Entry; |
||||
19 | use craft\elements\Tag; |
||||
20 | use craft\helpers\HtmlPurifier; |
||||
21 | use Illuminate\Support\Collection; |
||||
22 | use nystudio107\seomatic\helpers\Field as FieldHelper; |
||||
23 | use nystudio107\seomatic\Seomatic; |
||||
24 | use PhpScience\TextRank\TextRankFacade; |
||||
25 | use PhpScience\TextRank\Tool\StopWords\StopWordsAbstract; |
||||
26 | use Stringy\Stringy; |
||||
27 | use verbb\doxter\Doxter; |
||||
28 | use verbb\doxter\fields\data\DoxterData; |
||||
29 | use yii\base\InvalidConfigException; |
||||
30 | use function array_slice; |
||||
31 | use function function_exists; |
||||
32 | use function is_array; |
||||
33 | |||||
34 | /** |
||||
35 | * @author nystudio107 |
||||
36 | * @package Seomatic |
||||
37 | * @since 3.0.0 |
||||
38 | */ |
||||
39 | class Text |
||||
40 | { |
||||
41 | // Constants |
||||
42 | // ========================================================================= |
||||
43 | |||||
/**
 * Two-letter language code => language name. stopWordsForLanguage() appends
 * the name to the TextRank stop-words namespace to build a class name.
 */
public const LANGUAGE_MAP = [
    'en' => 'English',
    'fr' => 'French',
    'de' => 'German',
    'it' => 'Italian',
    'no' => 'Norwegian',
    'es' => 'Spanish',
];
||||
52 | |||||
53 | // Public Static Methods |
||||
54 | // ========================================================================= |
||||
55 | |||||
/**
 * Truncates the string to a given length. The text is first passed through
 * HtmlPurifier with an empty 'HTML.Allowed' list and has its HTML entities
 * decoded. If $substring is provided, and truncating occurs, the string is
 * further truncated so that the substring may be appended without exceeding
 * the desired length.
 *
 * @param string|null $string The string to truncate
 * @param int $length Desired length of the truncated string
 * @param string $substring The substring to append if it can fit
 *
 * @return string The truncated text; an empty scalar is returned cast to
 *                string, any other empty value (null, []) yields ''
 */
public static function truncate($string, $length, $substring = '…'): string
{
    // empty() also matches null and [], neither of which may be returned
    // from a method declared `: string`; normalize them while keeping
    // scalar values such as '0' intact
    if (empty($string)) {
        return is_scalar($string) ? (string)$string : '';
    }

    $plainText = HtmlPurifier::process($string, ['HTML.Allowed' => '']);
    $plainText = html_entity_decode($plainText, ENT_NOQUOTES, 'UTF-8');

    return (string)Stringy::create($plainText)->truncate($length, $substring);
}
||||
79 | |||||
/**
 * Truncates the string to a given length, while ensuring that it does not
 * split words. The text is first passed through HtmlPurifier with an empty
 * 'HTML.Allowed' list and has its HTML entities decoded. If $substring is
 * provided, and truncating occurs, the string is further truncated so that
 * the substring may be appended without exceeding the desired length.
 *
 * @param string|null $string The string to truncate
 * @param int $length Desired length of the truncated string
 * @param string $substring The substring to append if it can fit
 *
 * @return string The truncated text; an empty scalar is returned cast to
 *                string, any other empty value (null, []) yields ''
 */
public static function truncateOnWord($string, $length, $substring = '…'): string
{
    // empty() also matches null and [], neither of which may be returned
    // from a method declared `: string`; normalize them while keeping
    // scalar values such as '0' intact
    if (empty($string)) {
        return is_scalar($string) ? (string)$string : '';
    }

    $plainText = HtmlPurifier::process($string, ['HTML.Allowed' => '']);
    $plainText = html_entity_decode($plainText, ENT_NOQUOTES, 'UTF-8');

    return (string)Stringy::create($plainText)->safeTruncate($length, $substring);
}
||||
104 | |||||
/**
 * Extract plain old text from a field.
 *
 * Dispatches on the type of $field (or, for arrays and Collections, on the
 * type of the item at index 0): Entry values go to extractTextFromMatrix(),
 * Neo blocks to extractTextFromNeo(), Tags to extractTextFromTags(), and
 * Doxter data is parsed as Markdown. Anything else is cast to a string and
 * has its tags stripped. The result is always run through
 * sanitizeUserInput().
 *
 * @param mixed $field An element query, an array/Collection of elements,
 *                     a DoxterData object, or any string-castable value
 *
 * @return string The extracted text, or '' if $field is empty
 */
public static function extractTextFromField($field): string
{
    if (empty($field)) {
        return '';
    }
    // Peek at index 0 via ?? so that an empty Collection (objects are never
    // empty()) or an array without a 0 key does not raise "undefined array key"
    $isList = self::isArrayLike($field);
    $first = $isList ? ($field[0] ?? null) : null;

    if ($field instanceof EntryQuery || $first instanceof Entry) {
        $result = self::extractTextFromMatrix($field);
    } elseif ($field instanceof NeoBlockQuery || $first instanceof NeoBlock) {
        $result = self::extractTextFromNeo($field);
    } elseif ($field instanceof TagQuery || $first instanceof Tag) {
        $result = self::extractTextFromTags($field);
    } elseif ($field instanceof DoxterData) {
        $result = self::smartStripTags(Doxter::$plugin->getService()->parseMarkdown($field->getRaw()));
    } else {
        // For a list of plain values only the first item is used
        $result = self::smartStripTags((string)($isList ? $first : $field));
    }

    return self::sanitizeUserInput($result);
}
||||
139 | |||||
/**
 * Collect the title of every tag in $tags and return the titles joined
 * into a single comma-delimited string.
 *
 * @param TagQuery|Tag[]|array $tags A tag query (resolved via all()) or an
 *                                   iterable of objects exposing `title`
 *
 * @return string The titles joined by ', ' with trailing commas/spaces
 *                trimmed; '' if $tags is empty
 */
public static function extractTextFromTags($tags): string
{
    if (empty($tags)) {
        return '';
    }
    // Resolve an element query into the tag elements themselves
    $tagElements = $tags instanceof TagQuery ? $tags->all() : $tags;

    $titles = [];
    foreach ($tagElements as $tagElement) {
        $titles[] = $tagElement->title;
    }

    // rtrim() strips any trailing ',' or ' ' characters from the joined text
    return rtrim(implode(', ', $titles), ', ');
}
||||
165 | |||||
/**
 * Concatenate the text of every text-type custom field found on the entries
 * (blocks) of a matrix field.
 *
 * For each block, the custom fields of its entry type are checked against
 * the FieldHelper text field classes; each matching field's value is run
 * through extractTextFromField() and appended, followed by a space.
 *
 * @param EntryQuery|Entry[]|array $blocks A query (resolved via all()) or
 *                                         an iterable of blocks
 * @param string $fieldHandle If non-empty, only fields with this handle
 *                            are extracted
 *
 * @return string The concatenated text; '' if $blocks is empty
 */
public static function extractTextFromMatrix($blocks, $fieldHandle = ''): string
{
    if (empty($blocks)) {
        return '';
    }
    $entries = $blocks instanceof EntryQuery ? $blocks->all() : $blocks;
    /** @var array $textFieldClasses */
    $textFieldClasses = FieldHelper::FIELD_CLASSES[FieldHelper::TEXT_FIELD_CLASS_KEY];

    $text = '';
    foreach ($entries as $entry) {
        try {
            $entryType = $entry->getType();
        } catch (InvalidConfigException $e) {
            // No usable entry type for this block; skip it
            $entryType = null;
        }
        if (!$entryType) {
            continue;
        }
        foreach ($entryType->getCustomFields() as $customField) {
            foreach ($textFieldClasses as $textFieldClass) {
                if (!($customField instanceof $textFieldClass)) {
                    continue;
                }
                if (empty($fieldHandle) || $customField->handle === $fieldHandle) {
                    $text .= self::extractTextFromField($entry[$customField->handle]) . ' ';
                }
            }
        }
    }

    return $text;
}
||||
211 | |||||
/**
 * Concatenate the text of every text-type custom field found on the blocks
 * of a Neo field.
 *
 * For each block that has a field layout, the layout's custom field
 * elements are checked against the FieldHelper text field classes; each
 * matching field's value is run through extractTextFromField() and
 * appended, followed by a space.
 *
 * @param NeoBlockQuery|NeoBlock[]|array $blocks A query (resolved via
 *                                               all()) or an iterable of blocks
 * @param string $fieldHandle If non-empty, only fields with this handle
 *                            are extracted
 *
 * @return string The concatenated text; '' if $blocks is empty
 */
public static function extractTextFromNeo($blocks, $fieldHandle = ''): string
{
    if (empty($blocks)) {
        return '';
    }
    $neoBlocks = $blocks instanceof NeoBlockQuery ? $blocks->all() : $blocks;
    /** @var array $textFieldClasses */
    $textFieldClasses = FieldHelper::FIELD_CLASSES[FieldHelper::TEXT_FIELD_CLASS_KEY];

    $text = '';
    foreach ($neoBlocks as $neoBlock) {
        $fieldLayout = $neoBlock->getFieldLayout();
        if (!$fieldLayout) {
            continue;
        }
        foreach ($fieldLayout->getCustomFieldElements() as $layoutElement) {
            $customField = $layoutElement->getField();
            foreach ($textFieldClasses as $textFieldClass) {
                if (!($customField instanceof $textFieldClass)) {
                    continue;
                }
                if (empty($fieldHandle) || $customField->handle === $fieldHandle) {
                    $text .= self::extractTextFromField($neoBlock[$customField->handle]) . ' ';
                }
            }
        }
    }

    return $text;
}
||||
253 | |||||
/**
 * Return the most important keywords extracted from the text as a comma-
 * delimited string.
 *
 * The text is cleaned with cleanupText() and handed to TextRank; the keys
 * of the returned keyword array are capped at $limit, joined with ', ' and
 * sanitized. If TextRank yields nothing, the original $text is returned
 * unchanged.
 *
 * @param string $text
 * @param int $limit Maximum number of keywords to return
 * @param bool $useStopWords Whether to load stop words for the first two
 *                           letters of Seomatic::$language
 *
 * @return string
 */
public static function extractKeywords($text, $limit = 15, $useStopWords = true): string
{
    if (empty($text)) {
        return '';
    }
    $textRank = new TextRankFacade();
    if ($useStopWords) {
        // Only the two-letter prefix of the language is used for the lookup
        $languageCode = strtolower(substr(Seomatic::$language, 0, 2));
        $stopWords = self::stopWordsForLanguage($languageCode);
        if ($stopWords !== null) {
            $textRank->setStopWords($stopWords);
        }
    }

    $keywords = $textRank->getOnlyKeyWords(self::cleanupText($text));
    if (empty($keywords)) {
        // Nothing extracted; fall back to the text as given
        return $text;
    }
    $topKeywords = array_slice(array_keys($keywords), 0, $limit);

    return self::sanitizeUserInput(implode(', ', $topKeywords));
}
||||
290 | |||||
/**
 * Extract a summary made of the highlight sentences TextRank finds in the
 * text.
 *
 * The text is cleaned with cleanupText() and handed to TextRank's
 * getHighlights(); the returned sentences are joined with a space and
 * sanitized. If TextRank yields nothing, the original $text is returned
 * unchanged.
 *
 * @param string $text
 * @param bool $useStopWords Whether to load stop words for the first two
 *                           letters of Seomatic::$language
 *
 * @return string
 */
public static function extractSummary($text, $useStopWords = true): string
{
    if (empty($text)) {
        return '';
    }
    $textRank = new TextRankFacade();
    if ($useStopWords) {
        // Only the two-letter prefix of the language is used for the lookup
        $languageCode = strtolower(substr(Seomatic::$language, 0, 2));
        $stopWords = self::stopWordsForLanguage($languageCode);
        if ($stopWords !== null) {
            $textRank->setStopWords($stopWords);
        }
    }

    $highlights = $textRank->getHighlights(self::cleanupText($text));
    if (empty($highlights)) {
        // Nothing extracted; fall back to the text as given
        return $text;
    }

    return self::sanitizeUserInput(implode(' ', $highlights));
}
||||
326 | |||||
327 | |||||
/**
 * Sanitize user input by decoding any HTML entities, URL decoding the text,
 * then removing any newlines, stripping tags, stripping Twig tags, and
 * changing single {}'s into ()'s
 *
 * @param string|null $str The raw text; null is treated as ''
 *
 * @return string The sanitized text, or '' if a regex replacement fails
 */
public static function sanitizeUserInput($str): string
{
    // Normalize null / non-string input up front; passing null to the
    // string functions below is deprecated as of PHP 8.1
    $str = (string)$str;
    // Do some general cleanup
    $str = html_entity_decode($str, ENT_NOQUOTES, 'UTF-8');
    $str = rawurldecode($str);
    // Remove any linebreaks
    $str = (string)preg_replace("/\r|\n/", "", $str);
    $str = HtmlPurifier::process($str, ['HTML.Allowed' => '']);
    $str = html_entity_decode($str, ENT_NOQUOTES, 'UTF-8');
    // Remove any embedded Twig code, then change the remaining single
    // brackets to parentheses; the patterns are applied in array order
    $cleaned = preg_replace(
        ['/{{.*?}}/', '/{%.*?%}/', '/{/', '/}/'],
        ['', '', '(', ')'],
        $str
    );

    // preg_replace() returns null on error. Checking for null rather than
    // empty() keeps the literal text "0" from being turned into ''
    return $cleaned ?? '';
}
||||
357 | |||||
/**
 * Strip HTML tags, but replace them with a space rather than just
 * eliminating them
 *
 * @param string|null $str The markup to strip; null is treated as ''
 *
 * @return string The text with tags removed, entities decoded, and runs of
 *                spaces collapsed to a single space
 */
public static function smartStripTags($str)
{
    // Put a space in front of every '<' so that text from adjacent elements
    // does not run together once the tags are removed
    $str = str_replace('<', ' <', (string)$str);
    $str = HtmlPurifier::process($str, ['HTML.Allowed' => '']);
    $str = html_entity_decode($str, ENT_NOQUOTES, 'UTF-8');

    // Collapse the runs of spaces left behind by the padding above;
    // preg_replace() returns null on error, in which case keep the text as-is
    return preg_replace('/ {2,}/', ' ', $str) ?? $str;
}
||||
373 | |||||
/**
 * Clean up the passed in text by converting it to UTF-8, stripping tags,
 * removing excess whitespace, and decoding HTML entities
 *
 * @param string $text
 *
 * @return string The cleaned text, or '' if $text is empty
 */
public static function cleanupText($text): string
{
    if (empty($text)) {
        return '';
    }
    $text = (string)$text;
    // Convert to UTF-8
    if (function_exists('iconv')) {
        // mb_detect_encoding() returns false when no candidate encoding
        // matches; fall back to UTF-8 so iconv() is never handed `false`
        $sourceEncoding = mb_detect_encoding($text, mb_detect_order(), true) ?: 'UTF-8';
        $converted = iconv($sourceEncoding, 'UTF-8//IGNORE', $text);
        // iconv() returns false on failure; keep the unconverted text then
        if ($converted !== false) {
            $text = $converted;
        }
    } else {
        ini_set('mbstring.substitute_character', 'none');
        $text = mb_convert_encoding($text, 'UTF-8', 'UTF-8');
    }
    // Strip HTML tags
    $text = HtmlPurifier::process($text, ['HTML.Allowed' => '']);
    $text = html_entity_decode($text, ENT_NOQUOTES, 'UTF-8');
    // Remove excess whitespace; preg_replace() returns null on error (e.g.
    // invalid UTF-8 with the /u modifier), in which case keep the text as-is
    $text = preg_replace('/\s{2,}/u', ' ', $text) ?? $text;

    // Decode any HTML entities
    return html_entity_decode($text);
}
||||
404 | |||||
/**
 * Report whether $var can be treated as a list: either a native array or a
 * Collection instance.
 *
 * @param mixed $var
 *
 * @return bool
 */
public static function isArrayLike($var): bool
{
    if (is_array($var)) {
        return true;
    }

    return $var instanceof Collection;
}
||||
415 | |||||
416 | // Protected Static Methods |
||||
417 | // ========================================================================= |
||||
418 | |||||
/**
 * Instantiate the TextRank stop-words class for a language code.
 *
 * The code is looked up in LANGUAGE_MAP; codes that are missing fall back
 * to 'English'. The resulting name is appended to the TextRank stop-words
 * namespace to form the class name.
 *
 * @param string $language Language code used as the LANGUAGE_MAP key
 *
 * @return null|StopWordsAbstract The stop-words object, or null if the
 *                                class does not exist
 */
protected static function stopWordsForLanguage(string $language)
{
    $languageName = empty(self::LANGUAGE_MAP[$language])
        ? 'English'
        : self::LANGUAGE_MAP[$language];
    $stopWordsClass = 'PhpScience\\TextRank\\Tool\\StopWords\\' . ucfirst($languageName);

    return class_exists($stopWordsClass) ? new $stopWordsClass() : null;
}
||||
440 | } |
||||
441 |
This check looks for calls to methods that do not seem to exist on a given type. It looks for the method on the type itself as well as in inherited classes or implemented interfaces.
This is most likely a typographical error or the method has been renamed.