Passed
Push — v4 ( 4ea8ea...032521 )
by Andrew
24:07 queued 14s
created

Text::isArrayLike()   A

Complexity

Conditions 3
Paths 3

Size

Total Lines 3
Code Lines 1

Duplication

Lines 0
Ratio 0 %

Code Coverage

Tests 0
CRAP Score 12

Importance

Changes 1
Bugs 1 Features 0
Metric Value
eloc 1
c 1
b 1
f 0
dl 0
loc 3
ccs 0
cts 2
cp 0
rs 10
cc 3
nc 3
nop 1
crap 12
1
<?php
2
/**
3
 * SEOmatic plugin for Craft CMS
4
 *
5
 * A turnkey SEO implementation for Craft CMS that is comprehensive, powerful,
6
 * and flexible
7
 *
8
 * @link      https://nystudio107.com
9
 * @copyright Copyright (c) 2017 nystudio107
10
 */
11
12
namespace nystudio107\seomatic\helpers;
13
14
use ArrayAccess;
15
use benf\neo\elements\Block as NeoBlock;
16
use benf\neo\elements\db\BlockQuery as NeoBlockQuery;
17
use craft\elements\db\MatrixBlockQuery;
18
use craft\elements\db\TagQuery;
19
use craft\elements\MatrixBlock;
20
use craft\elements\Tag;
21
use craft\helpers\HtmlPurifier;
22
use craft\models\FieldLayout;
23
use nystudio107\seomatic\helpers\Field as FieldHelper;
24
use nystudio107\seomatic\Seomatic;
25
use PhpScience\TextRank\TextRankFacade;
26
use PhpScience\TextRank\Tool\StopWords\StopWordsAbstract;
27
use Stringy\Stringy;
28
use Traversable;
29
use verbb\doxter\Doxter;
30
use verbb\doxter\fields\data\DoxterData;
31
use verbb\supertable\elements\db\SuperTableBlockQuery;
32
use verbb\supertable\elements\SuperTableBlockElement as SuperTableBlock;
33
use yii\base\InvalidConfigException;
34
use function array_slice;
35
use function function_exists;
36
use function is_array;
37
38
/**
39
 * @author    nystudio107
40
 * @package   Seomatic
41
 * @since     3.0.0
42
 */
43
class Text
44
{
45
    // Constants
46
    // =========================================================================
47
48
    /**
     * Maps lowercase two-letter language codes to the language names that
     * stopWordsForLanguage() appends to the TextRank stop words namespace to
     * build a stop words class name
     */
    public const LANGUAGE_MAP = [
49
        'en' => 'English',
50
        'fr' => 'French',
51
        'de' => 'German',
52
        'it' => 'Italian',
53
        'no' => 'Norwegian',
54
        'es' => 'Spanish',
55
    ];
56
57
    // Public Static Methods
58
    // =========================================================================
59
60
    /**
     * Truncates the string to a given length. If $substring is provided, and
     * truncating occurs, the string is further truncated so that the substring
     * may be appended without exceeding the desired length.
     *
     * All HTML markup is stripped and HTML entities are decoded before the
     * string is truncated. Empty input is returned without any processing,
     * cast to a string.
     *
     * @param string $string The string to truncate
     * @param int $length Desired length of the truncated string
     * @param string $substring The substring to append if it can fit
     *
     * @return string The truncated string
     */
    public static function truncate($string, $length, $substring = '…'): string
    {
        if (empty($string)) {
            // Returning a raw null (or empty array) would violate the declared
            // string return type, so normalize non-scalars to an empty string
            return is_scalar($string) ? (string)$string : '';
        }
        // Strip all markup, then decode the entities the purifier leaves behind
        $string = HtmlPurifier::process($string, ['HTML.Allowed' => '']);
        $string = html_entity_decode($string, ENT_NOQUOTES, 'UTF-8');

        return (string)Stringy::create($string)->truncate($length, $substring);
    }
83
84
    /**
     * Truncates the string to a given length, while ensuring that it does not
     * split words. If $substring is provided, and truncating occurs, the
     * string is further truncated so that the substring may be appended without
     * exceeding the desired length.
     *
     * All HTML markup is stripped and HTML entities are decoded before the
     * string is truncated. Empty input is returned without any processing,
     * cast to a string.
     *
     * @param string $string The string to truncate
     * @param int $length Desired length of the truncated string
     * @param string $substring The substring to append if it can fit
     *
     * @return string The truncated string
     */
    public static function truncateOnWord($string, $length, $substring = '…'): string
    {
        if (empty($string)) {
            // Returning a raw null (or empty array) would violate the declared
            // string return type, so normalize non-scalars to an empty string
            return is_scalar($string) ? (string)$string : '';
        }
        // Strip all markup, then decode the entities the purifier leaves behind
        $string = HtmlPurifier::process($string, ['HTML.Allowed' => '']);
        $string = html_entity_decode($string, ENT_NOQUOTES, 'UTF-8');

        return (string)Stringy::create($string)->safeTruncate($length, $substring);
    }
108
109
    /**
     * Extract plain old text from a field
     *
     * Matrix, Neo, Super Table and Tag fields (passed either as an element
     * query or as an array-like list of elements) are handed to their
     * dedicated extractors, and Doxter data is parsed as Markdown first. For
     * any other array-like value only its first item is used; everything else
     * is cast to a string. The result is always run through
     * sanitizeUserInput().
     *
     * @param mixed $field The field value to extract text from
     *
     * @return string
     */
    public static function extractTextFromField($field): string
    {
        if (empty($field)) {
            return '';
        }
        if ($field instanceof MatrixBlockQuery) {
            $result = self::extractTextFromMatrix($field);
        } elseif ($field instanceof NeoBlockQuery) {
            $result = self::extractTextFromNeo($field);
        } elseif ($field instanceof SuperTableBlockQuery) {
            $result = self::extractTextFromSuperTable($field);
        } elseif ($field instanceof TagQuery) {
            $result = self::extractTextFromTags($field);
        } elseif ($field instanceof DoxterData) {
            $result = self::smartStripTags(Doxter::$plugin->getService()->parseMarkdown($field->getRaw()));
        } elseif (self::isArrayLike($field)) {
            // The type of the first item decides how the whole list is handled
            $first = self::firstItemOf($field);
            if ($first instanceof MatrixBlock) {
                $result = self::extractTextFromMatrix($field);
            } elseif ($first instanceof NeoBlock) {
                $result = self::extractTextFromNeo($field);
            } elseif ($first instanceof SuperTableBlock) {
                $result = self::extractTextFromSuperTable($field);
            } elseif ($first instanceof Tag) {
                $result = self::extractTextFromTags($field);
            } else {
                $result = self::smartStripTags((string)$first);
            }
        } else {
            $result = self::smartStripTags((string)$field);
        }

        return self::sanitizeUserInput($result);
    }

    /**
     * Return the first item of an array or array-like object, or null if it
     * has no items
     *
     * @param array|ArrayAccess|Traversable $items
     *
     * @return mixed
     */
    private static function firstItemOf($items)
    {
        // Prefer index 0, but never read a missing offset: an empty collection
        // or a re-keyed array would otherwise raise an "undefined key" warning
        if (isset($items[0])) {
            return $items[0];
        }
        // Fall back to the first item in iteration order
        foreach ($items as $item) {
            return $item;
        }

        return null;
    }
146
147
    /**
     * Extract the titles of all of the tags in $tags and return them as a
     * comma-delimited string
     *
     * Tags with an empty title are skipped, so the result never contains
     * empty entries or a dangling delimiter.
     *
     * @param TagQuery|Tag[]|array $tags
     *
     * @return string
     */
    public static function extractTextFromTags($tags): string
    {
        if (empty($tags)) {
            return '';
        }
        // Resolve a query into the tag elements it matches
        if ($tags instanceof TagQuery) {
            $tags = $tags->all();
        }
        $titles = [];
        foreach ($tags as $tag) {
            $title = (string)$tag->title;
            if ($title !== '') {
                $titles[] = $title;
            }
        }

        // Join rather than append-and-rtrim: rtrim() takes a character list, so
        // it would also strip commas and spaces that belong to the last title
        return implode(', ', $titles);
    }
172
173
    /**
     * Extract text from all of the blocks in a matrix field, concatenating it
     * together.
     *
     * Only custom fields whose class is one of the text field classes listed
     * in FieldHelper::FIELD_CLASSES are read. Each extracted value is followed
     * by a single space.
     *
     * @param MatrixBlockQuery|MatrixBlock[]|array $blocks
     * @param string $fieldHandle If non-empty, only the field with this handle is read
     *
     * @return string
     */
    public static function extractTextFromMatrix($blocks, $fieldHandle = ''): string
    {
        if (empty($blocks)) {
            return '';
        }
        // Resolve a query into the matrix blocks it matches
        if ($blocks instanceof MatrixBlockQuery) {
            $blocks = $blocks->all();
        }
        $result = '';
        foreach ($blocks as $block) {
            try {
                $blockType = $block->getType();
            } catch (InvalidConfigException $e) {
                // The block type cannot be resolved, so the block is skipped
                continue;
            }
            if (!$blockType) {
                continue;
            }
            $textFieldClasses = FieldHelper::FIELD_CLASSES[FieldHelper::TEXT_FIELD_CLASS_KEY];
            // Find any text fields inside of the matrix block
            foreach ($blockType->getCustomFields() as $field) {
                if (!empty($fieldHandle) && $field->handle !== $fieldHandle) {
                    continue;
                }
                /** @var array $textFieldClasses */
                foreach ($textFieldClasses as $textFieldClass) {
                    if ($field instanceof $textFieldClass) {
                        $result .= self::extractTextFromField($block[$field->handle]) . ' ';
                        // Stop at the first match so a field that satisfies more
                        // than one class is not appended more than once
                        break;
                    }
                }
            }
        }

        return $result;
    }
218
219
    /**
     * Extract text from all of the blocks in a Neo field, concatenating it
     * together.
     *
     * Only custom fields whose class is one of the text field classes listed
     * in FieldHelper::FIELD_CLASSES are read. Each extracted value is followed
     * by a single space.
     *
     * @param NeoBlockQuery|NeoBlock[]|array $blocks
     * @param string $fieldHandle If non-empty, only the field with this handle is read
     *
     * @return string
     */
    public static function extractTextFromNeo($blocks, $fieldHandle = ''): string
    {
        if (empty($blocks)) {
            return '';
        }
        // Resolve a query into the Neo blocks it matches
        if ($blocks instanceof NeoBlockQuery) {
            $blocks = $blocks->all();
        }
        $result = '';
        foreach ($blocks as $block) {
            $layout = $block->getFieldLayout();
            if (!$layout) {
                continue;
            }
            $textFieldClasses = FieldHelper::FIELD_CLASSES[FieldHelper::TEXT_FIELD_CLASS_KEY];
            // Find any text fields inside of the Neo block
            foreach ($layout->getCustomFieldElements() as $fieldElement) {
                $field = $fieldElement->getField();
                if (!empty($fieldHandle) && $field->handle !== $fieldHandle) {
                    continue;
                }
                /** @var array $textFieldClasses */
                foreach ($textFieldClasses as $textFieldClass) {
                    if ($field instanceof $textFieldClass) {
                        $result .= self::extractTextFromField($block[$field->handle]) . ' ';
                        // Stop at the first match so a field that satisfies more
                        // than one class is not appended more than once
                        break;
                    }
                }
            }
        }

        return $result;
    }
260
261
    /**
     * Extract text from all of the blocks in a Super Table field,
     * concatenating it together.
     *
     * Only custom fields whose class is one of the text field classes listed
     * in FieldHelper::FIELD_CLASSES are read. Each extracted value is followed
     * by a single space. Blocks whose type or field layout cannot be resolved
     * are skipped.
     *
     * @param SuperTableBlockQuery|SuperTableBlock[]|array $blocks
     * @param string $fieldHandle If non-empty, only the field with this handle is read
     *
     * @return string
     */
    public static function extractTextFromSuperTable($blocks, $fieldHandle = ''): string
    {
        if (empty($blocks)) {
            return '';
        }
        // Resolve a query into the Super Table blocks it matches
        if ($blocks instanceof SuperTableBlockQuery) {
            $blocks = $blocks->all();
        }
        $result = '';
        foreach ($blocks as $block) {
            try {
                $blockType = $block->getType();
            } catch (InvalidConfigException $e) {
                // The block type cannot be resolved, so the block is skipped
                continue;
            }
            if (!$blockType) {
                continue;
            }
            /** @var ?FieldLayout $layout */
            // The SuperTableBlockType class lacks @mixin FieldLayoutBehavior in its annotations
            /** @phpstan-ignore-next-line */
            $layout = $blockType->getFieldLayout();
            // The layout is nullable, so guard before dereferencing it
            if (!$layout) {
                continue;
            }
            $textFieldClasses = FieldHelper::FIELD_CLASSES[FieldHelper::TEXT_FIELD_CLASS_KEY];
            // Find any text fields inside of the Super Table block
            foreach ($layout->getCustomFieldElements() as $fieldElement) {
                $field = $fieldElement->getField();
                if (!empty($fieldHandle) && $field->handle !== $fieldHandle) {
                    continue;
                }
                /** @var array $textFieldClasses */
                foreach ($textFieldClasses as $textFieldClass) {
                    if ($field instanceof $textFieldClass) {
                        $result .= self::extractTextFromField($block[$field->handle]) . ' ';
                        // Stop at the first match so a field that satisfies more
                        // than one class is not appended more than once
                        break;
                    }
                }
            }
        }

        return $result;
    }
310
311
    /**
     * Return the most important keywords extracted from the text as a comma-
     * delimited string
     *
     * If no keywords can be extracted, the original text is returned instead.
     * Every non-empty result is run through sanitizeUserInput().
     *
     * @param string $text The text to extract keywords from
     * @param int $limit The maximum number of keywords to return
     * @param bool $useStopWords Whether stop words for the current language should be ignored
     *
     * @return string
     */
    public static function extractKeywords($text, $limit = 15, $useStopWords = true): string
    {
        if (empty($text)) {
            return '';
        }
        $api = new TextRankFacade();
        // Set the stop words that should be ignored
        if ($useStopWords) {
            // Only the leading two-letter language code is used for the lookup
            $language = strtolower(substr((string)Seomatic::$language, 0, 2));
            $stopWords = self::stopWordsForLanguage($language);
            if ($stopWords !== null) {
                $api->setStopWords($stopWords);
            }
        }
        // Array of the most important keywords:
        $keywords = $api->getOnlyKeyWords(self::cleanupText($text));

        // If it's empty, fall back to the text itself; it is still sanitized so
        // that this path cannot bypass the cleanup applied to keyword results
        if (empty($keywords)) {
            return self::sanitizeUserInput($text);
        }

        $result = implode(', ', array_slice(array_keys($keywords), 0, $limit));

        return self::sanitizeUserInput($result);
    }
347
348
    /**
     * Extract a summary consisting of the most important sentences from the
     * text, as selected by TextRank's getHighlights()
     *
     * If no sentences can be extracted, the original text is returned instead.
     * Every non-empty result is run through sanitizeUserInput().
     *
     * @param string $text The text to summarize
     * @param bool $useStopWords Whether stop words for the current language should be ignored
     *
     * @return string
     */
    public static function extractSummary($text, $useStopWords = true): string
    {
        if (empty($text)) {
            return '';
        }
        $api = new TextRankFacade();
        // Set the stop words that should be ignored
        if ($useStopWords) {
            // Only the leading two-letter language code is used for the lookup
            $language = strtolower(substr((string)Seomatic::$language, 0, 2));
            $stopWords = self::stopWordsForLanguage($language);
            if ($stopWords !== null) {
                $api->setStopWords($stopWords);
            }
        }
        // Array of the most important sentences:
        $sentences = $api->getHighlights(self::cleanupText($text));

        // If it's empty, fall back to the text itself; it is still sanitized so
        // that this path cannot bypass the cleanup applied to summary results
        if (empty($sentences)) {
            return self::sanitizeUserInput($text);
        }

        $result = implode(' ', $sentences);

        return self::sanitizeUserInput($result);
    }
383
384
385
    /**
     * Sanitize user input by decoding any HTML Entities, URL decoding the text,
     * then removing any newlines, stripping tags, stripping Twig tags, and changing
     * single {}'s into ()'s
     *
     * @param mixed $str The text to sanitize; it is cast to a string first
     *
     * @return string The sanitized text, or an empty string if a replacement fails
     */
    public static function sanitizeUserInput($str): string
    {
        // Do some general cleanup
        $str = html_entity_decode((string)$str, ENT_NOQUOTES, 'UTF-8');
        $str = rawurldecode($str);
        // Remove any linebreaks
        $str = (string)preg_replace("/\r|\n/", "", $str);
        // Strip all markup, then decode the entities the purifier leaves behind
        $str = HtmlPurifier::process($str, ['HTML.Allowed' => '']);
        $str = html_entity_decode($str, ENT_NOQUOTES, 'UTF-8');
        // Applied in order: remove any embedded Twig code first, then change
        // the remaining single brackets to parenthesis
        $str = preg_replace(
            ['/{{.*?}}/', '/{%.*?%}/', '/{/', '/}/'],
            ['', '', '(', ')'],
            $str
        );

        // preg_replace() returns null on error. Only that case becomes an empty
        // string; an empty() check would also discard the valid value '0'
        return $str ?? '';
    }
414
415
    /**
     * Strip HTML tags, but replace them with a space rather than just eliminating them
     *
     * Runs of spaces left behind by the removed tags are collapsed into a
     * single space.
     *
     * @param mixed $str The markup to strip; it is cast to a string first
     *
     * @return string
     */
    public static function smartStripTags($str)
    {
        // Put a space in front of every tag so that text from adjacent
        // elements does not run together once the tags are removed
        $str = str_replace('<', ' <', (string)$str);
        $str = HtmlPurifier::process($str, ['HTML.Allowed' => '']);
        $str = html_entity_decode($str, ENT_NOQUOTES, 'UTF-8');
        // Collapse whole runs of spaces; a single '  ' => ' ' replacement pass
        // would leave runs of three or more spaces only partly collapsed
        $collapsed = preg_replace('/ {2,}/', ' ', $str);

        return $collapsed ?? $str;
    }
430
431
    /**
     * Clean up the passed in text by converting it to UTF-8, stripping tags,
     * removing whitespace, and decoding HTML entities
     *
     * @param string $text The text to clean up
     *
     * @return string The cleaned up text, or an empty string for empty input
     */
    public static function cleanupText($text): string
    {
        if (empty($text)) {
            return '';
        }
        // Convert to UTF-8
        if (function_exists('iconv')) {
            // Detection returns false when no candidate encoding matches; treat
            // the text as UTF-8 then so that iconv() only drops invalid sequences
            $encoding = mb_detect_encoding($text, mb_detect_order(), true) ?: 'UTF-8';
            $converted = iconv($encoding, 'UTF-8//IGNORE', $text);
            // Keep the original text if the conversion itself fails
            if ($converted !== false) {
                $text = $converted;
            }
        } else {
            ini_set('mbstring.substitute_character', 'none');
            $text = mb_convert_encoding($text, 'UTF-8', 'UTF-8');
        }
        // Strip HTML tags
        $text = HtmlPurifier::process($text, ['HTML.Allowed' => '']);
        $text = html_entity_decode($text, ENT_NOQUOTES, 'UTF-8');
        // Remove excess whitespace; preg_replace() returns null on error (e.g.
        // invalid UTF-8 with the /u modifier), in which case the text is kept
        $collapsed = preg_replace('/\s{2,}/u', ' ', $text);
        if ($collapsed !== null) {
            $text = $collapsed;
        }

        // Decode any HTML entities
        return html_entity_decode($text);
    }
461
462
    /**
     * Determine whether $var can be used like an array: either a native array,
     * or an object that is both offset-accessible and iterable
     *
     * @param mixed $var The value to check
     *
     * @return bool
     */
    public static function isArrayLike($var): bool
    {
        if (is_array($var)) {
            return true;
        }

        // Objects qualify only when they implement both interfaces
        return $var instanceof ArrayAccess && $var instanceof Traversable;
    }
472
473
    // Protected Static Methods
474
    // =========================================================================
475
476
    /**
     * Instantiate the TextRank stop words class for a language code
     *
     * Codes that have no entry in LANGUAGE_MAP fall back to English.
     *
     * @param string $language The language code to look up in LANGUAGE_MAP
     *
     * @return null|StopWordsAbstract null if the stop words class does not exist
     */
    protected static function stopWordsForLanguage(string $language)
    {
        $languageName = empty(self::LANGUAGE_MAP[$language])
            ? 'English'
            : self::LANGUAGE_MAP[$language];
        $stopWordsClass = 'PhpScience\\TextRank\\Tool\\StopWords\\' . ucfirst($languageName);

        return class_exists($stopWordsClass) ? new $stopWordsClass() : null;
    }
497
}
498