Passed
Push — v3 ( 1e51d3...178405 )
by Andrew
25:46
created

src/helpers/Text.php (2 issues)

1
<?php
2
/**
3
 * SEOmatic plugin for Craft CMS 3.x
4
 *
5
 * A turnkey SEO implementation for Craft CMS that is comprehensive, powerful,
6
 * and flexible
7
 *
8
 * @link      https://nystudio107.com
9
 * @copyright Copyright (c) 2017 nystudio107
10
 */
11
12
namespace nystudio107\seomatic\helpers;
13
14
use nystudio107\seomatic\helpers\Field as FieldHelper;
15
16
use nystudio107\seomatic\Seomatic;
17
18
use craft\elements\db\MatrixBlockQuery;
19
use craft\elements\db\TagQuery;
20
use craft\elements\MatrixBlock;
21
use craft\elements\Tag;
22
23
use yii\base\InvalidConfigException;
24
25
use verbb\supertable\elements\SuperTableBlockElement as SuperTableBlock;
26
use verbb\supertable\elements\db\SuperTableBlockQuery;
27
28
use benf\neo\elements\db\BlockQuery as NeoBlockQuery;
29
use benf\neo\elements\Block as NeoBlock;
30
31
use Stringy\Stringy;
32
33
use PhpScience\TextRank\TextRankFacade;
34
use PhpScience\TextRank\Tool\StopWords\StopWordsAbstract;
35
36
/**
 * Static helpers for truncating text, extracting plain text from field values,
 * deriving keywords and summaries, and sanitizing user-supplied strings.
 *
 * @author    nystudio107
 * @package   Seomatic
 * @since     3.0.0
 */
class Text
{
    // Constants
    // =========================================================================

    /**
     * Maps a two-letter language code to the name of the TextRank stop words
     * class for that language. Codes that are not listed fall back to English
     * in stopWordsForLanguage().
     */
    const LANGUAGE_MAP = [
        'en' => 'English',
        'fr' => 'French',
        'de' => 'German',
        'it' => 'Italian',
        'no' => 'Norwegian',
        'es' => 'Spanish',
    ];

    // Public Static Methods
    // =========================================================================

    /**
     * Truncates the string to a given length. If $substring is provided, and
     * truncating occurs, the string is further truncated so that the substring
     * may be appended without exceeding the desired length.
     *
     * HTML tags are stripped before truncating. Empty input is returned as a
     * string without being processed (null and empty arrays become '').
     *
     * @param  string $string    The string to truncate
     * @param  int    $length    Desired length of the truncated string
     * @param  string $substring The substring to append if it can fit
     *
     * @return string with the resulting $str after truncating
     */
    public static function truncate($string, $length, $substring = '…'): string
    {
        if (empty($string)) {
            // Nothing to truncate; normalize to a string so that the declared
            // return type also holds for null and other empty non-strings
            return \is_scalar($string) ? (string)$string : '';
        }

        return (string)Stringy::create(strip_tags($string))->truncate($length, $substring);
    }

    /**
     * Truncates the string to a given length, while ensuring that it does not
     * split words. If $substring is provided, and truncating occurs, the
     * string is further truncated so that the substring may be appended without
     * exceeding the desired length.
     *
     * HTML tags are stripped before truncating. Empty input is returned as a
     * string without being processed (null and empty arrays become '').
     *
     * @param  string $string    The string to truncate
     * @param  int    $length    Desired length of the truncated string
     * @param  string $substring The substring to append if it can fit
     *
     * @return string with the resulting $str after truncating
     */
    public static function truncateOnWord($string, $length, $substring = '…'): string
    {
        if (empty($string)) {
            // Nothing to truncate; normalize to a string so that the declared
            // return type also holds for null and other empty non-strings
            return \is_scalar($string) ? (string)$string : '';
        }

        return (string)Stringy::create(strip_tags($string))->safeTruncate($length, $substring);
    }

    /**
     * Extract plain old text from a field
     *
     * Element queries and arrays of Matrix, Neo, or Super Table blocks, or of
     * Tags, are handed to the matching extractor. Anything else is cast to a
     * string (for an array, only its first element is used) and has its tags
     * replaced by spaces. The result is always passed through
     * sanitizeUserInput().
     *
     * @param mixed $field The field value to extract text from
     *
     * @return string
     */
    public static function extractTextFromField($field): string
    {
        if (empty($field)) {
            return '';
        }
        // The first element decides how an array is handled; reset() is used
        // rather than index 0 so arrays that are not zero-indexed work as well
        $first = \is_array($field) ? reset($field) : null;

        if ($field instanceof MatrixBlockQuery || $first instanceof MatrixBlock) {
            $result = self::extractTextFromMatrix($field);
        } elseif ($field instanceof NeoBlockQuery || $first instanceof NeoBlock) {
            $result = self::extractTextFromNeo($field);
        } elseif ($field instanceof SuperTableBlockQuery || $first instanceof SuperTableBlock) {
            $result = self::extractTextFromSuperTable($field);
        } elseif ($field instanceof TagQuery || $first instanceof Tag) {
            $result = self::extractTextFromTags($field);
        } elseif (\is_array($field)) {
            $result = self::smartStripTags((string)$first);
        } else {
            $result = self::smartStripTags((string)$field);
        }

        return self::sanitizeUserInput($result);
    }

    /**
     * Extract concatenated text from all of the tags in $tags and return it as
     * a comma-delimited string
     *
     * @param TagQuery|Tag[] $tags
     *
     * @return string
     */
    public static function extractTextFromTags($tags): string
    {
        if (empty($tags)) {
            return '';
        }
        // Resolve a tag query into the list of tags it matches
        if ($tags instanceof TagQuery) {
            $tags = $tags->all();
        }
        // Collect the title of every tag
        $titles = [];
        foreach ($tags as $tag) {
            $titles[] = $tag->title;
        }

        // Trailing commas and spaces are trimmed from the joined string
        return rtrim(implode(', ', $titles), ', ');
    }

    /**
     * Extract text from all of the blocks in a matrix field, concatenating it
     * together.
     *
     * @param MatrixBlockQuery|MatrixBlock[] $blocks
     * @param string                         $fieldHandle Only extract text from
     *                                       the field with this handle; empty
     *                                       means every text field
     *
     * @return string
     */
    public static function extractTextFromMatrix($blocks, $fieldHandle = ''): string
    {
        if (empty($blocks)) {
            return '';
        }
        // Resolve a block query into the list of blocks it matches
        if ($blocks instanceof MatrixBlockQuery) {
            $blocks = $blocks->all();
        }

        return self::extractTextFromBlocks($blocks, $fieldHandle);
    }

    /**
     * Extract text from all of the blocks in a Neo field, concatenating it
     * together.
     *
     * @param NeoBlockQuery|NeoBlock[] $blocks
     * @param string                   $fieldHandle Only extract text from the
     *                                 field with this handle; empty means every
     *                                 text field
     *
     * @return string
     */
    public static function extractTextFromNeo($blocks, $fieldHandle = ''): string
    {
        if (empty($blocks)) {
            return '';
        }
        // Resolve a block query into the list of blocks it matches
        if ($blocks instanceof NeoBlockQuery) {
            $blocks = $blocks->all();
        }

        return self::extractTextFromBlocks($blocks, $fieldHandle);
    }

    /**
     * Extract text from all of the blocks in a Super Table field,
     * concatenating it together.
     *
     * @param SuperTableBlockQuery|SuperTableBlock[] $blocks
     * @param string                                 $fieldHandle Only extract
     *                                               text from the field with
     *                                               this handle; empty means
     *                                               every text field
     *
     * @return string
     */
    public static function extractTextFromSuperTable($blocks, $fieldHandle = ''): string
    {
        if (empty($blocks)) {
            return '';
        }
        // Resolve a block query into the list of blocks it matches
        if ($blocks instanceof SuperTableBlockQuery) {
            $blocks = $blocks->all();
        }

        return self::extractTextFromBlocks($blocks, $fieldHandle);
    }

    /**
     * Return the most important keywords extracted from the text as a comma-
     * delimited string
     *
     * If no keywords can be extracted, the text itself is returned. Either
     * way the returned string has been passed through sanitizeUserInput().
     *
     * @param string $text
     * @param int    $limit        The maximum number of keywords to return
     * @param bool   $useStopWords Whether stop words for the current language
     *                             should be ignored
     *
     * @return string
     */
    public static function extractKeywords($text, $limit = 15, $useStopWords = true): string
    {
        if (empty($text)) {
            return '';
        }
        // Array of the most important keywords; the keywords are its keys
        $keywords = self::createTextRank($useStopWords)->getOnlyKeyWords(self::cleanupText($text));

        // If it's empty, fall back to the text, sanitized like any other result
        if (empty($keywords)) {
            return self::sanitizeUserInput($text);
        }

        $result = implode(', ', \array_slice(array_keys($keywords), 0, $limit));

        return self::sanitizeUserInput($result);
    }

    /**
     * Extract a summary consisting of the most important sentences from the
     * text
     *
     * If no sentences can be extracted, the text itself is returned. Either
     * way the returned string has been passed through sanitizeUserInput().
     *
     * @param string $text
     * @param bool   $useStopWords Whether stop words for the current language
     *                             should be ignored
     *
     * @return string
     */
    public static function extractSummary($text, $useStopWords = true): string
    {
        if (empty($text)) {
            return '';
        }
        // Array of the most important sentences
        $sentences = self::createTextRank($useStopWords)->getHighlights(self::cleanupText($text));

        // If it's empty, fall back to the text, sanitized like any other result
        if (empty($sentences)) {
            return self::sanitizeUserInput($text);
        }

        return self::sanitizeUserInput(implode(' ', $sentences));
    }

    /**
     * Sanitize user input by decoding any HTML Entities, URL decoding the text,
     * then removing any newlines, stripping tags, stripping Twig tags, and changing
     * single {}'s into ()'s
     *
     * null and arrays yield an empty string. A string such as "0" is kept.
     *
     * @param $str
     * @return string
     */
    public static function sanitizeUserInput($str): string
    {
        // Reject values that cannot be treated as text before any string
        // function sees them
        if ($str === null || \is_array($str)) {
            return '';
        }
        // Do some general cleanup
        $str = html_entity_decode((string)$str, ENT_NOQUOTES, 'UTF-8');
        $str = urldecode($str);
        // Remove any linebreaks; they go first so that the Twig patterns below
        // also match tags that were spread over several lines
        $str = (string)preg_replace("/\r|\n/", '', $str);
        $str = strip_tags($str);
        // Remove any embedded Twig code. preg_replace() returns null on error,
        // in which case the cast yields an empty string instead of letting
        // unsanitized text through
        $str = (string)preg_replace(['/{{.*?}}/', '/{%.*?%}/'], '', $str);

        // Change single brackets to parenthesis
        return str_replace(['{', '}'], ['(', ')'], $str);
    }

    /**
     * Strip HTML tags, but replace them with a space rather than just eliminating them
     *
     * Runs of spaces, including those created by adjacent tags, are collapsed
     * into a single space.
     *
     * @param $str
     * @return string
     */
    public static function smartStripTags($str)
    {
        // Put a space in front of every tag so the words on either side of it
        // stay separated once the tag is gone
        $str = str_replace('<', ' <', $str);
        $str = strip_tags($str);

        // Collapse every run of spaces; replacing only pairs in a single pass
        // would leave double spaces behind wherever three or more were adjacent
        return (string)preg_replace('/ {2,}/', ' ', $str);
    }

    /**
     * Clean up the passed in text by converting it to UTF-8, stripping tags,
     * removing whitespace, and decoding HTML entities
     *
     * @param string $text
     *
     * @return string
     */
    public static function cleanupText($text): string
    {
        if (empty($text)) {
            return '';
        }
        $text = (string)$text;
        // Convert to UTF-8, preferring iconv when an encoding can be detected
        $utf8 = false;
        if (\function_exists('iconv')) {
            $encoding = mb_detect_encoding($text, mb_detect_order(), true);
            if ($encoding !== false) {
                $utf8 = iconv($encoding, 'UTF-8//IGNORE', $text);
            }
        }
        // Without iconv, or if detection or conversion failed, let mbstring
        // handle invalid byte sequences rather than losing the whole text
        if ($utf8 === false) {
            ini_set('mbstring.substitute_character', 'none');
            $utf8 = (string)mb_convert_encoding($text, 'UTF-8', 'UTF-8');
        }
        // Strip HTML tags
        $text = strip_tags($utf8);
        // Remove excess whitespace; preg_replace() returns null on error, in
        // which case the text is kept as it is
        $collapsed = preg_replace('/\s{2,}/u', ' ', $text);
        if ($collapsed !== null) {
            $text = $collapsed;
        }

        // Decode any HTML entities
        return html_entity_decode($text);
    }

    // Protected Static Methods
    // =========================================================================

    /**
     * Return a stop words object for the passed in two-letter language code,
     * using English for languages that are not in LANGUAGE_MAP
     *
     * @param string $language
     *
     * @return null|StopWordsAbstract null if the stop words class does not exist
     */
    protected static function stopWordsForLanguage(string $language)
    {
        $languageName = empty(self::LANGUAGE_MAP[$language])
            ? 'English'
            : self::LANGUAGE_MAP[$language];
        $className = 'PhpScience\\TextRank\\Tool\\StopWords\\'.$languageName;

        return class_exists($className) ? new $className() : null;
    }

    // Private Static Methods
    // =========================================================================

    /**
     * Concatenate the text of the text fields found in each of the blocks,
     * adding a space after the text of each field
     *
     * @param array  $blocks      The blocks to extract text from
     * @param string $fieldHandle Only extract text from the field with this
     *                            handle; empty means every text field
     *
     * @return string
     */
    private static function extractTextFromBlocks($blocks, $fieldHandle): string
    {
        $result = '';
        /** @var array $fieldClasses */
        $fieldClasses = FieldHelper::FIELD_CLASSES[FieldHelper::TEXT_FIELD_CLASS_KEY];
        foreach ($blocks as $block) {
            try {
                $blockType = $block->getType();
            } catch (InvalidConfigException $e) {
                // A block whose type cannot be resolved contributes no text
                continue;
            }
            if (!$blockType) {
                continue;
            }
            // Find any text fields inside of the block
            foreach ($blockType->getFields() as $field) {
                if (!empty($fieldHandle) && $field->handle !== $fieldHandle) {
                    continue;
                }
                foreach ($fieldClasses as $fieldClass) {
                    if ($field instanceof $fieldClass) {
                        $result .= self::extractTextFromField($block[$field->handle]).' ';
                        // Stop at the first match so that a field is never
                        // added more than once
                        break;
                    }
                }
            }
        }

        return $result;
    }

    /**
     * Create a TextRank instance, optionally set up with the stop words for
     * the current SEOmatic language
     *
     * @param bool $useStopWords
     *
     * @return TextRankFacade
     */
    private static function createTextRank($useStopWords): TextRankFacade
    {
        $api = new TextRankFacade();
        // Set the stop words that should be ignored
        if ($useStopWords) {
            $language = strtolower(substr((string)Seomatic::$language, 0, 2));
            $stopWords = self::stopWordsForLanguage($language);
            if ($stopWords !== null) {
                $api->setStopWords($stopWords);
            }
        }

        return $api;
    }
}
482