Passed
Push — develop ( fa3a5c...2de439 )
by Andrew
10:24
created

Text::truncateOnWord()   A

Complexity

Conditions 2
Paths 2

Size

Total Lines 10
Code Lines 5

Duplication

Lines 0
Ratio 0 %

Code Coverage

Tests 6
CRAP Score 2

Importance

Changes 0
Metric Value
cc 2
eloc 5
nc 2
nop 3
dl 0
loc 10
ccs 6
cts 6
cp 1
crap 2
rs 10
c 0
b 0
f 0
1
<?php
2
/**
3
 * SEOmatic plugin for Craft CMS 3.x
4
 *
5
 * A turnkey SEO implementation for Craft CMS that is comprehensive, powerful,
6
 * and flexible
7
 *
8
 * @link      https://nystudio107.com
9
 * @copyright Copyright (c) 2017 nystudio107
10
 */
11
12
namespace nystudio107\seomatic\helpers;
13
14
use nystudio107\seomatic\helpers\Field as FieldHelper;
15
16
use nystudio107\seomatic\Seomatic;
17
18
use craft\elements\db\MatrixBlockQuery;
19
use craft\elements\db\TagQuery;
20
use craft\elements\MatrixBlock;
21
use craft\elements\Tag;
22
23
use yii\base\InvalidConfigException;
24
25
use verbb\supertable\elements\SuperTableBlockElement as SuperTableBlock;
0 ignored issues
show
Bug introduced by
The type verbb\supertable\elements\SuperTableBlockElement was not found. Maybe you did not declare it correctly or list all dependencies?

The issue could also be caused by a filter entry in the build configuration. If the path has been excluded in your configuration, e.g. excluded_paths: ["lib/*"], you can move it to the dependency path list as follows:

filter:
    dependency_paths: ["lib/*"]

For further information see https://scrutinizer-ci.com/docs/tools/php/php-scrutinizer/#list-dependency-paths

Loading history...
26
use verbb\supertable\elements\db\SuperTableBlockQuery;
0 ignored issues
show
Bug introduced by
The type verbb\supertable\elements\db\SuperTableBlockQuery was not found. Maybe you did not declare it correctly or list all dependencies?

The issue could also be caused by a filter entry in the build configuration. If the path has been excluded in your configuration, e.g. excluded_paths: ["lib/*"], you can move it to the dependency path list as follows:

filter:
    dependency_paths: ["lib/*"]

For further information see https://scrutinizer-ci.com/docs/tools/php/php-scrutinizer/#list-dependency-paths

Loading history...
27
28
use benf\neo\elements\db\BlockQuery as NeoBlockQuery;
0 ignored issues
show
Bug introduced by
The type benf\neo\elements\db\BlockQuery was not found. Maybe you did not declare it correctly or list all dependencies?

The issue could also be caused by a filter entry in the build configuration. If the path has been excluded in your configuration, e.g. excluded_paths: ["lib/*"], you can move it to the dependency path list as follows:

filter:
    dependency_paths: ["lib/*"]

For further information see https://scrutinizer-ci.com/docs/tools/php/php-scrutinizer/#list-dependency-paths

Loading history...
29
use benf\neo\elements\Block as NeoBlock;
0 ignored issues
show
Bug introduced by
The type benf\neo\elements\Block was not found. Maybe you did not declare it correctly or list all dependencies?

The issue could also be caused by a filter entry in the build configuration. If the path has been excluded in your configuration, e.g. excluded_paths: ["lib/*"], you can move it to the dependency path list as follows:

filter:
    dependency_paths: ["lib/*"]

For further information see https://scrutinizer-ci.com/docs/tools/php/php-scrutinizer/#list-dependency-paths

Loading history...
30
31
use Stringy\Stringy;
32
33
use PhpScience\TextRank\TextRankFacade;
34
use PhpScience\TextRank\Tool\StopWords\StopWordsAbstract;
35
36
/**
37
 * @author    nystudio107
0 ignored issues
show
Coding Style introduced by
The tag in position 1 should be the @package tag
Loading history...
Coding Style introduced by
Content of the @author tag must be in the form "Display Name <[email protected]>"
Loading history...
38
 * @package   Seomatic
39
 * @since     3.0.0
40
 */
41
class Text
42
{
43
    // Constants
44
    // =========================================================================
45
46
    const LANGUAGE_MAP = [
47
        'en' => 'English',
48
        'fr' => 'French',
49
        'de' => 'German',
50
        'it' => 'Italian',
51
        'no' => 'Norwegian',
52
        'es' => 'Spanish',
53
    ];
54
55
    // Public Static Methods
56
    // =========================================================================
57
58
    /**
     * Truncates the string to a given length. If $substring is provided, and
     * truncating occurs, the string is further truncated so that the substring
     * may be appended without exceeding the desired length.
     *
     * HTML tags are stripped from non-empty input before truncating. Empty
     * input (as judged by empty()) is returned untouched when it is scalar;
     * null, empty arrays and other non-scalar empties yield '' so that the
     * declared string return type is always honoured.
     *
     * @param  string $string    The string to truncate
     * @param  int    $length    Desired length of the truncated string
     * @param  string $substring The substring to append if it can fit
     *
     * @return string The truncated string
     */
    public static function truncate($string, $length, $substring = '…'): string
    {
        if (empty($string)) {
            // Returning null or [] directly would raise a TypeError against
            // the string return type, so normalise those to ''
            return \is_scalar($string) ? (string)$string : '';
        }

        return (string)Stringy::create(strip_tags($string))->truncate($length, $substring);
    }
80
81
    /**
     * Truncates the string to a given length, while ensuring that it does not
     * split words. If $substring is provided, and truncating occurs, the
     * string is further truncated so that the substring may be appended without
     * exceeding the desired length.
     *
     * HTML tags are stripped from non-empty input before truncating. Empty
     * input (as judged by empty()) is returned untouched when it is scalar;
     * null, empty arrays and other non-scalar empties yield '' so that the
     * declared string return type is always honoured.
     *
     * @param  string $string    The string to truncate
     * @param  int    $length    Desired length of the truncated string
     * @param  string $substring The substring to append if it can fit
     *
     * @return string The truncated string
     */
    public static function truncateOnWord($string, $length, $substring = '…'): string
    {
        if (empty($string)) {
            // Returning null or [] directly would raise a TypeError against
            // the string return type, so normalise those to ''
            return \is_scalar($string) ? (string)$string : '';
        }

        return (string)Stringy::create(strip_tags($string))->safeTruncate($length, $substring);
    }
104
105
    /**
     * Extract plain old text from a field
     *
     * Element queries, and arrays whose first element (index 0) is a matching
     * element, are dispatched to the Matrix, Neo, SuperTable or Tag
     * extractor. Anything else is cast to a string (for arrays, only the
     * element at index 0 is used) and has its tags stripped. The result is
     * always passed through sanitizeUserInput().
     *
     * @param mixed $field The field value: an element query, an array of
     *                     elements, or a value that can be cast to a string
     *
     * @return string
     */
    public static function extractTextFromField($field): string
    {
        if (empty($field)) {
            return '';
        }
        // Arrays are classified by their first element. An array without an
        // index 0 (e.g. an associative array) has no such element, so it is
        // treated as empty text rather than triggering an undefined offset.
        $first = \is_array($field) ? ($field[0] ?? null) : null;

        if ($field instanceof MatrixBlockQuery || $first instanceof MatrixBlock) {
            $result = self::extractTextFromMatrix($field);
        } elseif ($field instanceof NeoBlockQuery || $first instanceof NeoBlock) {
            $result = self::extractTextFromNeo($field);
        } elseif ($field instanceof SuperTableBlockQuery || $first instanceof SuperTableBlock) {
            $result = self::extractTextFromSuperTable($field);
        } elseif ($field instanceof TagQuery || $first instanceof Tag) {
            $result = self::extractTextFromTags($field);
        } elseif (\is_array($field)) {
            $result = self::smartStripTags((string)$first);
        } else {
            $result = self::smartStripTags((string)$field);
        }

        return self::sanitizeUserInput($result);
    }
140
141
    /**
     * Extract the titles of all of the tags in $tags and return them as a
     * comma-delimited string
     *
     * @param TagQuery|Tag[] $tags A tag query (its results are fetched via
     *                             all()) or an array of tag elements
     *
     * @return string The tag titles joined with ', ', or '' if $tags is empty
     */
    public static function extractTextFromTags($tags): string
    {
        if (empty($tags)) {
            return '';
        }
        // Resolve a query into its tag elements
        if ($tags instanceof TagQuery) {
            $tags = $tags->all();
        }
        $titles = [];
        foreach ($tags as $tag) {
            $titles[] = (string)$tag->title;
        }

        // implode() adds separators only between titles, so commas or spaces
        // that are part of the last title are preserved (a trailing
        // rtrim(', ') would strip them as a character list)
        return implode(', ', $titles);
    }
166
167
    /**
     * Extract text from all of the blocks in a matrix field, concatenating it
     * together.
     *
     * @param MatrixBlockQuery|MatrixBlock[] $blocks      A matrix block query or an array of matrix blocks
     * @param string                         $fieldHandle If non-empty, only the block field with this handle
     *                                                    is used; otherwise every text field is used
     *
     * @return string The extracted text, each piece followed by a single space
     */
    public static function extractTextFromMatrix($blocks, $fieldHandle = ''): string
    {
        if (empty($blocks)) {
            return '';
        }
        // Resolve a query into its matrix blocks
        $matrixBlocks = $blocks instanceof MatrixBlockQuery ? $blocks->all() : $blocks;
        $text = '';
        foreach ($matrixBlocks as $matrixBlock) {
            try {
                $blockType = $matrixBlock->getType();
            } catch (InvalidConfigException $e) {
                $blockType = null;
            }
            // Without a block type there are no fields to inspect
            if (!$blockType) {
                continue;
            }
            /** @var array $textFieldClasses */
            $textFieldClasses = FieldHelper::FIELD_CLASSES[FieldHelper::TEXT_FIELD_CLASS_KEY];
            foreach ($blockType->getFields() as $blockField) {
                foreach ($textFieldClasses as $textFieldClass) {
                    // Only fields of one of the text field classes contribute
                    if (!($blockField instanceof $textFieldClass)) {
                        continue;
                    }
                    if ($blockField->handle === $fieldHandle || empty($fieldHandle)) {
                        $text .= self::extractTextFromField($matrixBlock[$blockField->handle]).' ';
                    }
                }
            }
        }

        return $text;
    }
212
213
    /**
     * Extract text from all of the blocks in a Neo field, concatenating it
     * together.
     *
     * @param NeoBlockQuery|NeoBlock[] $blocks      A Neo block query or an array of Neo blocks
     * @param string                   $fieldHandle If non-empty, only the block field with this handle
     *                                              is used; otherwise every text field is used
     *
     * @return string The extracted text, each piece followed by a single space
     */
    public static function extractTextFromNeo($blocks, $fieldHandle = ''): string
    {
        if (empty($blocks)) {
            return '';
        }
        // Resolve a query into its Neo blocks
        $neoBlocks = $blocks instanceof NeoBlockQuery ? $blocks->all() : $blocks;
        $text = '';
        foreach ($neoBlocks as $neoBlock) {
            try {
                $blockType = $neoBlock->getType();
            } catch (InvalidConfigException $e) {
                $blockType = null;
            }
            // Without a block type there are no fields to inspect
            if (!$blockType) {
                continue;
            }
            /** @var array $textFieldClasses */
            $textFieldClasses = FieldHelper::FIELD_CLASSES[FieldHelper::TEXT_FIELD_CLASS_KEY];
            foreach ($blockType->getFields() as $blockField) {
                foreach ($textFieldClasses as $textFieldClass) {
                    // Only fields of one of the text field classes contribute
                    if (!($blockField instanceof $textFieldClass)) {
                        continue;
                    }
                    if ($blockField->handle === $fieldHandle || empty($fieldHandle)) {
                        $text .= self::extractTextFromField($neoBlock[$blockField->handle]).' ';
                    }
                }
            }
        }

        return $text;
    }
258
259
    /**
     * Extract text from all of the blocks in a Super Table field,
     * concatenating it together.
     *
     * @param SuperTableBlockQuery|SuperTableBlock[] $blocks      A Super Table block query or an array of
     *                                                            Super Table blocks
     * @param string                                 $fieldHandle If non-empty, only the block field with
     *                                                            this handle is used; otherwise every text
     *                                                            field is used
     *
     * @return string The extracted text, each piece followed by a single space
     */
    public static function extractTextFromSuperTable($blocks, $fieldHandle = ''): string
    {
        if (empty($blocks)) {
            return '';
        }
        // Resolve a query into its Super Table blocks
        $superTableBlocks = $blocks instanceof SuperTableBlockQuery ? $blocks->all() : $blocks;
        $text = '';
        foreach ($superTableBlocks as $superTableBlock) {
            try {
                $blockType = $superTableBlock->getType();
            } catch (InvalidConfigException $e) {
                $blockType = null;
            }
            // Without a block type there are no fields to inspect
            if (!$blockType) {
                continue;
            }
            /** @var array $textFieldClasses */
            $textFieldClasses = FieldHelper::FIELD_CLASSES[FieldHelper::TEXT_FIELD_CLASS_KEY];
            foreach ($blockType->getFields() as $blockField) {
                foreach ($textFieldClasses as $textFieldClass) {
                    // Only fields of one of the text field classes contribute
                    if (!($blockField instanceof $textFieldClass)) {
                        continue;
                    }
                    if ($blockField->handle === $fieldHandle || empty($fieldHandle)) {
                        $text .= self::extractTextFromField($superTableBlock[$blockField->handle]).' ';
                    }
                }
            }
        }

        return $text;
    }
304
305
    /**
     * Return the most important keywords extracted from the text as a comma-
     * delimited string
     *
     * @param string $text         The text to extract keywords from
     * @param int    $limit        The maximum number of keywords to return
     * @param bool   $useStopWords Whether stop words for the current SEOmatic
     *                             language should be ignored during ranking
     *
     * @return string The keywords joined with ', '. '' if $text is empty; the
     *                original $text if no keywords were found.
     */
    public static function extractKeywords($text, $limit = 15, $useStopWords = true): string
    {
        if (empty($text)) {
            return '';
        }
        $textRank = new TextRankFacade();
        if ($useStopWords) {
            // Only the first two characters of the language are used as the lookup key
            $languageCode = strtolower(substr(Seomatic::$language, 0, 2));
            $stopWords = self::stopWordsForLanguage($languageCode);
            if ($stopWords !== null) {
                $textRank->setStopWords($stopWords);
            }
        }
        // The keywords are the array keys of the returned result
        $rankedKeywords = $textRank->getOnlyKeyWords(self::cleanupText($text));

        // Nothing was extracted: hand back the text as it was passed in
        if (empty($rankedKeywords)) {
            return $text;
        }

        if (\is_array($rankedKeywords)) {
            $topKeywords = \array_slice(array_keys($rankedKeywords), 0, $limit);
            $keywordList = implode(', ', $topKeywords);
        } else {
            $keywordList = (string)$rankedKeywords;
        }

        return self::sanitizeUserInput($keywordList);
    }
343
344
    /**
     * Extract a summary consisting of the most important sentences from the
     * text, as selected by TextRankFacade::getHighlights()
     *
     * NOTE(review): the previous description said "the 3 most important
     * sentences"; the sentence count is decided by the TextRank library and
     * is not visible here -- confirm before relying on it.
     *
     * @param string $text         The text to summarize
     * @param bool   $useStopWords Whether stop words for the current SEOmatic
     *                             language should be ignored during ranking
     *
     * @return string The selected sentences joined with a space. '' if $text
     *                is empty; the original $text if nothing was selected.
     */
    public static function extractSummary($text, $useStopWords = true): string
    {
        if (empty($text)) {
            return '';
        }
        $textRank = new TextRankFacade();
        if ($useStopWords) {
            // Only the first two characters of the language are used as the lookup key
            $languageCode = strtolower(substr(Seomatic::$language, 0, 2));
            $stopWords = self::stopWordsForLanguage($languageCode);
            if ($stopWords !== null) {
                $textRank->setStopWords($stopWords);
            }
        }
        // Array of the most important sentences
        $highlights = $textRank->getHighlights(self::cleanupText($text));

        // Nothing was selected: hand back the text as it was passed in
        if (empty($highlights)) {
            return $text;
        }

        if (\is_array($highlights)) {
            $summary = implode(' ', $highlights);
        } else {
            $summary = (string)$highlights;
        }

        return self::sanitizeUserInput($summary);
    }
381
382
383
    /**
     * Sanitize user input by decoding any HTML Entities, URL decoding the text,
     * then removing any newlines, stripping tags, stripping Twig tags, and changing
     * single {}'s into ()'s
     *
     * @param mixed $str The text to sanitize. null, arrays, and objects that
     *                   cannot be cast to a string yield ''.
     *
     * @return string The sanitized text
     */
    public static function sanitizeUserInput($str): string
    {
        // Values that cannot be represented as text sanitize to nothing
        if ($str === null
            || \is_array($str)
            || (\is_object($str) && !method_exists($str, '__toString'))
        ) {
            return '';
        }
        // Do some general cleanup
        $str = html_entity_decode((string)$str, ENT_NOQUOTES, 'UTF-8');
        // rawurldecode() rather than urldecode(): the latter also converts a
        // literal "+" into a space, mangling text such as "C++"
        $str = rawurldecode($str);
        // Remove any linebreaks
        $str = (string)preg_replace("/\r|\n/", '', $str);
        $str = strip_tags($str);
        // Patterns are applied in order: remove any embedded Twig code first,
        // then change the remaining single brackets to parenthesis
        $str = preg_replace(
            ['/{{.*?}}/', '/{%.*?%}/', '/{/', '/}/'],
            ['', '', '(', ')'],
            $str
        );

        // preg_replace() returns null on failure; the cast turns that into ''
        // while leaving legitimate text such as "0" intact
        return (string)$str;
    }
411
412
    /**
     * Strip HTML tags, but replace them with a space rather than just eliminating them
     *
     * A space is inserted before every "<", the tags are stripped, and every
     * run of two or more spaces is then collapsed into a single space. The
     * result is not trimmed, so it may begin or end with one space.
     *
     * @param string $str The text to strip tags from
     *
     * @return string The text with tags replaced by single spaces
     */
    public static function smartStripTags($str)
    {
        $str = str_replace('<', ' <', (string)$str);
        $str = strip_tags($str);

        // A single str_replace('  ', ' ') pass leaves double spaces behind
        // when three or more tags are adjacent; collapse whole runs instead
        return (string)preg_replace('/ {2,}/', ' ', $str);
    }
426
427
    /**
     * Clean up the passed in text by converting it to UTF-8, stripping tags,
     * removing whitespace, and decoding HTML entities
     *
     * @param string $text The text to clean up
     *
     * @return string The cleaned up text, or '' if $text is empty
     */
    public static function cleanupText($text): string
    {
        if (empty($text) || \is_array($text)) {
            return '';
        }
        $text = (string)$text;
        // Convert to UTF-8
        $utf8Text = false;
        if (\function_exists('iconv')) {
            // Detection returns false when no candidate encoding matches, in
            // which case there is no valid source encoding to hand to iconv()
            $sourceEncoding = mb_detect_encoding($text, mb_detect_order(), true);
            if ($sourceEncoding !== false) {
                $utf8Text = iconv($sourceEncoding, 'UTF-8//IGNORE', $text);
            }
        }
        if ($utf8Text === false) {
            // iconv is unavailable or failed: drop invalid byte sequences via
            // mbstring, restoring the substitute character afterwards so the
            // setting does not leak into the rest of the request
            $previousSubstitute = mb_substitute_character();
            mb_substitute_character('none');
            $utf8Text = (string)mb_convert_encoding($text, 'UTF-8', 'UTF-8');
            mb_substitute_character($previousSubstitute);
        }
        // Strip HTML tags
        $text = strip_tags($utf8Text);
        // Remove excess whitespace; preg_replace() returns null on failure
        $text = (string)preg_replace('/\s{2,}/u', ' ', $text);

        // Decode any HTML entities
        return html_entity_decode($text);
    }
456
457
    // Protected Static Methods
458
    // =========================================================================
459
460
    /**
     * Return a stop words object for the passed in language
     *
     * The code is looked up in LANGUAGE_MAP; codes that are missing (or map
     * to an empty value) fall back to English. The stop words class is
     * resolved by name under PhpScience\TextRank\Tool\StopWords.
     *
     * @param string $language The language code used as the LANGUAGE_MAP key
     *
     * @return null|StopWordsAbstract The stop words instance, or null if the
     *                                resolved class does not exist
     */
    protected static function stopWordsForLanguage(string $language)
    {
        $languageName = empty(self::LANGUAGE_MAP[$language])
            ? 'English'
            : self::LANGUAGE_MAP[$language];
        $stopWordsClass = 'PhpScience\\TextRank\\Tool\\StopWords\\'.ucfirst($languageName);

        return class_exists($stopWordsClass) ? new $stopWordsClass : null;
    }
481
}
482