Passed
Pull Request — v3 (#685)
by Timothy
16:12
created

Text::sanitizeFieldData()   A

Complexity

Conditions 3
Paths 2

Size

Total Lines 17
Code Lines 10

Duplication

Lines 0
Ratio 0 %

Code Coverage

Tests 11
CRAP Score 3

Importance

Changes 2
Bugs 0 Features 0
Metric Value
cc 3
eloc 10
nc 2
nop 1
dl 0
loc 17
ccs 11
cts 11
cp 1
crap 3
rs 9.9332
c 2
b 0
f 0
1
<?php
2
/**
3
 * SEOmatic plugin for Craft CMS 3.x
4
 *
5
 * A turnkey SEO implementation for Craft CMS that is comprehensive, powerful,
6
 * and flexible
7
 *
8
 * @link      https://nystudio107.com
9
 * @copyright Copyright (c) 2017 nystudio107
10
 */
11
12
namespace nystudio107\seomatic\helpers;
13
14
use nystudio107\seomatic\helpers\Field as FieldHelper;
15
16
use nystudio107\seomatic\Seomatic;
17
18
use craft\elements\db\MatrixBlockQuery;
19
use craft\elements\db\TagQuery;
20
use craft\elements\MatrixBlock;
21
use craft\elements\Tag;
22
23
use yii\base\InvalidConfigException;
24
25
use verbb\supertable\elements\SuperTableBlockElement as SuperTableBlock;
0 ignored issues
show
Bug introduced by
The type verbb\supertable\elements\SuperTableBlockElement was not found. Maybe you did not declare it correctly or list all dependencies?

The issue could also be caused by a filter entry in the build configuration. If the path has been excluded in your configuration, e.g. excluded_paths: ["lib/*"], you can move it to the dependency path list as follows:

filter:
    dependency_paths: ["lib/*"]

For further information see https://scrutinizer-ci.com/docs/tools/php/php-scrutinizer/#list-dependency-paths

Loading history...
26
use verbb\supertable\elements\db\SuperTableBlockQuery;
0 ignored issues
show
Bug introduced by
The type verbb\supertable\elements\db\SuperTableBlockQuery was not found. Maybe you did not declare it correctly or list all dependencies?

The issue could also be caused by a filter entry in the build configuration. If the path has been excluded in your configuration, e.g. excluded_paths: ["lib/*"], you can move it to the dependency path list as follows:

filter:
    dependency_paths: ["lib/*"]

For further information see https://scrutinizer-ci.com/docs/tools/php/php-scrutinizer/#list-dependency-paths

Loading history...
27
28
use benf\neo\elements\db\BlockQuery as NeoBlockQuery;
0 ignored issues
show
Bug introduced by
The type benf\neo\elements\db\BlockQuery was not found. Maybe you did not declare it correctly or list all dependencies?

The issue could also be caused by a filter entry in the build configuration. If the path has been excluded in your configuration, e.g. excluded_paths: ["lib/*"], you can move it to the dependency path list as follows:

filter:
    dependency_paths: ["lib/*"]

For further information see https://scrutinizer-ci.com/docs/tools/php/php-scrutinizer/#list-dependency-paths

Loading history...
29
use benf\neo\elements\Block as NeoBlock;
0 ignored issues
show
Bug introduced by
The type benf\neo\elements\Block was not found. Maybe you did not declare it correctly or list all dependencies?

The issue could also be caused by a filter entry in the build configuration. If the path has been excluded in your configuration, e.g. excluded_paths: ["lib/*"], you can move it to the dependency path list as follows:

filter:
    dependency_paths: ["lib/*"]

For further information see https://scrutinizer-ci.com/docs/tools/php/php-scrutinizer/#list-dependency-paths

Loading history...
30
31
use Stringy\Stringy;
32
33
use PhpScience\TextRank\TextRankFacade;
34
use PhpScience\TextRank\Tool\StopWords\StopWordsAbstract;
35
36
/**
 * Static text helpers: truncation, plain-text extraction from Craft field
 * values (Matrix, Neo, Super Table, Tags, and string-like values), keyword and
 * summary extraction via TextRank, and sanitizing of extracted values.
 *
 * @author    nystudio107
 * @package   Seomatic
 * @since     3.0.0
 */
class Text
{
    // Constants
    // =========================================================================

    /**
     * Maps a two-letter language code to the name of the TextRank stop words
     * class for that language.
     */
    const LANGUAGE_MAP = [
        'en' => 'English',
        'fr' => 'French',
        'de' => 'German',
        'it' => 'Italian',
        'no' => 'Norwegian',
        'es' => 'Spanish',
    ];

    // Public Static Methods
    // =========================================================================

    /**
     * Truncates the string to a given length. If $substring is provided, and
     * truncating occurs, the string is further truncated so that the substring
     * may be appended without exceeding the desired length.
     *
     * HTML tags are stripped before truncating. An empty value is returned
     * as-is (cast to a string); null and other non-scalar empty values yield
     * an empty string so the declared return type is always honored.
     *
     * @param  string $string    The string to truncate
     * @param  int    $length    Desired length of the truncated string
     * @param  string $substring The substring to append if it can fit
     *
     * @return string with the resulting $str after truncating
     */
    public static function truncate($string, $length, $substring = '…'): string
    {
        if (empty($string)) {
            return \is_scalar($string) ? (string)$string : '';
        }

        return (string)Stringy::create(strip_tags($string))->truncate($length, $substring);
    }

    /**
     * Truncates the string to a given length, while ensuring that it does not
     * split words. If $substring is provided, and truncating occurs, the
     * string is further truncated so that the substring may be appended without
     * exceeding the desired length.
     *
     * HTML tags are stripped before truncating. An empty value is returned
     * as-is (cast to a string); null and other non-scalar empty values yield
     * an empty string so the declared return type is always honored.
     *
     * @param  string $string    The string to truncate
     * @param  int    $length    Desired length of the truncated string
     * @param  string $substring The substring to append if it can fit
     *
     * @return string with the resulting $str after truncating
     */
    public static function truncateOnWord($string, $length, $substring = '…'): string
    {
        if (empty($string)) {
            return \is_scalar($string) ? (string)$string : '';
        }

        return (string)Stringy::create(strip_tags($string))->safeTruncate($length, $substring);
    }

    /**
     * Extract plain old text from a field
     *
     * Matrix, Neo, Super Table and Tag values (either as an element query or
     * as an array of elements) are dispatched to their dedicated extractors.
     * Anything else is cast to a string (for an array, its first element is
     * used) and has its tags stripped. The result is always passed through
     * sanitizeFieldData().
     *
     * @param mixed $field The field value to extract text from
     *
     * @return string
     */
    public static function extractTextFromField($field): string
    {
        if (empty($field)) {
            return '';
        }
        // Look at the first element rather than index 0, so arrays that are
        // not zero-indexed are still recognized
        $first = \is_array($field) ? reset($field) : null;

        if ($field instanceof MatrixBlockQuery || $first instanceof MatrixBlock) {
            $result = self::extractTextFromMatrix($field);
        } elseif ($field instanceof NeoBlockQuery || $first instanceof NeoBlock) {
            $result = self::extractTextFromNeo($field);
        } elseif ($field instanceof SuperTableBlockQuery || $first instanceof SuperTableBlock) {
            $result = self::extractTextFromSuperTable($field);
        } elseif ($field instanceof TagQuery || $first instanceof Tag) {
            $result = self::extractTextFromTags($field);
        } else {
            $value = \is_array($field) ? $first : $field;
            $result = self::smartStripTags((string)$value);
        }

        return self::sanitizeFieldData($result);
    }

    /**
     * Extract the titles of all of the tags in $tags and return them as a
     * comma-delimited string. Tags with an empty title are skipped.
     *
     * @param TagQuery|Tag[] $tags The tags, as a query or an array of elements
     *
     * @return string
     */
    public static function extractTextFromTags($tags): string
    {
        if (empty($tags)) {
            return '';
        }
        if ($tags instanceof TagQuery) {
            $tags = $tags->all();
        }
        // Collect and join the titles, rather than appending ', ' and trimming
        // afterwards, so that a title's own trailing commas/spaces survive
        $titles = [];
        foreach ($tags as $tag) {
            $title = (string)$tag->title;
            if ($title !== '') {
                $titles[] = $title;
            }
        }

        return implode(', ', $titles);
    }

    /**
     * Extract text from all of the blocks in a matrix field, concatenating it
     * together.
     *
     * @param MatrixBlockQuery|MatrixBlock[] $blocks      The Matrix blocks
     * @param string                         $fieldHandle Only extract from the
     *                                                    field with this handle;
     *                                                    empty means all text
     *                                                    fields
     *
     * @return string
     */
    public static function extractTextFromMatrix($blocks, $fieldHandle = ''): string
    {
        if (empty($blocks)) {
            return '';
        }
        if ($blocks instanceof MatrixBlockQuery) {
            $blocks = $blocks->all();
        }

        return self::extractTextFromBlocks($blocks, $fieldHandle);
    }

    /**
     * Extract text from all of the blocks in a Neo field, concatenating it
     * together.
     *
     * @param NeoBlockQuery|NeoBlock[] $blocks      The Neo blocks
     * @param string                   $fieldHandle Only extract from the field
     *                                              with this handle; empty
     *                                              means all text fields
     *
     * @return string
     */
    public static function extractTextFromNeo($blocks, $fieldHandle = ''): string
    {
        if (empty($blocks)) {
            return '';
        }
        if ($blocks instanceof NeoBlockQuery) {
            $blocks = $blocks->all();
        }

        return self::extractTextFromBlocks($blocks, $fieldHandle);
    }

    /**
     * Extract text from all of the blocks in a Super Table field,
     * concatenating it together.
     *
     * @param SuperTableBlockQuery|SuperTableBlock[] $blocks      The Super Table
     *                                                            blocks
     * @param string                                 $fieldHandle Only extract
     *                                                            from the field
     *                                                            with this
     *                                                            handle; empty
     *                                                            means all text
     *                                                            fields
     *
     * @return string
     */
    public static function extractTextFromSuperTable($blocks, $fieldHandle = ''): string
    {
        if (empty($blocks)) {
            return '';
        }
        if ($blocks instanceof SuperTableBlockQuery) {
            $blocks = $blocks->all();
        }

        return self::extractTextFromBlocks($blocks, $fieldHandle);
    }

    /**
     * Return the most important keywords extracted from the text as a comma-
     * delimited string
     *
     * If no keywords can be extracted, the original text is returned instead.
     * Every non-empty result is passed through sanitizeFieldData().
     *
     * @param string $text         The text to extract keywords from
     * @param int    $limit        The maximum number of keywords to return
     * @param bool   $useStopWords Whether stop words for the current SEOmatic
     *                             language should be ignored
     *
     * @return string
     */
    public static function extractKeywords($text, $limit = 15, $useStopWords = true): string
    {
        if (empty($text)) {
            return '';
        }
        $api = self::createTextRank((bool)$useStopWords);
        // Array of the most important keywords:
        $keywords = $api->getOnlyKeyWords(self::cleanupText($text));

        // If it's empty, fall back to the text itself, sanitized like every
        // other value this method returns
        if (empty($keywords)) {
            return self::sanitizeFieldData($text);
        }

        $result = \is_array($keywords)
            ? implode(', ', \array_slice(array_keys($keywords), 0, $limit))
            : (string)$keywords;

        return self::sanitizeFieldData($result);
    }

    /**
     * Extract a summary consisting of the most important sentences from the
     * text, as returned by TextRank's getHighlights()
     *
     * If no sentences can be extracted, the original text is returned instead.
     * Every non-empty result is passed through sanitizeFieldData().
     *
     * @param string $text         The text to summarize
     * @param bool   $useStopWords Whether stop words for the current SEOmatic
     *                             language should be ignored
     *
     * @return string
     */
    public static function extractSummary($text, $useStopWords = true): string
    {
        if (empty($text)) {
            return '';
        }
        $api = self::createTextRank((bool)$useStopWords);
        // Array of the most important sentences:
        $sentences = $api->getHighlights(self::cleanupText($text));

        // If it's empty, fall back to the text itself, sanitized like every
        // other value this method returns
        if (empty($sentences)) {
            return self::sanitizeFieldData($text);
        }

        $result = \is_array($sentences)
            ? implode(' ', $sentences)
            : (string)$sentences;

        return self::sanitizeFieldData($result);
    }

    /**
     * Sanitize Twig code out of any extracted field values
     *
     * HTML entities and URL-encoding are decoded first, so that encoded markup
     * and encoded Twig delimiters are caught by the steps that follow. Null,
     * arrays and objects that cannot be cast to a string yield an empty string.
     *
     * @param mixed $str The value to sanitize
     *
     * @return string
     */
    public static function sanitizeFieldData($str): string
    {
        if (\is_object($str) && method_exists($str, '__toString')) {
            $str = (string)$str;
        }
        // Reject anything that is not string-like before any string function
        // touches it
        if (!\is_scalar($str)) {
            return '';
        }
        // Do some general cleanup
        $str = html_entity_decode((string)$str, ENT_NOQUOTES, 'UTF-8');
        // rawurldecode() rather than urldecode(), so a literal "+" in the text
        // is not turned into a space
        $str = rawurldecode($str);
        $str = strip_tags($str);
        // Remove any embedded Twig code; the "s" modifier lets "." match
        // newlines, so tags that span several lines are removed too
        $str = (string)preg_replace(['/{{.*?}}/s', '/{%.*?%}/s'], '', $str);

        // Change single brackets to parenthesis
        return str_replace(['{', '}'], ['(', ')'], $str);
    }

    /**
     * Strip HTML tags, but replace them with a space rather than just eliminating them
     *
     * Runs of spaces left behind by adjacent tags are collapsed into a single
     * space. Leading and trailing spaces are not trimmed.
     *
     * @param string $str The HTML to strip the tags from
     *
     * @return string
     */
    public static function smartStripTags($str)
    {
        // Put a space in front of every tag so adjacent words don't run
        // together once the tags are gone
        $stripped = strip_tags(str_replace('<', ' <', (string)$str));
        // Collapse every run of spaces; a single str_replace('  ', ' ') pass
        // leaves doubled spaces behind when three or more tags are adjacent
        $collapsed = preg_replace('/ {2,}/', ' ', $stripped);

        return $collapsed === null ? $stripped : $collapsed;
    }

    /**
     * Clean up the passed in text by converting it to UTF-8, stripping tags,
     * collapsing excess whitespace, and decoding HTML entities
     *
     * @param string $text The text to clean up
     *
     * @return string
     */
    public static function cleanupText($text): string
    {
        if (empty($text)) {
            return '';
        }
        // Convert to UTF-8
        $text = self::convertToUtf8((string)$text);
        // Strip HTML tags
        $text = strip_tags($text);
        // Remove excess whitespace; preg_replace() returns null on failure
        // (e.g. invalid UTF-8 with the "u" modifier), in which case the text
        // is kept as it is rather than thrown away
        $collapsed = preg_replace('/\s{2,}/u', ' ', $text);
        if ($collapsed !== null) {
            $text = $collapsed;
        }

        // Decode any HTML entities
        return html_entity_decode($text);
    }

    // Protected Static Methods
    // =========================================================================

    /**
     * Return the TextRank stop words instance for the passed in two-letter
     * language code, falling back to English for languages not listed in
     * LANGUAGE_MAP
     *
     * @param string $language Two-letter, lowercase language code
     *
     * @return null|StopWordsAbstract null if the stop words class does not exist
     */
    protected static function stopWordsForLanguage(string $language)
    {
        $languageName = self::LANGUAGE_MAP[$language] ?? 'English';
        $className = 'PhpScience\\TextRank\\Tool\\StopWords\\'.$languageName;

        return class_exists($className) ? new $className() : null;
    }

    // Private Static Methods
    // =========================================================================

    /**
     * Extract text from the text fields of every block, concatenating it
     * together with a trailing space after each field's text.
     *
     * @param iterable $blocks      Block elements exposing getType() and array
     *                              access by field handle
     * @param string   $fieldHandle Only extract from the field with this
     *                              handle; empty means all text fields
     *
     * @return string
     */
    private static function extractTextFromBlocks($blocks, $fieldHandle): string
    {
        $result = '';
        /** @var array $fieldClasses */
        $fieldClasses = FieldHelper::FIELD_CLASSES[FieldHelper::TEXT_FIELD_CLASS_KEY];

        foreach ($blocks as $block) {
            try {
                $blockType = $block->getType();
            } catch (InvalidConfigException $e) {
                continue;
            }
            if (!$blockType) {
                continue;
            }
            // Find any text fields inside of the block
            foreach ($blockType->getFields() as $field) {
                if (!empty($fieldHandle) && $field->handle !== $fieldHandle) {
                    continue;
                }
                foreach ($fieldClasses as $fieldClass) {
                    if ($field instanceof $fieldClass) {
                        $result .= self::extractTextFromField($block[$field->handle]).' ';
                        // Stop at the first match, so a field matching several
                        // of the classes is only extracted once
                        break;
                    }
                }
            }
        }

        return $result;
    }

    /**
     * Create a TextRank facade, optionally configured with the stop words for
     * the current SEOmatic language.
     *
     * @param bool $useStopWords Whether stop words should be ignored
     *
     * @return TextRankFacade
     */
    private static function createTextRank(bool $useStopWords): TextRankFacade
    {
        $api = new TextRankFacade();
        // Set the stop words that should be ignored
        if ($useStopWords) {
            $language = strtolower(substr((string)Seomatic::$language, 0, 2));
            $stopWords = self::stopWordsForLanguage($language);
            if ($stopWords !== null) {
                $api->setStopWords($stopWords);
            }
        }

        return $api;
    }

    /**
     * Convert the text to UTF-8, dropping characters that cannot be
     * represented. If a conversion fails, the text is returned unchanged.
     *
     * @param string $text The text to convert
     *
     * @return string
     */
    private static function convertToUtf8(string $text): string
    {
        if (\function_exists('iconv')) {
            // Strict detection returns false when no encoding in the detect
            // order matches; assume UTF-8 then instead of handing false to
            // iconv()
            $detected = \function_exists('mb_detect_encoding')
                ? mb_detect_encoding($text, mb_detect_order(), true)
                : false;
            $converted = iconv($detected ?: 'UTF-8', 'UTF-8//IGNORE', $text);

            return $converted === false ? $text : $converted;
        }
        if (\function_exists('mb_convert_encoding')) {
            // Drop invalid characters instead of substituting them, and put
            // the previous setting back afterwards so it doesn't leak into the
            // rest of the request
            $previous = mb_substitute_character();
            mb_substitute_character('none');
            $converted = mb_convert_encoding($text, 'UTF-8', 'UTF-8');
            mb_substitute_character($previous);

            return \is_string($converted) ? $converted : $text;
        }

        return $text;
    }
}
478