Passed
Push — develop ( af1e8c...cc259d )
by Andrew
05:46
created

Text::extractTextFromNeo()   B

Complexity

Conditions 11
Paths 27

Size

Total Lines 35
Code Lines 19

Duplication

Lines 0
Ratio 0 %

Importance

Changes 0
Metric Value
cc 11
eloc 19
nc 27
nop 2
dl 0
loc 35
rs 7.3166
c 0
b 0
f 0

How to fix   Complexity   

Long Method

Small methods make your code easier to understand, in particular if combined with a good name. Besides, if your method is small, finding a good name is usually much easier.

For example, if you find yourself adding comments to a method's body, this is usually a good sign to extract the commented part to a new method, and use the comment as a starting point when coming up with a good name for this new method.

Commonly applied refactorings include:

- Extract Method

1
<?php
2
/**
3
 * SEOmatic plugin for Craft CMS 3.x
4
 *
5
 * A turnkey SEO implementation for Craft CMS that is comprehensive, powerful,
6
 * and flexible
7
 *
8
 * @link      https://nystudio107.com
9
 * @copyright Copyright (c) 2017 nystudio107
10
 */
11
12
namespace nystudio107\seomatic\helpers;
13
14
use nystudio107\seomatic\helpers\Field as FieldHelper;
15
16
use nystudio107\seomatic\Seomatic;
17
18
use craft\elements\db\MatrixBlockQuery;
19
use craft\elements\db\TagQuery;
20
use craft\elements\MatrixBlock;
21
use craft\elements\Tag;
22
23
use yii\base\InvalidConfigException;
24
25
use benf\neo\elements\db\BlockQuery as NeoBlockQuery;
0 ignored issues
show
Bug introduced by
The type benf\neo\elements\db\BlockQuery was not found. Maybe you did not declare it correctly or list all dependencies?

The issue could also be caused by a filter entry in the build configuration. If the path has been excluded in your configuration, e.g. excluded_paths: ["lib/*"], you can move it to the dependency path list as follows:

filter:
    dependency_paths: ["lib/*"]

For further information see https://scrutinizer-ci.com/docs/tools/php/php-scrutinizer/#list-dependency-paths

Loading history...
26
use benf\neo\elements\Block as NeoBlock;
0 ignored issues
show
Bug introduced by
The type benf\neo\elements\Block was not found. Maybe you did not declare it correctly or list all dependencies?

The issue could also be caused by a filter entry in the build configuration. If the path has been excluded in your configuration, e.g. excluded_paths: ["lib/*"], you can move it to the dependency path list as follows:

filter:
    dependency_paths: ["lib/*"]

For further information see https://scrutinizer-ci.com/docs/tools/php/php-scrutinizer/#list-dependency-paths

Loading history...
27
28
use Stringy\Stringy;
29
30
use PhpScience\TextRank\TextRankFacade;
31
use PhpScience\TextRank\Tool\StopWords\StopWordsAbstract;
32
33
/**
34
 * @author    nystudio107
0 ignored issues
show
Coding Style introduced by
The tag in position 1 should be the @package tag
Loading history...
Coding Style introduced by
Content of the @author tag must be in the form "Display Name <username@example.com>"
Loading history...
35
 * @package   Seomatic
36
 * @since     3.0.0
0 ignored issues
show
Coding Style introduced by
The tag in position 3 should be the @author tag
Loading history...
37
 */
0 ignored issues
show
Coding Style introduced by
Missing @license tag in class comment
Loading history...
Coding Style introduced by
Missing @link tag in class comment
Loading history...
38
class Text
39
{
40
    // Constants
41
    // =========================================================================
42
43
    const LANGUAGE_MAP = [
44
        'en' => 'English',
45
        'fr' => 'French',
46
        'de' => 'German',
47
        'it' => 'Italian',
48
        'no' => 'Norwegian',
49
        'es' => 'Spanish',
50
    ];
51
52
    // Public Static Methods
53
    // =========================================================================
54
55
    /**
     * Truncates the string to a given length. If $substring is provided, and
     * truncating occurs, the string is further truncated so that the substring
     * may be appended without exceeding the desired length.
     *
     * HTML tags are stripped before truncating. Empty input (null, '', '0',
     * 0, false, []) is never handed to Stringy; it is returned as a string.
     *
     * @param  string $string    The string to truncate
     * @param  int    $length    Desired length of the truncated string
     * @param  string $substring The substring to append if it can fit
     *
     * @return string The resulting string after truncating
     */
    public static function truncate($string, $length, $substring = '…'): string
    {
        if (empty($string)) {
            // Cast explicitly: handing back null (or an empty array) as-is
            // would violate the declared string return type and raise a
            // TypeError instead of yielding an empty result
            return \is_scalar($string) ? (string)$string : '';
        }

        return (string)Stringy::create(strip_tags($string))->truncate($length, $substring);
    }
77
78
    /**
     * Truncates the string to a given length, while ensuring that it does not
     * split words. If $substring is provided, and truncating occurs, the
     * string is further truncated so that the substring may be appended without
     * exceeding the desired length.
     *
     * HTML tags are stripped before truncating. Empty input (null, '', '0',
     * 0, false, []) is never handed to Stringy; it is returned as a string.
     *
     * @param  string $string    The string to truncate
     * @param  int    $length    Desired length of the truncated string
     * @param  string $substring The substring to append if it can fit
     *
     * @return string The resulting string after truncating
     */
    public static function truncateOnWord($string, $length, $substring = '…'): string
    {
        if (empty($string)) {
            // Cast explicitly: handing back null (or an empty array) as-is
            // would violate the declared string return type and raise a
            // TypeError instead of yielding an empty result
            return \is_scalar($string) ? (string)$string : '';
        }

        return (string)Stringy::create(strip_tags($string))->safeTruncate($length, $substring);
    }
101
102
    /**
     * Extract plain old text from a field
     *
     * Matrix, Neo and Tag values -- passed either as an element query or as
     * an array whose first item is such an element -- are delegated to the
     * matching extractTextFrom*() method. Anything else is cast to a string
     * (for arrays: only the item at index 0) and stripped of HTML tags.
     *
     * @param mixed $field The field value to extract text from
     *
     * @return string
     */
    public static function extractTextFromField($field): string
    {
        if (empty($field)) {
            return '';
        }
        // Only the item at index 0 decides what kind of elements an array
        // holds. Read it once with a null fallback so arrays that have no
        // index 0 (associative or sparse) do not raise an undefined-offset
        // warning.
        $isArray = \is_array($field);
        $first = $isArray ? ($field[0] ?? null) : null;

        if ($field instanceof MatrixBlockQuery || $first instanceof MatrixBlock) {
            return self::extractTextFromMatrix($field);
        }
        if ($field instanceof NeoBlockQuery || $first instanceof NeoBlock) {
            return self::extractTextFromNeo($field);
        }
        if ($field instanceof TagQuery || $first instanceof Tag) {
            return self::extractTextFromTags($field);
        }

        return strip_tags((string)($isArray ? $first : $field));
    }
133
134
    /**
     * Build a comma-delimited string out of the titles of the passed in tags
     *
     * @param TagQuery|Tag[] $tags A tag query (which is executed via all()) or
     *                             an array of tag elements
     *
     * @return string The tag titles joined with ', '; any commas or spaces
     *                left at the very end of the joined string are trimmed
     */
    public static function extractTextFromTags($tags): string
    {
        if (empty($tags)) {
            return '';
        }
        // A query has to be executed first to get at the tag elements
        $tagList = $tags instanceof TagQuery ? $tags->all() : $tags;

        // Collect every tag title, in order
        $titles = [];
        foreach ($tagList as $tag) {
            $titles[] = $tag->title;
        }

        return rtrim(implode(', ', $titles), ', ');
    }
159
160
    /**
     * Extract text from all of the blocks in a matrix field, concatenating it
     * together.
     *
     * For every block, each field of the block type that is an instance of
     * one of the FieldHelper text field classes is run through
     * extractTextFromField(); the results are appended, each followed by a
     * single space.
     *
     * @param MatrixBlockQuery|MatrixBlock[] $blocks      A matrix block query (executed
     *                                                    via all()) or an array of blocks
     * @param string                         $fieldHandle If not empty, only fields with
     *                                                    this handle are extracted
     *
     * @return string
     */
    public static function extractTextFromMatrix($blocks, $fieldHandle = ''): string
    {
        if (empty($blocks)) {
            return '';
        }
        if ($blocks instanceof MatrixBlockQuery) {
            $blocks = $blocks->all();
        }
        // The list of text field classes is the same for every block
        $textFieldClasses = FieldHelper::FIELD_CLASSES[FieldHelper::TEXT_FIELD_CLASS_KEY];
        $result = '';
        // Iterate through all of the matrix blocks
        foreach ($blocks as $block) {
            try {
                $matrixBlockTypeModel = $block->getType();
            } catch (InvalidConfigException $e) {
                // Without a block type there are no fields to look at
                continue;
            }
            if (!$matrixBlockTypeModel) {
                continue;
            }
            // Find any text fields inside of the matrix block
            foreach ($matrixBlockTypeModel->getFields() as $field) {
                if (!empty($fieldHandle) && $field->handle !== $fieldHandle) {
                    continue;
                }
                foreach ($textFieldClasses as $fieldClass) {
                    if ($field instanceof $fieldClass) {
                        $result .= self::extractTextFromField($block[$field->handle]).' ';
                        // One match is enough: a field that is an instance of
                        // several listed classes must not be appended twice
                        break;
                    }
                }
            }
        }

        return $result;
    }
205
206
    /**
     * Extract text from all of the blocks in a Neo field, concatenating it
     * together.
     *
     * For every block, each field of the block type that is an instance of
     * one of the FieldHelper text field classes is run through
     * extractTextFromField(); the results are appended, each followed by a
     * single space.
     *
     * @param NeoBlockQuery|NeoBlock[] $blocks      A Neo block query (executed via
     *                                              all()) or an array of Neo blocks
     * @param string                   $fieldHandle If not empty, only fields with
     *                                              this handle are extracted
     *
     * @return string
     */
    public static function extractTextFromNeo($blocks, $fieldHandle = ''): string
    {
        if (empty($blocks)) {
            return '';
        }
        if ($blocks instanceof NeoBlockQuery) {
            $blocks = $blocks->all();
        }
        // The list of text field classes is the same for every block
        $textFieldClasses = FieldHelper::FIELD_CLASSES[FieldHelper::TEXT_FIELD_CLASS_KEY];
        $result = '';
        // Iterate through all of the Neo blocks
        foreach ($blocks as $block) {
            try {
                $neoBlockTypeModel = $block->getType();
            } catch (InvalidConfigException $e) {
                // Without a block type there are no fields to look at
                continue;
            }
            if (!$neoBlockTypeModel) {
                continue;
            }
            // Find any text fields inside of the Neo block
            foreach ($neoBlockTypeModel->getFields() as $field) {
                if (!empty($fieldHandle) && $field->handle !== $fieldHandle) {
                    continue;
                }
                foreach ($textFieldClasses as $fieldClass) {
                    if ($field instanceof $fieldClass) {
                        $result .= self::extractTextFromField($block[$field->handle]).' ';
                        // One match is enough: a field that is an instance of
                        // several listed classes must not be appended twice
                        break;
                    }
                }
            }
        }

        return $result;
    }
251
252
    /**
     * Return the most important keywords extracted from the text as a comma-
     * delimited string
     *
     * The text is run through cleanupText() and then handed to TextRank. If
     * TextRank comes back with nothing, the original text is returned as is.
     *
     * @param string $text         The text to extract keywords from
     * @param int    $limit        The maximum number of keywords to return
     * @param bool   $useStopWords Whether the stop words for the current
     *                             Seomatic language should be ignored
     *
     * @return string
     */
    public static function extractKeywords($text, $limit = 15, $useStopWords = true): string
    {
        if (empty($text)) {
            return '';
        }
        $api = new TextRankFacade();
        // Set the stop words that should be ignored
        if ($useStopWords) {
            // Only the first two characters of the language are used for the lookup
            $language = strtolower(substr(Seomatic::$language, 0, 2));
            $stopWords = self::stopWordsForLanguage($language);
            if ($stopWords !== null) {
                $api->setStopWords($stopWords);
            }
        }
        // Array of the most important keywords; the keywords are the array keys
        $keywords = $api->getOnlyKeyWords(self::cleanupText($text));

        // If it's empty, just return the text
        if (empty($keywords)) {
            return $text;
        }

        // getOnlyKeyWords() always returns an array, so no scalar fallback is needed
        return implode(', ', \array_slice(array_keys($keywords), 0, $limit));
    }
288
289
    /**
     * Extract a summary consisting of the most important sentences from the
     * text, as picked by TextRank's getHighlights()
     *
     * The text is run through cleanupText() and then handed to TextRank. If
     * TextRank comes back with nothing, the original text is returned as is.
     *
     * NOTE(review): this was previously documented as "the 3 most important
     * sentences"; the number of sentences is decided by the TextRank library,
     * not by this method -- confirm against the library.
     *
     * @param string $text         The text to summarize
     * @param bool   $useStopWords Whether the stop words for the current
     *                             Seomatic language should be ignored
     *
     * @return string The highlighted sentences joined with a single space
     */
    public static function extractSummary($text, $useStopWords = true): string
    {
        if (empty($text)) {
            return '';
        }
        $api = new TextRankFacade();
        // Set the stop words that should be ignored
        if ($useStopWords) {
            // Only the first two characters of the language are used for the lookup
            $language = strtolower(substr(Seomatic::$language, 0, 2));
            $stopWords = self::stopWordsForLanguage($language);
            if ($stopWords !== null) {
                $api->setStopWords($stopWords);
            }
        }
        // Array of the most important sentences
        $sentences = $api->getHighlights(self::cleanupText($text));

        // If it's empty, just return the text
        if (empty($sentences)) {
            return $text;
        }

        // getHighlights() always returns an array, so no scalar fallback is needed
        return implode(' ', $sentences);
    }
324
325
    /**
     * Clean up the passed in text by converting it to UTF-8, stripping tags,
     * collapsing runs of whitespace into a single space, and decoding HTML
     * entities (in that order)
     *
     * A failing step never discards the text: if the encoding cannot be
     * detected or iconv() fails, the text is passed through mbstring instead,
     * and if the whitespace regex fails the text is left as it was.
     *
     * @param string $text The text to clean up
     *
     * @return string
     */
    public static function cleanupText($text): string
    {
        if (empty($text)) {
            return '';
        }
        $text = (string)$text;
        // Convert to UTF-8
        $converted = false;
        if (\function_exists('iconv')) {
            $encoding = mb_detect_encoding($text, mb_detect_order(), true);
            // Strict detection returns false if none of the candidate
            // encodings fit; that must not be handed to iconv() as a charset
            if ($encoding !== false) {
                $converted = iconv($encoding, 'UTF-8//IGNORE', $text);
            }
        }
        if ($converted === false) {
            // No iconv, unknown encoding, or iconv() failed: treat the text as
            // UTF-8 and have mbstring drop anything it cannot convert. The
            // substitute character is restored afterwards so the setting
            // does not leak into the rest of the request.
            $previousSubstitute = mb_substitute_character();
            mb_substitute_character('none');
            $converted = mb_convert_encoding($text, 'UTF-8', 'UTF-8');
            mb_substitute_character($previousSubstitute);
        }
        $text = $converted;
        // Strip HTML tags
        $text = strip_tags($text);
        // Remove excess whitespace; preg_replace() returns null on error, in
        // which case the text is kept unchanged
        $collapsed = preg_replace('/\s{2,}/u', ' ', $text);
        if ($collapsed !== null) {
            $text = $collapsed;
        }
        // Decode any HTML entities
        $text = html_entity_decode($text);

        return $text;
    }
354
355
    // Protected Static Methods
356
    // =========================================================================
357
358
    /**
     * Return a TextRank stop words object for the passed in language code
     *
     * The code is looked up in LANGUAGE_MAP; codes that are not in the map
     * fall back to English.
     *
     * @param string $language The language code to look up in LANGUAGE_MAP
     *
     * @return null|StopWordsAbstract null if the stop words class does not exist
     */
    protected static function stopWordsForLanguage(string $language)
    {
        $languageName = self::LANGUAGE_MAP[$language] ?? 'English';
        $stopWordsClass = 'PhpScience\\TextRank\\Tool\\StopWords\\'.ucfirst($languageName);

        return class_exists($stopWordsClass) ? new $stopWordsClass : null;
    }
379
}
380