Issues (25)

src/Traits/NodeCommonTrait.php (1 issue)

Severity
1
<?php declare(strict_types=1);
2
3
namespace Goose\Traits;
4
5
use Goose\Utils\Helper;
6
use DOMWrap\Element;
7
8
/**
9
 * Node Common Trait
10
 *
11
 * @package Goose\Traits
12
 * @license http://www.apache.org/licenses/LICENSE-2.0 Apache License 2.0
13
 */
14
trait NodeCommonTrait {
    /**
     * Decides whether a node's text is dominated by links.
     *
     * The score is (words inside links / words in the node) * number of links,
     * where "links" are the descendants matching `a, [onclick]`. A node that
     * contains no links, or whose text yields no words, is never link-dense.
     *
     * @param Element $node  Node whose text and descendant links are measured.
     * @param float   $limit Score at or above which the node counts as link-dense.
     *
     * @return bool True when the score reaches $limit, false otherwise.
     */
    private function isHighLinkDensity(Element $node, float $limit = 1.0): bool {
        $links = $node->find('a, [onclick]');
        $numberOfLinks = $links->count();

        if ($numberOfLinks === 0) {
            return false;
        }

        // preg_split() returns false on failure (for example when the /u
        // modifier meets invalid UTF-8), so the result must be checked
        // before it is counted.
        $words = preg_split('@[\s]+@iu', $node->text(), -1, PREG_SPLIT_NO_EMPTY);

        if ($words === false || $words === []) {
            return false;
        }

        $linkTexts = [];
        foreach ($links as $link) {
            $linkTexts[] = Helper::textNormalise($link->text());
        }

        // Join with a space so the last word of one link does not fuse with
        // the first word of the next, and tokenise exactly like the node text
        // so that links without any text contribute zero words (a plain
        // explode(' ', '') would report one empty "word").
        $linkWords = preg_split('@[\s]+@iu', implode(' ', $linkTexts), -1, PREG_SPLIT_NO_EMPTY) ?: [];

        $score = (count($linkWords) / count($words)) * $numberOfLinks;

        return $score >= $limit;
    }
}
56