Analyzer   A
last analyzed

Complexity

Total Complexity 25

Size/Duplication

Total Lines 127
Duplicated Lines 0 %

Test Coverage

Coverage 93.22%

Importance

Changes 2
Bugs 1 Features 0
Metric Value
eloc 62
c 2
b 1
f 0
dl 0
loc 127
ccs 55
cts 59
cp 0.9322
rs 10
wmc 25

6 Methods

Rating   Name   Duplication   Size   Complexity  
A incrementWordNumber() 0 3 1
A cleanExpr() 0 19 4
C extract() 0 34 12
A __construct() 0 12 3
A get() 0 13 1
A exec() 0 18 4
1
<?php
2
3
namespace PiedWeb\TextAnalyzer;
4
5
/**
 * Extracts expressions (groups of 1 to N consecutive words) from a text and weights them.
 *
 * Instances are created through the static factory get(), which cleans the text,
 * walks every sentence and returns an Analysis built from the weighted expressions,
 * the counted words and the recorded trail.
 */
class Analyzer
{
    /** @var bool When true, the text is reduced to sentences before being analysed. */
    protected $onlyInSentence;

    /** @var int Maximum number of words an expression may contain. */
    protected $expressionMaxWords;

    /** @var int Weight above which the source sentence of an expression is recorded (0 disables it). */
    protected $keepTrail;

    /** @var string Cleaned text the analysis runs on. */
    protected $text;

    /** @var array<string, int> Weight of every expression found, keyed by the expression. */
    protected $expressions = [];

    /** @var int Number of words counted so far. */
    protected $wordNumber = 0;

    /** @var array<string, string[]> Sentences in which an expression was seen, keyed by the expression. */
    protected $trail = [];

    /**
     * Analyses a text and returns the resulting Analysis.
     *
     * @param string|\simple_html_dom $text               Raw text, HTML string or parsed DOM
     * @param bool                    $onlySentence       Restrict the analysis to sentences
     * @param int                     $expressionMaxWords Maximum number of words per expression
     * @param int                     $keepTrail          Weight threshold for recording sentences (0 disables it)
     *
     * @return Analysis
     */
    public static function get(
        $text,
        bool $onlySentence = false,
        int $expressionMaxWords = 5,
        int $keepTrail = 3
    ) {
        $self = new self($text, $onlySentence, $expressionMaxWords, $keepTrail);

        return $self->exec();
    }

    /**
     * Stores the options, then cleans the text.
     *
     * The options must be assigned before the text is cleaned: the sentence-only
     * filter below depends on $onlyInSentence. Previously the options were set by
     * get() after construction, so that filter never ran.
     *
     * @param string|\simple_html_dom $text
     * @param bool                    $onlySentence
     * @param int                     $expressionMaxWords
     * @param int                     $keepTrail
     */
    protected function __construct(
        $text,
        bool $onlySentence = false,
        int $expressionMaxWords = 5,
        int $keepTrail = 3
    ) {
        $this->onlyInSentence = $onlySentence;
        $this->expressionMaxWords = $expressionMaxWords;
        $this->keepTrail = $keepTrail;

        $text = is_string($text) ? CleanText::stripHtmlTags($text) : CleanText::stipHtmlTagsFromDom($text);
        $text = CleanText::fixEncoding($text);
        $text = CleanText::removeDate($text);

        if ($this->onlyInSentence) {
            $text = CleanText::keepOnlySentence($text);
        }

        $this->text = $text;
    }

    /**
     * Adds $value (which may be negative) to the word counter.
     */
    protected function incrementWordNumber(int $value)
    {
        $this->wordNumber += $value;
    }

    /**
     * Splits the text into sentences, extracts the expressions of each one and
     * returns the Analysis, with expressions sorted by descending weight.
     *
     * @return Analysis
     */
    protected function exec()
    {
        if ($this->onlyInSentence) {
            $sentences = [];
            foreach (explode(chr(10), $this->text) as $paragraph) {
                $sentences = array_merge($sentences, CleanText::getSentences($paragraph));
            }
        } else {
            // Without sentence detection, every line is handled as one sentence.
            $sentences = explode(chr(10), trim($this->text));
        }

        foreach ($sentences as $sentence) {
            $this->extract($sentence);
        }

        arsort($this->expressions);

        return new Analysis($this->expressions, $this->wordNumber, $this->trail);
    }

    /**
     * Registers every expression of 1 to $expressionMaxWords words starting at each word of the sentence.
     *
     * Each word increments the word counter, except words whose single-word
     * expression is rejected (the +1 is cancelled by a -1).
     */
    protected function extract(string $sentence)
    {
        $sentence = CleanText::removePunctuation($sentence);

        $words = explode(' ', trim(strtolower($sentence)));
        $wordsCount = count($words);

        for ($key = 0; $key < $wordsCount; ++$key) {
            for ($wordNumber = 1; $wordNumber <= $this->expressionMaxWords; ++$wordNumber) {
                // array_slice stops at the end of the sentence, like the former isset() guard.
                $expression = implode(' ', array_slice($words, $key, $wordNumber));
                $expression = $this->cleanExpr($expression, $wordNumber);

                $isRejected = '' === $expression
                    // Avoid over-weighting: the cleaned expression must still contain $wordNumber words.
                    || (substr_count($expression, ' ') + 1) !== $wordNumber
                    // Avoid results made only of numbers or symbols.
                    || !preg_match('/[a-z]/', $expression);

                if ($isRejected) {
                    if (1 === $wordNumber) {
                        $this->incrementWordNumber(-1);
                    }

                    continue;
                }

                // The weight added is the number of words left once stop words are removed.
                $plus = 1 + substr_count(CleanText::removeStopWords($expression), ' ');

                // Accumulate on the previous weight. The former `isset(...) ?? 0` added a boolean
                // instead, so weights never grew beyond a single occurrence.
                $this->expressions[$expression] = ($this->expressions[$expression] ?? 0) + $plus;

                if ($this->keepTrail > 0 && $this->expressions[$expression] > $this->keepTrail) {
                    $this->trail[$expression][] = $sentence;
                }
            }

            $this->incrementWordNumber(1);
        }
    }

    /**
     * Removes stop words from an expression and normalises its whitespace.
     *
     * @param string $expression Candidate expression
     * @param int    $wordNumber Number of words the expression was built with
     *
     * @return string The cleaned expression, or '' when nothing usable remains
     */
    protected function cleanExpr($expression, $wordNumber)
    {
        if ($wordNumber <= 2) {
            $expression = trim(CleanText::removeStopWords(' '.$expression.' '));
        } else {
            // Applied twice; presumably each call strips a single stop word per extremity (to confirm).
            $expression = CleanText::removeStopWordsAtExtremity($expression);
            $expression = CleanText::removeStopWordsAtExtremity($expression);
            if (false === strpos($expression, ' ')) {
                // Only one word is left: treat it like a single-word expression.
                $expression = trim(CleanText::removeStopWords(' '.$expression.' '));
            }
        }

        // Last clean: collapse every whitespace run into a single space.
        $expression = trim(preg_replace('/\s+/', ' ', $expression));

        // htmlentities() returns '' for a non-empty string it cannot encode (e.g. invalid UTF-8, shown as "�").
        if ('' === htmlentities($expression)) {
            $expression = '';
        }

        return $expression;
    }
}
134