LanguageProcessingUtility   A
last analyzed

Complexity

Total Complexity 6

Size/Duplication

Total Lines 54
Duplicated Lines 0 %

Coupling/Cohesion

Components 0
Dependencies 4

Test Coverage

Coverage 100%

Importance

Changes 0
Metric Value
wmc 6
lcom 0
cbo 4
dl 0
loc 54
ccs 15
cts 15
cp 1
rs 10
c 0
b 0
f 0

3 Methods

Rating   Name   Duplication   Size   Complexity  
A stem() 0 8 1
A removeStopWords() 0 15 4
A removePunctuations() 0 6 1
1
<?php
2
3
namespace Botonomous\utility;
4
5
use Botonomous\Dictionary;
6
use NlpTools\Stemmers\PorterStemmer;
7
use NlpTools\Tokenizers\WhitespaceTokenizer;
8
9
/**
 * Class LanguageProcessingUtility.
 *
 * Small text-normalisation helpers: stemming, stop-word removal and
 * punctuation stripping.
 */
class LanguageProcessingUtility extends AbstractUtility
{
    /**
     * Stem every token of the given text.
     *
     * The text is split with WhitespaceTokenizer, the tokens are passed to
     * PorterStemmer::stemAll(), and the results are joined back together
     * with single spaces.
     *
     * @param $text
     *
     * @return string
     */
    public function stem($text)
    {
        $tokens = (new WhitespaceTokenizer())->tokenize($text);

        $stemmed = (new PorterStemmer())->stemAll($tokens);

        return implode(' ', $stemmed);
    }

    /**
     * Remove the stop words of the given language from the text.
     *
     * The text is split on single spaces; every piece that is found in the
     * 'stopwords-<language>' dictionary is dropped and the remaining pieces
     * are joined back with single spaces. Matching is exact and
     * case-sensitive.
     *
     * @param $text
     * @param string $language suffix of the dictionary key to load (default 'en')
     *
     * @throws \Exception
     *
     * @return string
     */
    public function removeStopWords($text, $language = 'en')
    {
        // NOTE(review): assumes the dictionary yields a flat array of strings - confirm against Dictionary::get().
        $stopWords = (new Dictionary())->get('stopwords-'.$language);

        // explode() always returns at least one element, so no emptiness guard is needed.
        $kept = [];
        foreach (explode(' ', $text) as $word) {
            // Strict comparison: loose == would treat numeric-looking strings
            // such as '1e1' and '10' as the same word.
            if (!in_array($word, $stopWords, true)) {
                $kept[] = $word;
            }
        }

        return implode(' ', $kept);
    }

    /**
     * Strip punctuation from the text.
     *
     * Every occurrence of an entry from the 'punctuations' dictionary is
     * replaced with an empty string.
     *
     * @param $text
     *
     * @throws \Exception
     *
     * @return string
     */
    public function removePunctuations($text)
    {
        $punctuations = (new Dictionary())->get('punctuations');

        return str_replace($punctuations, '', $text);
    }
}
66