PorterStemmer   A
last analyzed

Complexity

Total Complexity 5

Size/Duplication

Total Lines 82
Duplicated Lines 0 %

Coupling/Cohesion

Components 1
Dependencies 0

Test Coverage

Coverage 0%

Importance

Changes 2
Bugs 0 Features 1
Metric Value
wmc 5
c 2
b 0
f 1
lcom 1
cbo 0
dl 0
loc 82
ccs 0
cts 35
cp 0
rs 10

3 Methods

Rating   Name   Duplication   Size   Complexity  
A __construct() 0 5 1
A stem() 0 12 3
B slugify() 0 36 1
1
<?php
2
3
namespace TreeHouse\Model\Config\Matcher\Stemmer;
4
5
/**
 * Stemmer that delegates to the php-stemmer extension.
 *
 * The word is first normalised to a lowercase ASCII slug and then passed to
 * whichever stemming function the extension exposes (`stemmer_stem_word` or
 * `stemword`), together with the configured language and encoding.
 */
class PorterStemmer implements StemmerInterface
{
    /**
     * Language name passed as the second argument to the stemming function.
     *
     * @var string
     */
    protected $language;

    /**
     * Encoding name passed as the third argument to the stemming function.
     *
     * @var string
     */
    protected $encoding;

    /**
     * @param string $language Language name forwarded to the stemming function
     * @param string $encoding Encoding name forwarded to the stemming function
     */
    public function __construct($language = 'dutch', $encoding = 'UTF_8')
    {
        $this->language = $language;
        $this->encoding = $encoding;
    }

    /**
     * Slugifies the given word and runs it through the stemming function.
     *
     * @param string $word
     *
     * @throws \RuntimeException When neither stemming function is available
     *
     * @return string
     */
    public function stem($word)
    {
        // The extension has shipped its entry point under two different names,
        // so use whichever one is available.
        if (function_exists('stemmer_stem_word')) {
            $func = 'stemmer_stem_word';
        } elseif (function_exists('stemword')) {
            $func = 'stemword';
        } else {
            throw new \RuntimeException('The php-stemmer extension was not installed/configured properly');
        }

        return $func($this->slugify($word), $this->language, $this->encoding);
    }

    /**
     * Normalises a string to a lowercase slug containing only `a-z`, `0-9`
     * and single dashes.
     *
     * @param string $slug
     *
     * @return string
     */
    protected function slugify($slug)
    {
        // Passing null to the string functions below is deprecated since PHP 8.1.
        $slug = (string) $slug;

        // replace entities with their ascii equivalents: encode accented
        // characters as named entities (e.g. "&eacute;"), then keep only the
        // base letter(s) of each entity. ENT_NOQUOTES (0) is what the previous
        // `null` flags argument was coerced to, without the PHP 8.1 deprecation.
        $slug = html_entity_decode(
            preg_replace(
                '/&([a-z]{1,2})(grave|acute|cedil|circ|ring|tilde|uml|lig|slash|caron|nof|orn|th);/i',
                '$1',
                htmlentities($slug, ENT_NOQUOTES, 'UTF-8')
            ),
            ENT_NOQUOTES,
            'UTF-8'
        );

        // convert to lowercase; the encoding is explicit so the result does not
        // depend on the process-wide mbstring internal encoding
        $slug = mb_strtolower($slug, 'UTF-8');

        // perform iconv transliteration, this should cover just about every special character remaining
        $ascii = iconv('UTF-8', 'ASCII//TRANSLIT//IGNORE', $slug);

        if ($ascii === false) {
            // iconv could not convert the string; drop the non-ASCII bytes
            // ourselves instead of silently losing the whole word.
            $ascii = (string) preg_replace('/[^\x00-\x7F]/', '', $slug);
        }

        // string is now in ascii
        $slug = $ascii;

        // convert illegal characters to spaces
        $slug = preg_replace('/[^a-z0-9\_\-\s]/', ' ', $slug);

        // trim
        $slug = trim($slug);

        // convert remaining unwanted characters to dashes
        $slug = preg_replace('/[^a-z0-9]/', '-', $slug);

        // convert 2 or more consecutive dashes to one
        $slug = preg_replace('/\-{2,}/', '-', $slug);

        // return rawurlencoded (just to be sure)
        return rawurlencode($slug);
    }
}
87