Html   A
last analyzed

Complexity

Total Complexity 21

Size/Duplication

Total Lines 179
Duplicated Lines 0 %

Coupling/Cohesion

Components 1
Dependencies 0

Importance

Changes 0
Metric Value
wmc 21
c 0
b 0
f 0
lcom 1
cbo 0
dl 0
loc 179
rs 10

6 Methods

Rating   Name   Duplication   Size   Complexity  
A __construct() 0 12 1
A purify() 0 16 4
A validateOptions() 0 6 3
B convertLinks() 0 45 6
A removeTagDuplicates() 0 11 1
B convertCode() 0 34 6
1
<?php
2
3
namespace Sugarcrm\UpgradeSpec\Purifier;
4
5
class Html implements PurifierInterface
6
{
7
    /**
8
     * @var string
9
     */
10
    private $baseUrl;
11
12
    /**
13
     * @var array
14
     */
15
    private $options;
16
17
    /**
18
     * Html constructor.
19
     *
20
     * @param string $baseUrl
21
     * @param array  $options
22
     */
23
    public function __construct($baseUrl = '', array $options = [])
    {
        $this->baseUrl = $baseUrl;

        // Caller-supplied options override the defaults; every feature stays
        // switched off unless it is explicitly requested.
        $this->options = array_merge([
            'absolute_urls' => false,
            'no_tag_duplicates' => false,
            'pre_to_code' => false,
        ], $options);

        // validateOptions() declares no parameters: it reads the base URL and
        // the options from the properties assigned above, so nothing is passed.
        // It throws \InvalidArgumentException when "absolute_urls" is enabled
        // while the base URL is empty.
        $this->validateOptions();
    }
35
36
    /**
37
     * Purifies html.
38
     *
39
     * @param $html
40
     *
41
     * @return string
42
     */
43
    public function purify($html)
    {
        // Each stage runs only when its option is enabled; otherwise the
        // markup is handed to the next stage untouched. The stage order is
        // fixed: links first, then tag duplicates, then <pre> conversion.
        $withLinks = $this->options['absolute_urls']
            ? $this->convertLinks($html)
            : $html;

        $withoutDuplicates = $this->options['no_tag_duplicates']
            ? $this->removeTagDuplicates($withLinks)
            : $withLinks;

        return $this->options['pre_to_code']
            ? $this->convertCode($withoutDuplicates)
            : $withoutDuplicates;
    }
59
60
    /**
61
     * Validates options.
62
     */
63
    private function validateOptions()
    {
        // A non-empty base URL is always acceptable, whatever the options are.
        if (!empty($this->baseUrl)) {
            return;
        }

        // An empty base URL is only a problem when links must be made absolute.
        if (!$this->options['absolute_urls']) {
            return;
        }

        throw new \InvalidArgumentException('"absolute_urls" requires not empty base url');
    }
69
70
    /**
71
     * Converts all relative links (@href) to absolute ones.
72
     *
73
     * @param $content
74
     *
75
     * @return mixed
76
     */
77
    private function convertLinks($content)
    {
        // Matches the value of every quoted href attribute; the look-arounds
        // keep the surrounding quotes out of the match, so only the value is replaced.
        $pattern = '/(?<=href=("|\'))[^"\']+(?=("|\'))/';

        $base = $this->baseUrl;
        $scheme = parse_url($base, PHP_URL_SCHEME);
        $host = parse_url($base, PHP_URL_HOST);
        $port = parse_url($base, PHP_URL_PORT);

        // Keep an explicit port of the base URL: using the host alone would
        // silently retarget links of e.g. "http://localhost:8080/" to port 80.
        $authority = $host . (is_int($port) ? ':' . $port : '');

        // Directory of the base document: the base path without its last
        // (non-directory) element. The cast covers a base URL without any path.
        $baseDir = preg_replace('#/[^/]*$#', '', (string) parse_url($base, PHP_URL_PATH));

        return preg_replace_callback($pattern, function ($matches) use ($base, $scheme, $authority, $baseDir) {
            $hrefValue = $matches[0];

            // protocol-relative URL: only the scheme is missing
            if (strncmp($hrefValue, '//', 2) === 0) {
                return $scheme . ':' . $hrefValue;
            }

            // already absolute (http:, https:, mailto:, ...): leave untouched
            $hrefScheme = parse_url($hrefValue, PHP_URL_SCHEME);
            if (is_string($hrefScheme) && $hrefScheme !== '') {
                return $hrefValue;
            }

            // bare queries and anchors refer to the base document itself
            if ($hrefValue[0] === '#' || $hrefValue[0] === '?') {
                return $base . $hrefValue;
            }

            // Separate the path from the query/fragment so that normalisation
            // never rewrites "//" or "/./" sequences inside the query string.
            $pathLength = strcspn($hrefValue, '?#');
            $relativePath = (string) substr($hrefValue, 0, $pathLength);
            $suffix = (string) substr($hrefValue, $pathLength);

            // a root-relative href ignores the directory of the base document
            $directory = $relativePath[0] === '/' ? '' : $baseDir;
            $rawPath = $directory . '/' . $relativePath;

            // Resolve empty, "." and ".." segments one by one; ".." above the
            // root is dropped. Unlike a single regex pass this also handles
            // chained segments such as "../../x".
            $segments = [];
            foreach (explode('/', $rawPath) as $segment) {
                if ($segment === '' || $segment === '.') {
                    continue;
                }

                if ($segment === '..') {
                    array_pop($segments);
                    continue;
                }

                $segments[] = $segment;
            }

            $resolvedPath = '/' . implode('/', $segments);

            // preserve the "directory" nature of hrefs ending in "/", "/." or "/.."
            if (!empty($segments) && preg_match('#/(\.\.?)?\z#', $rawPath)) {
                $resolvedPath .= '/';
            }

            return $scheme . '://' . $authority . $resolvedPath . $suffix;
        }, $content);
    }
122
123
    /**
124
     * Removes duplicated tags.
125
     *
126
     * @param $content
127
     *
128
     * @return mixed
129
     */
130
    private function removeTagDuplicates($content)
    {
        // normalise <strong> to <b> so that only one tag name has to be handled
        $normalized = str_replace(['<strong>', '</strong>'], ['<b>', '</b>'], $content);

        // preg_replace() applies the pattern/replacement pairs one after
        // another, each on the result of the previous pair.
        $patterns = [
            '/(<b>\s*)+/',        // run of opening tags -> a single opening tag
            '/(<\/b>\s*)+/',      // run of closing tags -> a single closing tag plus a space
            '/(<\/b>\s*<b>)+/',   // adjacent bold sections -> merged, separated by a space
        ];
        $replacements = ['<b>', '</b> ', ' '];

        return preg_replace($patterns, $replacements, $normalized);
    }
141
142
    /**
143
     * Converts "pre" to "code".
144
     *
145
     * @param $content
146
     *
147
     * @return mixed
148
     */
149
    private function convertCode($content)
    {
        // <pre> blocks are handled exactly like <code> blocks from here on
        $withCodeTags = str_replace(['<pre>', '<pre ', '</pre>'], ['<code>', '<code ', '</code>'], $content);

        return preg_replace_callback('/<code(.*?)>(.*?)<\/code>/s', function ($matches) {
            // First turn raw line breaks into <br />, then turn every <br>
            // variant into PHP_EOL, so all line breaks end up in one form.
            $snippet = str_replace(
                ['<br></br>', '<br>', '<br/>', '<br />'],
                PHP_EOL,
                str_replace(["\r\n", "\r", "\n"], '<br />', $matches[0])
            );

            // A snippet is treated as a block when it mentions one of the
            // keywords or spans several lines; blocks get surrounded by <br />.
            // The whole match is searched, including the opening tag.
            foreach (['function', 'class', 'array', PHP_EOL] as $marker) {
                if (mb_strpos($snippet, $marker) !== false) {
                    return '<br />' . $snippet . '<br />';
                }
            }

            return $snippet;
        }, $withCodeTags);
    }
183
}
184