StripHtmlStringFilter   A
last analyzed

Complexity

Total Complexity 6

Size/Duplication

Total Lines 106
Duplicated Lines 0 %

Coupling/Cohesion

Components 0
Dependencies 0

Importance

Changes 0
Metric Value
wmc 6
lcom 0
cbo 0
dl 0
loc 106
rs 10
c 0
b 0
f 0

3 Methods

Rating   Name   Duplication   Size   Complexity  
A filter() 0 22 1
A setNewlinesAfterClosingTags() 0 34 3
A limitConsecutiveNewlines() 0 15 2
1
<?php
2
3
namespace CultuurNet\UDB3\StringFilter;
4
5
class StripHtmlStringFilter implements StringFilterInterface
{
    /**
     * Converts an HTML fragment to plain text.
     *
     * Break tags become a single newline and closing paragraph tags become a
     * blank line; afterwards entities are decoded, all remaining tags are
     * stripped, runs of more than two newlines are collapsed and the result
     * is trimmed.
     *
     * Note that entities are decoded before tags are stripped, so
     * entity-encoded markup (for example "&lt;b&gt;") is decoded and then
     * stripped along with the real tags.
     *
     * {@inheritdoc}
     */
    public function filter($string)
    {
        // Add one newline after each break tag.
        $string = $this->setNewlinesAfterClosingTags($string, 'br', 1, true);

        // Add two newlines after each closing paragraph tag.
        $string = $this->setNewlinesAfterClosingTags($string, 'p', 2);

        // Decode all HTML entities, like &amp;, so they are human-readable.
        // ENT_QUOTES is passed explicitly because the default flags differ
        // between PHP versions: before 8.1 single-quote entities such as
        // &#039; were left encoded.
        $string = html_entity_decode($string, ENT_QUOTES);

        // Strip all HTML tags.
        $string = strip_tags($string);

        // Remove any excessive consecutive newlines.
        $string = $this->limitConsecutiveNewlines($string, 2);

        // Trim any whitespace or newlines from the start and/or end of the string.
        return trim($string);
    }

    /**
     * Sets a specific amount of newlines after each occurrence of a specific closing HTML tag.
     *
     * Matching is case-insensitive. Line breaks that already follow the tag
     * ("\r\n", "\r" or "\n") are replaced, not added to. The inserted
     * newlines are always "\n", independent of the platform, so that they are
     * recognised again by limitConsecutiveNewlines().
     *
     * @param string $string
     *   String to set newlines in.
     * @param string $tag
     *   Tag name. For example "br" to set a newline after each "<br />" or "<br>" (if self-closing flag is set), or
     *   "p" to set a newline after each "</p>" (if not self-closing). The name is matched literally.
     * @param int $newlineCount
     *   Amount of newlines to set after the closing tag. If any newlines are set already, they will be removed.
     *   Negative values are treated as 0.
     * @param bool $selfClosing
     *   Indicates whether the tag is self-closing (<br />) or not (<p></p>).
     *
     * @return string
     *   Processed string.
     */
    protected function setNewlinesAfterClosingTags($string, $tag, $newlineCount = 1, $selfClosing = false)
    {
        // The tag name is user supplied, so make sure it cannot alter the pattern.
        $name = preg_quote($tag, '/');

        if ($selfClosing) {
            // The self-closing tag, with optional attributes and optional closing slash.
            // The negative lookahead stops "br" from matching longer tag names such as
            // "<brand>", and [^>]* (unlike .) also covers attributes wrapped onto a new line.
            $tagPattern = '<' . $name . '(?![\\w:-])[^>]*>';
        } else {
            // The closing tag, tolerating whitespace before the ">" ("</p >").
            $tagPattern = '<\\/' . $name . '\\s*>';
        }

        // Capture the tag as group 1 and swallow any line breaks directly after it.
        // i makes the match case-insensitive, as HTML tags can be both uppercase and lowercase.
        $pattern = '/(' . $tagPattern . ')(?:\\r\\n|\\r|\\n)*/i';

        $newlines = str_repeat("\n", max(0, (int) $newlineCount));

        // Re-emit the tag itself followed by exactly the requested number of newlines.
        return preg_replace($pattern, '${1}' . $newlines, $string);
    }

    /**
     * Restricts the number of consecutive newlines in a specific string.
     *
     * "\r\n", "\r" and "\n" each count as one newline. Runs that exceed the
     * limit are replaced by exactly $limit "\n" characters; shorter runs are
     * left untouched.
     *
     * @param string $string
     *   String to limit consecutive newlines in.
     * @param int $limit
     *   Limit of consecutive newlines. (Defaults to 2.) Negative values are treated as 0.
     *
     * @return string
     *   Processed string.
     */
    protected function limitConsecutiveNewlines($string, $limit = 2)
    {
        $limit = max(0, (int) $limit);

        // Pattern that finds any run of newlines that exceeds the allowed limit.
        $pattern = '/(?:\\r\\n|\\r|\\n){' . ($limit + 1) . ',}/';

        // Replace each such run with the maximum number of allowed newlines.
        return preg_replace($pattern, str_repeat("\n", $limit), $string);
    }
}
111