Completed
Branch master (1f2f65)
by Billie
03:02
created

Parser   A

Complexity

Total Complexity 18

Size/Duplication

Total Lines 189
Duplicated Lines 0 %

Coupling/Cohesion

Components 1
Dependencies 0

Test Coverage

Coverage 100%

Importance

Changes 1
Bugs 0 Features 0
Metric Value
wmc 18
c 1
b 0
f 0
lcom 1
cbo 0
dl 0
loc 189
ccs 59
cts 59
cp 1
rs 10

10 Methods

Rating   Name   Duplication   Size   Complexity  
A __construct() 0 6 1
A startElement() 0 10 2
A blockBegin() 0 6 1
A appendBlockText() 0 4 1
A setBlockText() 0 5 1
A getBlockText() 0 4 1
C endElement() 0 31 8
A blockFinished() 0 8 1
A characterData() 0 4 1
A getText() 0 7 1
1
<?php
2
3
namespace PurpleBooth;
4
5
/**
6
 * This class is designed to be hooked into an XML parser to convert HTML to text.
7
 *
8
 * @see     xml_parser_create()
9
 * @see     HtmlStripperExtension
10
 *
11
 * @package PurpleBooth
12
 */
13
class Parser
{
    /**
     * Text of every block that has been closed while no other block was open.
     *
     * @var string
     */
    private $text = "";

    /**
     * One entry per currently open element: the text gathered so far for it.
     *
     * @var \SplStack
     */
    private $transformedTextStack;

    /**
     * One entry per currently open element: its tag name (upper-cased).
     *
     * @var \SplStack
     */
    private $blockTypeStack;

    /**
     * One entry per currently open element: its attributes, keyed by upper-cased name.
     *
     * @var \SplStack
     */
    private $blockAttributesStack;

    /**
     * Parser constructor.
     *
     * Creates the three empty stacks; they are always pushed and popped together.
     */
    public function __construct()
    {
        $this->transformedTextStack = new \SplStack();
        $this->blockTypeStack       = new \SplStack();
        $this->blockAttributesStack = new \SplStack();
    }

    /**
     * Function called on the start of an element
     *
     * Opens a new block and prepends any leading text for it, like the "*"s on LIs.
     * The tag name is matched case-insensitively so the handler also works when
     * the XML parser has case folding switched off.
     *
     * @see xml_set_element_handler()
     *
     * @param resource|object $parser The XML parser invoking the handler (unused)
     * @param string          $name   Tag name of the element being opened
     * @param array           $attrs  Attributes of the element, name => value
     */
    public function startElement($parser, $name, $attrs)
    {
        $tag = strtoupper((string) $name);

        $this->blockBegin($tag, $attrs);

        switch ($tag) {
            case "LI":
                $this->appendBlockText("* ");
                break;
        }
    }

    /**
     * Called when we begin a block.
     *
     * Pushes a fresh level onto each stack; the stacks together represent the
     * chain of elements that are currently open. Content edits always operate
     * on the top level.
     *
     * @param string $name       Tag name of the block
     * @param array  $attributes Attributes of the block, name => value
     */
    private function blockBegin($name, $attributes)
    {
        // Store attribute names upper-cased so lookups such as 'HREF' succeed
        // regardless of the parser's case-folding setting.
        $normalised = is_array($attributes)
            ? array_change_key_case($attributes, CASE_UPPER)
            : array();

        $this->transformedTextStack->push("");
        $this->blockTypeStack->push($name);
        $this->blockAttributesStack->push($normalised);
    }

    /**
     * Append some text to the current level of the stack
     *
     * @param string $value
     */
    private function appendBlockText($value)
    {
        $this->setBlockText($this->getBlockText() . $value);
    }

    /**
     * Set the text for the block on top of the stack
     *
     * @param string $value
     */
    private function setBlockText($value)
    {
        // SplStack has no "replace top" operation, so swap the entry.
        $this->transformedTextStack->pop();
        $this->transformedTextStack->push($value);
    }

    /**
     * Get the current text that's in the block on top of the stack
     *
     * @return string
     */
    private function getBlockText()
    {
        return $this->transformedTextStack->top();
    }

    /**
     * When we reach a closing element, finish its block
     *
     * Adds any trailing text for the element (new lines after P, UL, LI and DIV,
     * the link target after A), then hands the finished text to the parent
     * block, or to the final output when no parent block is open.
     *
     * @see xml_set_element_handler()
     *
     * @param resource|object $parser The XML parser invoking the handler (unused)
     * @param string          $name   Tag name of the element being closed
     *
     * @throws \RuntimeException When called while no element is open
     */
    public function endElement($parser, $name)
    {
        switch (strtoupper((string) $name)) {
            case "P":
            case "UL":
                $this->appendBlockText("\n\n");
                break;
            case "LI":
                $this->appendBlockText("\n");
                break;
            case "DIV":
                $this->appendBlockText("\n\n\n");
                break;
            case "A":
                $attrs = $this->blockAttributesStack->top();

                if (isset($attrs['HREF'])) {
                    $this->appendBlockText(" ({$attrs['HREF']})");
                }
                break;
        }

        $blockContent = $this->blockFinished();

        if (count($this->transformedTextStack) > 0) {
            // Still inside a parent element: the text becomes part of it.
            $this->appendBlockText($blockContent);
        } else {
            $this->text .= $blockContent;
        }
    }

    /**
     * Get the transformed text off the stack, and clear down the other stacks
     *
     * @return string
     */
    private function blockFinished()
    {
        $transformedText = $this->transformedTextStack->pop();
        $this->blockTypeStack->pop();
        $this->blockAttributesStack->pop();

        return $transformedText;
    }

    /**
     * This converts character data to human character data
     *
     * Newlines are replaced with spaces. Text received while no element is open
     * is added straight to the output instead of failing on the empty stack.
     *
     * @see xml_set_character_data_handler()
     *
     * @param resource|object $parser The XML parser invoking the handler (unused)
     * @param string          $data   Raw character data
     */
    public function characterData($parser, $data)
    {
        $flattened = str_replace("\n", " ", $data);

        if ($this->transformedTextStack->isEmpty()) {
            $this->text .= $flattened;

            return;
        }

        $this->appendBlockText($flattened);
    }

    /**
     * This gets the text that has been parsed and returns it
     *
     * The whole text is trimmed, and so is every individual line.
     *
     * @return string
     */
    public function getText()
    {
        $lines = explode("\n", trim($this->text));

        return implode("\n", array_map("trim", $lines));
    }
}
202