Parser::setBlockText()   A
last analyzed

Complexity

Conditions 1
Paths 1

Size

Total Lines 5

Duplication

Lines 0
Ratio 0 %

Code Coverage

Tests 4
CRAP Score 1

Importance

Changes 0
Metric Value
cc 1
nc 1
nop 1
dl 0
loc 5
ccs 4
cts 4
cp 1
crap 1
rs 10
c 0
b 0
f 0
1
<?php
2
3
/*
4
 * Copyright (C) 2016 Billie Thompson
5
 *
6
 * This software may be modified and distributed under the terms
7
 * of the MIT license.  See the LICENSE file for details.
8
 */
9
10
namespace PurpleBooth;
11
12
/**
13
 * This class is designed to be hooked into an XML parser to convert HTML to text.
14
 *
15
 * @see     xml_parser_create()
16
 * @see     HtmlStripperExtension
17
 */
18
class Parser
{
    /**
     * Output accumulated from every outermost element that has been closed.
     *
     * @var string
     */
    private $text = '';

    /**
     * Text buffers, one per element that is currently open (innermost on top).
     *
     * @var \SplStack
     */
    private $transformedTextStack;

    /**
     * Element names of the currently open elements, parallel to the text stack.
     *
     * @var \SplStack
     */
    private $blockTypeStack;

    /**
     * Attribute arrays of the currently open elements, parallel to the text stack.
     *
     * @var \SplStack
     */
    private $blockAttributesStack;

    /**
     * Creates the three (initially empty) stacks that mirror the element nesting.
     */
    public function __construct()
    {
        $this->blockAttributesStack = new \SplStack();
        $this->blockTypeStack = new \SplStack();
        $this->transformedTextStack = new \SplStack();
    }

    /**
     * Element-start callback.
     *
     * Opens a new level on the stacks and, for list items, starts the
     * level's text with a "* " bullet.
     *
     * @see xml_set_element_handler()
     *
     * @param resource $parser Unused; present to satisfy the handler signature
     * @param string   $name   Element name; compared against upper-case names
     *                         (presumably relies on the XML parser's case folding)
     * @param array    $attrs  Attributes of the element
     */
    public function startElement($parser, $name, $attrs)
    {
        $this->blockBegin($name, $attrs);

        if ('LI' === $name) {
            $this->appendBlockText('* ');
        }
    }

    /**
     * Opens a new level on all three stacks.
     *
     * The new level starts with an empty text buffer; it becomes the level
     * that the text helpers of this class read from and write to.
     *
     * @param string $name
     * @param array  $attributes
     */
    private function blockBegin($name, $attributes)
    {
        $this->blockAttributesStack->push($attributes);
        $this->blockTypeStack->push($name);
        $this->transformedTextStack->push('');
    }

    /**
     * Adds text to the end of the innermost open block's buffer.
     *
     * @param string $value
     */
    private function appendBlockText($value)
    {
        $existing = $this->getBlockText();

        $this->setBlockText($existing.$value);
    }

    /**
     * Replaces the innermost open block's buffer with the given text.
     *
     * @param string $value
     */
    private function setBlockText($value)
    {
        // Swap the top entry: drop the old buffer, then put the new one in its place.
        $this->transformedTextStack->pop();
        $this->transformedTextStack->push($value);
    }

    /**
     * Reads the innermost open block's buffer without removing it.
     *
     * @return string
     */
    private function getBlockText()
    {
        return $this->transformedTextStack->top();
    }

    /**
     * Element-end callback.
     *
     * Appends the trailing text that belongs to the closing element (blank
     * lines after P, UL and DIV, a newline after LI, the link target after A),
     * then hands the finished block's text to its parent block or, when no
     * block remains open, to the final output.
     *
     * @see xml_set_element_handler()
     *
     * @param resource $parser Unused; present to satisfy the handler signature
     * @param string   $name   Element name; compared against upper-case names
     */
    public function endElement($parser, $name)
    {
        $closingSuffixes = [
            'P' => "\n\n",
            'UL' => "\n\n",
            'LI' => "\n",
            'DIV' => "\n\n\n",
        ];

        if (isset($closingSuffixes[$name])) {
            $this->appendBlockText($closingSuffixes[$name]);
        } elseif ('A' === $name) {
            $linkAttributes = $this->blockAttributesStack->top();

            // Keep the link target visible in the plain text output.
            if (isset($linkAttributes['HREF'])) {
                $this->appendBlockText(' ('.$linkAttributes['HREF'].')');
            }
        }

        $finishedText = $this->blockFinished();

        if ($this->transformedTextStack->isEmpty()) {
            // The outermost element just closed: nothing left to nest into.
            $this->text .= $finishedText;

            return;
        }

        $this->appendBlockText($finishedText);
    }

    /**
     * Closes the innermost block: removes its entry from all three stacks.
     *
     * @return string The text that was collected for the closed block
     */
    private function blockFinished()
    {
        $this->blockAttributesStack->pop();
        $this->blockTypeStack->pop();

        return $this->transformedTextStack->pop();
    }

    /**
     * Character-data callback.
     *
     * Appends the data to the innermost open block with every newline
     * replaced by a single space.
     *
     * @see xml_set_character_data_handler()
     *
     * @param resource $parser Unused; present to satisfy the handler signature
     * @param string   $data
     */
    public function characterData($parser, $data)
    {
        $singleLine = str_replace("\n", ' ', $data);

        $this->appendBlockText($singleLine);
    }

    /**
     * Returns the collected text with surrounding whitespace removed from
     * the whole text and from each individual line.
     *
     * @return string
     */
    public function getText()
    {
        $cleanLines = [];

        foreach (explode("\n", trim($this->text)) as $line) {
            $cleanLines[] = trim($line);
        }

        return implode("\n", $cleanLines);
    }
}
211