|
1
|
|
|
<?php |
|
2
|
|
|
|
|
3
|
|
|
/** |
|
4
|
|
|
* @file HTMLConverter.php |
|
5
|
|
|
* @brief This file contains the HTMLConverter class. |
|
6
|
|
|
* @details |
|
7
|
|
|
* @author Filippo F. Fadda |
|
8
|
|
|
*/ |
|
9
|
|
|
|
|
10
|
|
|
|
|
11
|
|
|
namespace Converter; |
|
12
|
|
|
|
|
13
|
|
|
/** |
|
14
|
|
|
* @brief A rudimentary converter that takes HTML as input and replaces its tags with the related BBCode tags.
|
15
|
|
|
* @details This converter doesn't touch the HTML inside pre or code tags. |
|
16
|
|
|
*/ |
|
17
|
|
|
class HTMLConverter extends Converter {

  /**
   * @brief Code snippets collected by removeSnippets().
   * @details Has the layout filled in by `preg_match_all()`: index `0` holds every whole snippet, while the named
   * keys (`openpre`, `contentpre`, `closepre`, `opencode`, `contentcode`, `closecode`, `openbbcode`, `contentbbcode`,
   * `closebbcode`) hold the single parts, one entry per snippet.
   */
  protected $snippets = [];


  /**
   * @brief Extracts the value of an attribute from an HTML opening tag.
   * @details The value may be enclosed in double quotes, single quotes or not quoted at all; the enclosing quotes
   * are never part of the returned value.
   * @param[in] string $tag The opening tag, for example `<a href="...">`.
   * @param[in] string $name The attribute's name, for example `href`.
   * @return string|null The attribute's value (possibly empty), or `null` when the tag doesn't have the attribute.
   */
  private function extractAttribute($tag, $name) {
    // The look-behind prevents `href` from matching inside `data-href`, `src` inside `data-src`, and so on.
    // The branch reset group `(?|...)` makes every alternative store the bare value into group 1.
    $pattern = '/(?<![\w-])'.preg_quote($name, '/').'\s*=\s*(?|"([^"]*)"|\'([^\']*)\'|([^\'">\s]+))/iu';

    if (preg_match($pattern, $tag, $parts) === 1)
      return $parts[1];

    return null;
  }


  /**
   * @brief Finds all code snippets inside the body, replacing them with appropriate markers.
   * @details The code can be inside `<pre></pre>`, `<code></code>`, or `[code][/code]` in case you are using BBCode
   * markup language. The opening `pre` and `code` tags may carry attributes. The snippets found are stored into
   * `$this->snippets`, to be injected again by restoreSnippets().
   */
  protected function removeSnippets() {
    // The same pattern is used both to collect the snippets and to replace them, so the two steps can't diverge.
    $pattern = '%(?P<openpre><pre(?=[\s>])[^>]*>)(?P<contentpre>.*?)(?P<closepre></pre>)'.
      '|(?P<opencode><code(?=[\s>])[^>]*>)(?P<contentcode>.*?)(?P<closecode></code>)'.
      '|(?P<openbbcode>\[code=?\w*\])(?P<contentbbcode>.*?)(?P<closebbcode>\[/code\])%isu';

    if (preg_match_all($pattern, $this->text, $this->snippets)) {
      // Replaces the code snippet with a special marker to be able to inject the code in place.
      $this->text = preg_replace($pattern, '___SNIPPET___', $this->text);
    }
  }


  /**
   * @brief Restores the snippets, converting the HTML tags to BBCode tags.
   * @details Every marker left by removeSnippets() is replaced, in order, with the related snippet. Snippets found
   * inside `<pre>` or `<code>` are wrapped into `[code][/code]`, while the BBCode ones keep their own tags.
   */
  protected function restoreSnippets() {
    // Nothing has been collected, or removeSnippets() has never been called.
    if (empty($this->snippets[0]))
      return;

    $marker = '___SNIPPET___';
    $markerLength = strlen($marker);
    $snippetsCount = count($this->snippets[0]);
    $offset = 0;

    for ($i = 0; $i < $snippetsCount; $i++) {
      // We try to determine which tags the code is inside: <pre></pre>, <code></code>, [code][/code]
      if (!empty($this->snippets['openpre'][$i]))
        $snippet = "[code]".PHP_EOL.trim($this->snippets['contentpre'][$i]).PHP_EOL."[/code]";
      elseif (!empty($this->snippets['opencode'][$i]))
        $snippet = "[code]".PHP_EOL.trim($this->snippets['contentcode'][$i]).PHP_EOL."[/code]";
      else
        $snippet = $this->snippets['openbbcode'][$i].PHP_EOL.trim($this->snippets['contentbbcode'][$i]).PHP_EOL.$this->snippets['closebbcode'][$i];

      $position = strpos($this->text, $marker, $offset);

      // No markers left: there is nowhere to inject the remaining snippets.
      if ($position === false)
        break;

      // The snippet is injected verbatim. A regex replacement string can't be used here, because sequences like
      // `$1` or `\0`, pretty common inside source code, would be interpreted as back-references.
      $replacement = PHP_EOL.trim($snippet).PHP_EOL;
      $this->text = substr_replace($this->text, $replacement, $position, $markerLength);

      // The search continues after the injected code, so a marker-like string inside it is never replaced.
      $offset = $position + strlen($replacement);
    }
  }


  /**
   * @brief Replace links.
   * @details Every `<a ...>...</a>` is converted to `[url=href t=target]...[/url]`. When the `target` attribute is
   * missing, `_self` is used.
   * @throws RuntimeException When an anchor doesn't have the `href` attribute.
   */
  protected function replaceLinks() {

    // `<a\s` doesn't match other tags starting with the same letter (`<abbr>`, `<article>`, ...). The link's content
    // may span many lines, but it can't cross another anchor tag.
    $this->text = preg_replace_callback('%(<a\s[^>]*>)((?:(?!</?a[\s>]).)+)</a>%isu',

      function ($matches) {

        // Extracts the url, looking for it inside the opening tag only.
        $href = $this->extractAttribute($matches[1], 'href');

        if ($href === null)
          throw new \RuntimeException(sprintf("Text identified by '%d' has malformed links", $this->id));

        // Extracts the target.
        $target = $this->extractAttribute($matches[1], 'target');
        $target = ($target === null) ? "_self" : strtolower($target);

        return "[url=".$href." t=".$target."]".$matches[2]."[/url]";

      },

      $this->text
    );

  }


  /**
   * @brief Replace images.
   * @details Every `<img ...>` is converted to `[img]src[/img]`.
   * @throws RuntimeException When an image doesn't have the `src` attribute.
   */
  protected function replaceImages() {
    $this->text = preg_replace_callback('/<img(?=[\s\/])[^>]+>/iu',

      function ($matches) {

        // Extracts the src.
        $src = $this->extractAttribute($matches[0], 'src');

        if ($src === null)
          throw new \RuntimeException(sprintf("Text identified by '%d' has malformed images", $this->id));

        return "[img]".$src."[/img]";

      },

      $this->text
    );

  }


  /**
   * @brief Replace all other simple tags, even the lists.
   * @details The tag's attributes, if any, are ignored, so `<strong class="x">` is converted exactly like
   * `<strong>`. Any tag not listed here is left untouched.
   */
  protected function replaceOtherTags() {
    // BBCode to use in place of every known opening tag.
    $opening = [
      'strong' => '[b]',
      'b' => '[b]',
      'em' => '[i]',
      'i' => '[i]',
      'u' => '[u]',
      'strike' => '[s]',
      'del' => '[s]',
      'ul' => '[list]',
      'ol' => '[list=1]',
      'li' => '[*]',
      'center' => '[center]',
      'br' => PHP_EOL
    ];

    // BBCode to use in place of every known closing tag. A list item doesn't need to be closed.
    $closing = [
      'strong' => '[/b]',
      'b' => '[/b]',
      'em' => '[/i]',
      'i' => '[/i]',
      'u' => '[/u]',
      'strike' => '[/s]',
      'del' => '[/s]',
      'ul' => '[/list]',
      'ol' => '[/list]',
      'li' => '',
      'center' => '[/center]'
    ];

    // Group 1 is the slash of a closing tag, group 2 is the tag's name. The look-ahead makes sure the name is
    // complete, so `<b-foo>` is not mistaken for `<b>`.
    $this->text = preg_replace_callback('%<(/?)([a-z][a-z0-9]*)(?=[\s/>])[^<>]*>%iu',

      function ($matches) use ($opening, $closing) {
        $name = strtolower($matches[2]);
        $tags = ($matches[1] === '/') ? $closing : $opening;

        // Unknown tags are returned exactly as they have been found.
        return isset($tags[$name]) ? $tags[$name] : $matches[0];
      },

      $this->text
    );

  }


  /**
   * @brief Converts the provided HTML text into BBCode.
   * @details The code snippets are set aside first and injected back last, so neither the tags' replacement nor
   * `strip_tags()` can alter them.
   * @return string The converted text.
   */
  public function toBBCode() {
    // We don't want any HTML entities.
    $this->text = htmlspecialchars_decode($this->text);

    $this->removeSnippets();
    $this->replaceLinks();
    $this->replaceImages();
    $this->replaceOtherTags();

    // Removes every tag that has no BBCode counterpart.
    $this->text = strip_tags($this->text);

    $this->restoreSnippets();

    return $this->text;
  }

}
|
205
|
|
|
|
Duplicated code is one of the most pungent code smells. If you need to duplicate the same code in three or more different places, we strongly encourage you to look into extracting the code into a single class or operation.
You can also find more detailed suggestions in the “Code” section of your repository.