Duplicate code is one of the most pungent code smells. A rule that is often used is to re-structure code once it is duplicated in three or more places.
Common duplication problems and their corresponding solutions are:
1 | <?php |
||
17 | class HtmlConverter |
||
18 | { |
||
/**
 * Environment built in the constructor; getConfig() reads the configuration from it.
 *
 * @var Environment
 */
protected $environment;

/**
 * Value of the 'white_tags' option, loaded by setWhiteTagVariables().
 *
 * @var array
 */
protected $whiteTags = array();

/**
 * Value of the 'white_tag_wildcard' option, loaded by setWhiteTagVariables().
 *
 * @var string
 */
protected $wildCard = '';
||
33 | |||
/**
 * Constructor
 *
 * Creates the default environment from the built-in option set and then merges
 * the caller-supplied options into that environment's configuration.
 *
 * @param array $options Configuration options
 */
public function __construct(array $options = array())
{
    $builtInOptions = array(
        // Set to 'atx' to output H1 and H2 headers as # Header1 and ## Header2
        'header_style' => 'setext',
        // Set to false to show warnings when loading malformed HTML
        'suppress_errors' => true,
        // Set to true to strip tags that don't have markdown equivalents.
        // N.B. Strips tags, not their content. Useful to clean MS Word HTML output.
        'strip_tags' => false,
        // Set to '__' if you prefer the underlined style
        'bold_style' => '**',
        // Set to '*' if you prefer the asterisk style
        'italic_style' => '_',
        // Space-separated list of dom nodes that should be removed. Example: 'meta style script'
        'remove_nodes' => '',
        // Array with allowed html tags
        'white_tags' => array(),
        // Use a non common character
        'white_tag_wildcard' => '|',
    );

    $environment = Environment::createDefaultEnvironment($builtInOptions);
    $this->environment = $environment;

    $environment->getConfig()->merge($options);
}
|
56 | |||
/**
 * Expose the environment that was created in the constructor.
 *
 * @return Environment
 */
public function getEnvironment()
{
    $environment = $this->environment;

    return $environment;
}
||
64 | |||
/**
 * Fetch the configuration object held by the environment.
 *
 * @return Configuration
 */
public function getConfig()
{
    $environment = $this->environment;

    return $environment->getConfig();
}
||
72 | |||
/**
 * Convert
 *
 * Lets the converter instance be called like a function; simply forwards to convert().
 *
 * @see HtmlConverter::convert
 *
 * @param string $html
 *
 * @return string The Markdown version of the html
 */
public function __invoke($html)
{
    $markdown = $this->convert($html);

    return $markdown;
}
|
86 | |||
/**
 * Convert
 *
 * Returns an empty string for blank input. Otherwise escapes the configured
 * white tags, loads the HTML into a DOMDocument, converts the children of the
 * <html> element, serialises the document back to a string, sanitizes it and
 * finally removes the white-tag escaping again.
 *
 * @param string $html
 *
 * @throws \InvalidArgumentException When the loaded document has no <html> element
 *
 * @return string The Markdown version of the html
 */
public function convert($html)
{
    $isBlank = trim($html) === '';
    if ($isBlank) {
        return '';
    }

    $this->setWhiteTagVariables();

    $escapedHtml = $this->escapeWhiteTags($html);
    $document = $this->createDOMDocument($escapedHtml);

    // Work on the entire DOM tree (including head and body)
    $root = $document->getElementsByTagName('html')->item(0);
    if (!$root) {
        throw new \InvalidArgumentException('Invalid HTML was provided');
    }

    $this->convertChildren(new Element($root));

    // Store the now-modified DOMDocument as a string
    $serialized = $document->saveHTML();
    $sanitized = $this->sanitize($serialized);

    return $this->removeEscapedWhiteTags($sanitized);
}
||
125 | |||
/**
 * Copy the white-tag settings from the configuration onto the instance.
 *
 * Stores the 'white_tags' option in $whiteTags and the 'white_tag_wildcard'
 * option in $wildCard so the escaping helpers can use them afterwards.
 */
protected function setWhiteTagVariables()
{
    $config = $this->getConfig();

    $this->whiteTags = $config->getOption('white_tags');
    $this->wildCard = $config->getOption('white_tag_wildcard');
}
|
134 | |||
/**
 * Wrap every "whiteTag" in <code> tags, with the "wildCard" placed before the
 * opening "<code>" and after the closing "</code>", so the tag is not converted
 * into markdown and can be identified later.
 *
 * NOTE(review): str_replace() matches plain substrings, so the opening fragment
 * of a short tag name (e.g. "<b") is also found inside longer tags (e.g. "<br").
 *
 * @param string $html
 *
 * @return string
 */
protected function escapeWhiteTags($html)
{
    if (count($this->whiteTags) === 0) {
        return $html;
    }

    foreach ($this->whiteTags as $tagName) {
        // Search and replace the "<openTag" for "wildCard<code><openTag"
        $opening = $this->getOpenTag($tagName);
        $html = str_replace($opening, sprintf('%s<code>%s', $this->wildCard, $opening), $html);

        // Search and replace the "closeTag>" for "closeTag></code>wildCard"
        $closing = $this->getCloseTag($tagName);
        $html = str_replace($closing, sprintf('%s</code>%s', $closing, $this->wildCard), $html);
    }

    return $html;
}
||
161 | |||
/**
 * Build the opening fragment of a tag: a "<" followed by the tag name,
 * without the closing ">".
 *
 * @param string $tag
 *
 * @return string
 */
protected function getOpenTag($tag)
{
    $openTag = sprintf('<%s', $tag);

    return $openTag;
}
||
171 | |||
172 | /** |
||
173 | * @param string $tag |
||
174 | * |
||
175 | * @return string |
||
176 | */ |
||
177 | 3 | protected function getCloseTag($tag) |
|
181 | |||
182 | /** |
||
183 | * @param string $html |
||
184 | * |
||
185 | * @return \DOMDocument |
||
186 | */ |
||
187 | 72 | private function createDOMDocument($html) |
|
188 | { |
||
189 | 72 | $document = new \DOMDocument(); |
|
206 | |||
207 | /** |
||
208 | * Convert Children |
||
209 | * |
||
210 | * Recursive function to drill into the DOM and convert each node into Markdown from the inside out. |
||
211 | * |
||
212 | * Finds the children of each node and converts them to #text nodes containing their Markdown equivalent, |
||
213 | * starting with the innermost element and working up to the outermost element. |
||
214 | * |
||
215 | * @param ElementInterface $element |
||
216 | */ |
||
217 | 72 | private function convertChildren(ElementInterface $element) |
|
239 | |||
240 | /** |
||
241 | * Convert to Markdown |
||
242 | * |
||
243 | * Converts an individual node into a #text node containing a string of its Markdown equivalent. |
||
244 | * |
||
245 | * Example: An <h3> node with text content of 'Title' becomes a text node with content of '### Title' |
||
246 | * |
||
247 | * @param ElementInterface $element |
||
248 | * |
||
249 | * @return string The converted HTML as Markdown |
||
250 | */ |
||
251 | 72 | protected function convertToMarkdown(ElementInterface $element) |
|
265 | |||
266 | /** |
||
267 | * @param string $markdown |
||
268 | * |
||
269 | * @return string |
||
270 | */ |
||
271 | 72 | protected function sanitize($markdown) |
|
281 | |||
282 | /** |
||
283 | * Remove the previously added <code> for the "whiteTags" marked by the "wildCard" |
||
284 | * to return the "html" as the user typed |
||
285 | * |
||
286 | * @param string $markdown |
||
287 | * |
||
288 | * @return string |
||
289 | */ |
||
290 | 72 | View Code Duplication | protected function removeEscapedWhiteTags($markdown) |
308 | } |
||
309 |
Our type inference engine has found an assignment to a property that is incompatible with the declared type of that property.
Either this assignment is in error or the assigned type should be added to the documentation/type hint for that property.