1
|
|
|
<?php |
2
|
|
|
|
3
|
|
|
namespace Sioen; |
4
|
|
|
|
5
|
|
|
use Sioen\Types\BlockquoteConverter; |
6
|
|
|
use Sioen\Types\HeadingConverter; |
7
|
|
|
use Sioen\Types\IframeConverter; |
8
|
|
|
use Sioen\Types\ImageConverter; |
9
|
|
|
use Sioen\Types\ListConverter; |
10
|
|
|
use Sioen\Types\ParagraphConverter; |
11
|
|
|
use Sioen\Types\BaseConverter; |
12
|
|
|
|
13
|
|
|
/** |
14
|
|
|
* Class HtmlToJson |
15
|
|
|
* |
16
|
|
|
* Converts html to a json object that can be understood by Sir Trevor |
17
|
|
|
* |
18
|
|
|
* @version 1.1.0 |
19
|
|
|
* @author Wouter Sioen <[email protected]> |
20
|
|
|
* @license http://www.opensource.org/licenses/mit-license.php MIT |
21
|
|
|
*/ |
22
|
|
|
class HtmlToJson
{
    /**
     * Converts html to the json Sir Trevor requires
     *
     * Every element that is a direct child of the parsed <body> is handed to
     * the converter matching its tag name. Direct children that are not
     * elements (for example comments or loose text) are skipped.
     *
     * @param string $html
     * @return string The json string, shaped as {"data": [...]}
     */
    public function toJson($html)
    {
        // Strip white space between tags to prevent creation of empty #text nodes
        $html = preg_replace('~>\s+<~', '><', $html);
        $document = new \DOMDocument();

        // loadHTML() reports imperfect markup as PHP warnings. Collect those
        // errors internally while parsing, then restore the caller's setting
        // so this method has no lasting effect on libxml's error handling.
        $previousErrorSetting = libxml_use_internal_errors(true);

        // Load UTF-8 HTML hack (from http://bit.ly/pVDyCt)
        $document->loadHTML('<?xml encoding="UTF-8">' . $html);
        $document->encoding = 'UTF-8';

        libxml_clear_errors();
        libxml_use_internal_errors($previousErrorSetting);

        // fetch the body of the document. All html is stored in there
        $body = $document->getElementsByTagName("body")->item(0);

        $data = array();

        // loop through the child nodes and convert them
        if ($body) {
            foreach ($body->childNodes as $node) {
                // convert() only accepts elements; passing a comment or text
                // node would fail its \DOMElement type hint.
                if (!$node instanceof \DOMElement) {
                    continue;
                }

                $data[] = $this->convert($node->nodeName, $node);
            }
        }

        return json_encode(array('data' => $data));
    }

    /**
     * Picks the converter for a tag name and lets it convert the element
     *
     * Tag names without a dedicated converter fall back to BaseConverter.
     *
     * @param string $nodeName The tag name of the element
     * @param \DOMElement $node The element to convert
     * @return mixed Whatever the selected converter's toJson() returns
     */
    private function convert($nodeName, \DOMElement $node)
    {
        switch ($nodeName) {
            case 'p':
                $converter = new ParagraphConverter();
                break;
            case 'h2':
                $converter = new HeadingConverter();
                break;
            case 'ul':
                $converter = new ListConverter();
                break;
            case 'blockquote':
                $converter = new BlockquoteConverter();
                break;
            case 'iframe':
                $converter = new IframeConverter();
                break;
            case 'img':
                $converter = new ImageConverter();
                break;
            default:
                $converter = new BaseConverter();
                break;
        }

        return $converter->toJson($node);
    }
}
84
|
|
|
|
Duplicated code is one of the most pungent code smells. If you need to duplicate the same code in three or more different places, we strongly encourage you to look into extracting the code into a single class or operation.
You can also find more detailed suggestions in the “Code” section of your repository.