1
|
|
|
<?php |
2
|
|
|
namespace vipnytt; |
3
|
|
|
|
4
|
|
|
use DOMDocument; |
5
|
|
|
use SimpleXMLElement; |
6
|
|
|
use vipnytt\OPMLParser\Exceptions; |
7
|
|
|
use vipnytt\OPMLParser\OPMLInterface; |
8
|
|
|
|
9
|
|
|
/**
 * Lenient OPML parser.
 *
 * Loads an OPML document with libxml's recovery mode enabled and converts it
 * into a plain array with the keys `version`, `head` and `body`. Parsing happens
 * in the constructor; the outcome is read with getResult() or checked with
 * validate().
 */
class OPMLParser implements OPMLInterface
{
    /**
     * XML content, exactly as handed to the constructor
     * @var string
     */
    protected $xml;

    /**
     * Array containing the parsed XML
     * @var array
     */
    protected $result = [];

    /**
     * Constructor
     *
     * Parses the given XML right away. A document without a `<head>` element is
     * rejected; a document without a `<body>` element is accepted and yields an
     * empty `body` list.
     *
     * @param string $xml is the string we want to parse
     * @throws Exceptions\ParseException if the input is empty, cannot be loaded as XML,
     *                                   or does not contain a `<head>` element
     */
    public function __construct($xml)
    {
        $this->xml = $xml;

        // DOMDocument::loadXML() refuses an empty source (ValueError on PHP 8, warning
        // before that), so report it through the exception type callers already expect.
        if ($xml === null || $xml === '') {
            throw new Exceptions\ParseException('Provided XML document is not valid');
        }

        $dom = new DOMDocument();
        // Let libxml repair what it can instead of giving up on the first error.
        $dom->recover = true;
        $dom->strictErrorChecking = false;
        $loaded = $dom->loadXML($this->xml, LIBXML_NOCDATA);

        // Even in recovery mode the load may produce nothing usable (no root element).
        if ($loaded === false || $dom->documentElement === null) {
            throw new Exceptions\ParseException('Provided XML document is not valid');
        }
        // NOTE(review): ENCODING is resolved through self:: and is presumably declared
        // on OPMLInterface - confirm.
        $dom->encoding = self::ENCODING;

        $opml = simplexml_import_dom($dom);

        // simplexml_import_dom() signals failure with null (not false), so test the
        // type rather than comparing against a single sentinel value.
        if (!$opml instanceof SimpleXMLElement) {
            throw new Exceptions\ParseException('Provided XML document is not valid');
        }

        $this->result = [
            'version' => (string)$opml['version'],
            'head' => [],
            'body' => []
        ];

        if (!isset($opml->head)) {
            throw new Exceptions\ParseException('Provided XML is not an valid OPML document');
        }
        // First, we get all "head" elements. Head is required but its sub-elements are optional.
        // Only element names listed in OPTIONAL_HEAD_ELEMENTS are kept; anything else is ignored.
        foreach ($opml->head->children() as $key => $value) {
            if (in_array($key, self::OPTIONAL_HEAD_ELEMENTS, true)) {
                $this->result['head'][$key] = (string)$value;
            }
        }
        // A missing body is tolerated here: the result simply keeps an empty "body" list.
        if (!isset($opml->body)) {
            return;
        }
        // Then, we get body outlines. Children of body that are not "outline" elements are skipped.
        foreach ($opml->body->children() as $key => $value) {
            if ($key === 'outline') {
                $this->result['body'][] = $this->parseOutline($value);
            }
        }
    }

    /**
     * Parse an XML object as an outline object and return corresponding array
     *
     * Every attribute of the element becomes a string entry keyed by the attribute
     * name. Nested `<outline>` children are parsed recursively and collected, in
     * document order, under the `@outlines` key (absent when there are none).
     *
     * @param SimpleXMLElement $outlineXML the XML object we want to parse
     * @return array attribute map of the outline, plus `@outlines` for nested outlines
     */
    protected function parseOutline(SimpleXMLElement $outlineXML)
    {
        $outline = [];
        foreach ($outlineXML->attributes() as $key => $value) {
            $outline[$key] = (string)$value;
        }
        // Workaround for OPMLs which contain `title` but not the required `text`.
        // Compared explicitly against '' because empty() would also treat a
        // legitimate text of "0" as missing and overwrite it with the title.
        $textMissing = !isset($outline['text']) || $outline['text'] === '';
        if ($textMissing && isset($outline['title'])) {
            $outline['text'] = $outline['title'];
        }
        foreach ($outlineXML->children() as $key => $value) {
            // An outline may contain any number of outline children
            if ($key === 'outline') {
                $outline['@outlines'][] = $this->parseOutline($value);
            }
        }
        return $outline;
    }

    /**
     * Return the parsed XML as an Array
     *
     * @return array with the keys `version` (string), `head` (array) and `body` (array)
     */
    public function getResult()
    {
        return $this->result;
    }

    /**
     * Validate the parsed XML array
     * Note: The parser support parsing of OPMLs with missing content
     *
     * The parsed result is handed to the renderer; if the renderer rejects it with a
     * RenderException, the result is considered invalid.
     *
     * @return \SimpleXMLElement|false Validated object on success, false on failure
     */
    public function validate()
    {
        try {
            $render = new OPMLParser\Render($this->result);
        } catch (Exceptions\RenderException $e) {
            return false;
        }
        return $render->asXMLObject();
    }
}
121
|
|
|
|