<?php

namespace Fillet\Parser;

/**
 * Converts a WordPress export (WXR) XML file into data that Fillet can parse into HTML for Sculpin
 *
 * @package Fillet\Parser
 */
class WordpressExport implements ParserInterface
{
    /**
     * Parses a specific XML file
     *
     * The file is streamed with XMLReader, so only one <item> element is
     * expanded into memory at a time. For every <item> that carries children
     * in the WordPress export namespace, one array is yielded with the keys
     * 'type', 'post_date' (a \DateTime), 'title', 'content', 'tags' and
     * 'categories'. Items without WordPress-namespaced children are skipped.
     *
     * Because this is a generator, nothing is read (and no exception is
     * thrown) until the caller starts iterating.
     *
     * @param string $inputFile File to parse
     * @return \Generator
     * @throws \RuntimeException If the file cannot be opened or an item cannot be expanded
     */
    public function parse($inputFile)
    {
        // Namespace of <content:encoded>, which holds the post body.
        $contentNamespace = 'http://purl.org/rss/1.0/modules/content/';
        // Namespace of the <wp:*> elements (post_type, post_date, ...).
        $wpNamespace = 'http://wordpress.org/export/1.2/';

        $reader = new \XMLReader();
        $dom = new \DOMDocument('1.0', 'UTF-8');

        if (!$reader->open($inputFile)) {
            throw new \RuntimeException(sprintf('Unable to open "%s" for reading', $inputFile));
        }

        try {
            // Fast-forward to the first node named "item".
            while ($reader->read() && $reader->name !== 'item');

            while ($reader->name === 'item') {
                $node = $reader->expand();
                if (false === $node) {
                    throw new \RuntimeException(sprintf('Unable to expand an <item> element in "%s"', $inputFile));
                }

                // Import the expanded node into our own document so that
                // SimpleXML can be used to navigate it.
                $xml = simplexml_import_dom($dom->importNode($node, true));
                $wpItems = $xml->children($wpNamespace);
                $content = $xml->children($contentNamespace)->encoded;

                $categories = [];
                $tags = [];

                // WordPress exports both categories and tags as <category>
                // elements, distinguished by their "domain" attribute.
                foreach ($xml->category as $category) {
                    $domain = (string)$category->attributes()->domain;

                    if ('category' === $domain) {
                        $categories[] = (string)$category;
                    } elseif ('post_tag' === $domain) {
                        $tags[] = (string)$category;
                    }
                }

                if ($wpItems) {
                    yield [
                        'type' => (string)$wpItems->post_type,
                        'post_date' => new \DateTime((string)$wpItems->post_date),
                        'title' => (string)$xml->title,
                        'content' => (string)$content,
                        'tags' => $tags,
                        'categories' => $categories,
                    ];
                }

                // Skip the current subtree and jump to the next "item" node.
                $reader->next('item');
            }
        } finally {
            // Runs when iteration completes, when an exception escapes, and
            // when the generator is destroyed before being fully consumed.
            $reader->close();
        }
    }
}