1 | <?php |
||
17 | class IncrementalXmlDumpReader implements Iterator { |
||
18 | |||
19 | /** |
||
20 | * @var resource |
||
21 | */ |
||
22 | private $fileStream; |
||
23 | |||
24 | /** |
||
25 | * @var Deserializer |
||
26 | */ |
||
27 | private $entityDeserializer; |
||
28 | |||
29 | /** |
||
30 | * @var EntityDocument|null |
||
31 | */ |
||
32 | private $currentEntity = null; |
||
33 | |||
34 | /** |
||
35 | * @var LoggerInterface |
||
36 | */ |
||
37 | private $logger; |
||
38 | |||
/**
 * Opens the dump file for reading and stores the collaborators used by next().
 *
 * @param string $fileName Path (or stream URL) of the XML dump, opened in read mode
 * @param Deserializer $entityDeserializer Turns the decoded JSON array of a page into an entity
 * @param LoggerInterface $logger Receives a message for every page that has to be skipped
 *
 * @throws \RuntimeException if the dump file cannot be opened
 */
public function __construct( $fileName, Deserializer $entityDeserializer, LoggerInterface $logger ) {
	$stream = fopen( $fileName, 'r' );

	// fopen() signals failure by returning false. Keeping that value would only
	// postpone the error to the first fgets() call in next(), far away from the cause.
	if ( $stream === false ) {
		throw new \RuntimeException( 'Could not open the dump file "' . $fileName . '" for reading.' );
	}

	$this->fileStream = $stream;
	$this->entityDeserializer = $entityDeserializer;
	$this->logger = $logger;
}
|
49 | |||
50 | 2 | public function __destruct() { |
|
53 | |||
54 | /** |
||
55 | * @see Iterator::current |
||
56 | */ |
||
57 | 2 | public function current() { |
|
60 | |||
/**
 * Advances to the next entity of the dump that can be deserialized.
 *
 * The stream is read line by line. A page counts as an entity page once a
 * "<model>wikibase-...</model>" line has been seen for it. Its payload is the
 * content of a "<text ...>...</text>" element that sits on a single line. When
 * "</page>" is reached the payload is entity-decoded, parsed as JSON and handed
 * to the entity deserializer.
 *
 * Pages that are not entity pages are skipped silently. Pages whose payload is
 * not a JSON object/array, that describe a redirect, or that fail to
 * deserialize are logged and skipped. If the end of the stream is reached
 * first, the current entity stays null.
 *
 * @see Iterator::next
 */
public function next() {
	$this->currentEntity = null;

	$isEntity = false;
	$text = '';

	while ( true ) {
		$line = fgets( $this->fileStream );

		if ( $line === false ) {
			// Nothing left to read: leave the current entity unset.
			return;
		}

		if ( preg_match( '/<model>wikibase-[a-z]+<\/model>/', $line ) ) {
			$isEntity = true;
			continue;
		}

		if ( preg_match( '/<text.*>(.*)<\/text>/', $line, $m ) ) {
			$text = $m[1];
			continue;
		}

		if ( !preg_match( '/<\/page>/', $line ) ) {
			continue;
		}

		// End of a page: take a snapshot of what was collected and reset the
		// per-page state right away. Otherwise a skipped page would leak its
		// "is entity" flag and its text into the following page.
		$pageIsEntity = $isEntity;
		$pageText = $text;
		$isEntity = false;
		$text = '';

		if ( !$pageIsEntity ) {
			continue;
		}

		$json = json_decode( html_entity_decode( $pageText ), true );

		// json_decode() yields null for invalid input and a scalar for valid
		// non-object JSON; neither can be an entity serialization and a scalar
		// must not reach array_key_exists() below.
		if ( !is_array( $json ) ) {
			$this->logger->error( 'The serialization is not a valid JSON document.' );
			continue;
		}

		if ( array_key_exists( 'redirect', $json ) ) {
			$this->logger->info( 'Entity redirection not supported.' );
			continue;
		}

		try {
			$this->currentEntity = $this->entityDeserializer->deserialize( $json );
			return;
		} catch ( DeserializationException $e ) {
			// Log and keep scanning for the next usable page.
			$id = array_key_exists( 'id', $json ) ? $json['id'] : '';
			$this->logger->error( 'Deserialization of entity ' . $id . ' failed: ' . $e->getMessage() );
		}
	}
}
||
103 | |||
/**
 * Returns the serialized id of the current entity, or null when there is
 * no current entity.
 *
 * @see Iterator::key
 */
public function key() {
	$entity = $this->currentEntity;

	return $entity === null
		? null
		: $entity->getId()->getSerialization();
}
||
114 | |||
115 | /** |
||
116 | * @see Iterator::valid |
||
117 | */ |
||
118 | 2 | public function valid() { |
|
121 | |||
122 | /** |
||
123 | * @see Iterator::rewind |
||
124 | */ |
||
125 | 2 | public function rewind() { |
|
129 | } |
||
130 |