1 | <?php |
||
class EntityDumpIteratorTest extends TestCase {

	/**
	 * Builds an entity dump iterator that reads the dump file at the given path.
	 *
	 * @param string $filePath path of the dump file handed to ExtractedDumpReader
	 * @param callable|null $onError optional callback forwarded to the factory; the
	 *        tests in this class use it to collect reported error messages
	 *
	 * @return Iterator
	 */
	private function newIteratorForFile( $filePath, callable $onError = null ) {
		return ( new JsonDumpFactory() )->newEntityDumpIterator(
			new ExtractedDumpReader( $filePath ),
			$this->newCurrentEntityDeserializer(),
			$onError
		);
	}

	/**
	 * Creates the entity deserializer obtained from a DeserializerFactory that is
	 * set up with the data value deserializer below and a BasicEntityIdParser.
	 */
	private function newCurrentEntityDeserializer() {
		$factory = new DeserializerFactory(
			$this->newDataValueDeserializer(),
			new BasicEntityIdParser()
		);

		return $factory->newEntityDeserializer();
	}

	/**
	 * Creates a DataValueDeserializer from a map of data value type names to the
	 * classes used for them.
	 */
	private function newDataValueDeserializer() {
		$dataValueClasses = [
			'boolean' => BooleanValue::class,
			'number' => NumberValue::class,
			'string' => StringValue::class,
			'unknown' => UnknownValue::class,
			'globecoordinate' => GlobeCoordinateValue::class,
			'monolingualtext' => MonolingualTextValue::class,
			'multilingualtext' => MultilingualTextValue::class,
			'quantity' => QuantityValue::class,
			'time' => TimeValue::class,
			'wikibase-entityid' => EntityIdValue::class,
		];

		return new DataValueDeserializer( $dataValueClasses );
	}

	/**
	 * Iterates the given dump iterator once and asserts that the serialized ids of
	 * the entities it yields equal the expected ids, in the same order.
	 *
	 * @param string[] $expectedIds
	 * @param Iterator $dumpIterator
	 * @param string $message optional failure message passed to the assertion
	 */
	private function assertFindsEntities( array $expectedIds, Iterator $dumpIterator, $message = '' ) {
		$actualIds = [];

		foreach ( $dumpIterator as $entity ) {
			$actualIds[] = $entity->getId()->getSerialization();
		}

		$this->assertEquals( $expectedIds, $actualIds, $message );
	}

	public function testGivenFileWithNoEntities_noEntitiesAreReturned() {
		$iterator = $this->newIteratorForFile( ( new \JsonDumpData() )->getEmptyDumpPath() );

		$this->assertFindsEntities( [], $iterator );
	}

	public function testGivenFileWithOneEntity_oneEntityIsFound() {
		$iterator = $this->newIteratorForFile( ( new \JsonDumpData() )->getOneItemDumpPath() );

		$this->assertFindsEntities( [ 'Q1' ], $iterator );
	}

	public function testGivenFileWithFiveEntities_fiveEntityAreFound() {
		$iterator = $this->newIteratorForFile( ( new \JsonDumpData() )->getFiveEntitiesDumpPath() );

		$this->assertFindsEntities( [ 'Q1', 'Q8', 'P16', 'P19', 'P22' ], $iterator );
	}

	public function testGivenFileWithInvalidEntity_noEntityIsFound() {
		$iterator = $this->newIteratorForFile( __DIR__ . '/../data/invalid-item.json' );
		$this->assertFindsEntities( [], $iterator );
	}

	public function testGivenFileWithInvalidEntities_validEntitiesAreFound() {
		$iterator = $this->newIteratorForFile( __DIR__ . '/../data/3valid-2invalid.json' );
		$this->assertFindsEntities( [ 'Q1', 'P16', 'P22' ], $iterator );
	}

	/**
	 * Iterating the same iterator twice must yield the same entities both times.
	 */
	public function testCanDoMultipleIterations() {
		$iterator = $this->newIteratorForFile( ( new \JsonDumpData() )->getFiveEntitiesDumpPath() );

		$this->assertFindsEntities( [ 'Q1', 'Q8', 'P16', 'P19', 'P22' ], $iterator, 'first iteration' );
		$this->assertFindsEntities( [ 'Q1', 'Q8', 'P16', 'P19', 'P22' ], $iterator, 'second iteration' );
	}

	/**
	 * Reads the first two entities through one reader, then constructs a second
	 * reader with the first reader's position and checks that iteration over it
	 * yields only the remaining three entities.
	 */
	public function testInitialPosition() {
		$reader = new ExtractedDumpReader( ( new \JsonDumpData() )->getFiveEntitiesDumpPath() );

		$iterator = new EntityDumpIterator(
			( new JsonDumpFactory() )->newObjectDumpIterator( $reader ),
			$this->newCurrentEntityDeserializer()
		);

		$iterator->rewind();
		$this->assertSame( 'Q1', $iterator->current()->getId()->getSerialization() );

		$iterator->next();
		$this->assertSame( 'Q8', $iterator->current()->getId()->getSerialization() );

		// The second constructor argument is the position reported by the first reader.
		$newReader = new ExtractedDumpReader(
			( new \JsonDumpData() )->getFiveEntitiesDumpPath(),
			$reader->getPosition()
		);

		$newIterator = new EntityDumpIterator(
			( new JsonDumpFactory() )->newObjectDumpIterator( $newReader ),
			$this->newCurrentEntityDeserializer()
		);

		$this->assertFindsEntities( [ 'P16', 'P19', 'P22' ], $newIterator );
	}

	/**
	 * Walks the iterator by hand (rewind/valid/next) and expects the error
	 * callback to have collected two string messages for the fixture.
	 */
	public function testGivenFileWithInvalidEntities_errorsAreReported() {
		$errors = [];

		$iterator = $this->newIteratorForFile(
			__DIR__ . '/../data/3valid-2invalid.json',
			function( $errorMessage ) use ( &$errors ) {
				$errors[] = $errorMessage;
			}
		);

		$iterator->rewind();
		while ( $iterator->valid() ) {
			$iterator->next();
		}

		$this->assertContainsOnly( 'string', $errors );
		$this->assertCount( 2, $errors );
	}

	/**
	 * Drains the iterator with iterator_to_array() and expects the error callback
	 * to have collected exactly one string message for the fixture.
	 */
	public function testGivenFileWithInvalidJsonLine_errorIsRecorded() {
		$errors = [];

		$iterator = $this->newIteratorForFile(
			__DIR__ . '/../data/invalid-json.json',
			function( $errorMessage ) use ( &$errors ) {
				$errors[] = $errorMessage;
			}
		);

		// Only the side effect (the error callback firing) matters; the result is discarded.
		iterator_to_array( $iterator );

		$this->assertContainsOnly( 'string', $errors );
		$this->assertCount( 1, $errors );
	}

}