1
|
|
|
<?php |
2
|
|
|
|
3
|
|
|
namespace TreeHouse\IoBundle\Test\Scrape\Parser; |
4
|
|
|
|
5
|
|
|
use Symfony\Component\Finder\Finder; |
6
|
|
|
use TreeHouse\IoBundle\Entity\Scraper; |
7
|
|
|
use TreeHouse\IoBundle\Scrape\Parser\ParserBuilder; |
8
|
|
|
use TreeHouse\IoBundle\Scrape\Parser\ParserInterface; |
9
|
|
|
use TreeHouse\IoBundle\Scrape\ScrapedItemBag; |
10
|
|
|
use TreeHouse\IoBundle\Test\Item\ItemFixture; |
11
|
|
|
use TreeHouse\IoBundle\Test\TestCase; |
12
|
|
|
|
13
|
|
|
abstract class ParserTypeTestCase extends TestCase
{
    /**
     * URL handed to every ScrapedItemBag that is built for a fixture.
     *
     * @var string
     */
    protected static $url = 'http://example.org';

    /**
     * Returns the parser type under test.
     *
     * The value is used as the name of the fixture sub-directory and as the
     * parser to look up a scraper entity with.
     *
     * @return string
     */
    abstract protected function getParserType();

    /**
     * Builds the parser for a scraper, using the parser type resolved by the
     * scraper factory service and the scraper's own parser options.
     *
     * @param Scraper $scraper
     *
     * @return ParserInterface
     */
    protected function getParser(Scraper $scraper)
    {
        $factory = $this->get('tree_house.io.scrape.scraper_factory');
        $parserType = $factory->getParserType($scraper->getParser());

        // the scraper itself is always passed as an option; the scraper's own
        // options are merged on top of it
        $options = array_merge(['scraper' => $scraper], $scraper->getParserOptions());

        return (new ParserBuilder())->build($parserType, $options);
    }

    /**
     * Fetches a single scraper (with its origin) that uses the given parser.
     *
     * @param string $parser
     *
     * @return Scraper|null Null when no scraper with this parser exists
     */
    protected function getScraperEntity($parser)
    {
        return $this
            ->getEntityManager()
            ->createQuery('SELECT s, o FROM TreeHouseIoBundle:Scraper s JOIN s.origin o WHERE s.parser = :parser')
            ->setParameter('parser', $parser)
            ->setMaxResults(1)
            ->getOneOrNullResult()
        ;
    }

    /**
     * Data provider: one data set per "*.html" file found in the fixture
     * directory of the parser type, containing the file name without extension.
     *
     * Marks the test as skipped when there are no fixtures.
     *
     * @return string[][]
     */
    public function getFixtureNames()
    {
        $parserType = $this->getParserType();
        $dir = $this->getFixtureDirectory($parserType);

        $fixtures = [];

        // Finder::in() throws when the directory does not exist, which would
        // prevent the "no fixtures" skip below from ever being reached
        if (is_dir($dir)) {
            // sort by name to get a stable order, independent of the filesystem
            $files = Finder::create()->files()->name('*.html')->in($dir)->sortByName();

            /** @var \SplFileInfo $file */
            foreach ($files as $file) {
                $fixtures[] = [$file->getBasename('.html')];
            }
        }

        if (empty($fixtures)) {
            $this->markTestSkipped(sprintf('No fixtures for %s created', $parserType));
        }

        return $fixtures;
    }

    /**
     * Parses a fixture and compares the result with the expected item.
     *
     * @dataProvider getFixtureNames
     *
     * @param string $fixtureName
     */
    public function testFixtures($fixtureName)
    {
        $fixture = $this->getItemFixture($this->getParserType(), $fixtureName);

        $this->assertFixture($fixture);
    }

    /**
     * Asserts that the actual item has the expected original id.
     *
     * @param ItemFixture $fixture
     */
    protected function assertOriginalId(ItemFixture $fixture)
    {
        $this->assertEquals(
            $fixture->getExpectedItem()->getOriginalId(),
            $fixture->getActualItem()->getOriginalId()
        );
    }

    /**
     * Asserts that the actual item has the expected original url.
     *
     * @param ItemFixture $fixture
     */
    protected function assertOriginalUrl(ItemFixture $fixture)
    {
        $this->assertEquals(
            $fixture->getExpectedItem()->getOriginalUrl(),
            $fixture->getActualItem()->getOriginalUrl()
        );
    }

    /**
     * Asserts the complete fixture: original id, original url and every value.
     *
     * Every expected key must be present in the actual item with a matching
     * (normalized) value. Keys in the actual item that are not part of the
     * expected item make the test fail.
     *
     * @param ItemFixture $fixture
     */
    protected function assertFixture(ItemFixture $fixture)
    {
        $this->assertOriginalId($fixture);
        $this->assertOriginalUrl($fixture);

        $expected = $fixture->getExpectedItem()->all();
        $actual = $fixture->getActualItem()->all();

        foreach ($expected as $key => $expectedValue) {
            $this->assertArrayHasKey(
                $key,
                $actual,
                sprintf('Key "%s" is not in item, when it should be', $key)
            );

            $actualValue = $actual[$key];

            $this->normalizeValues($key, $expectedValue, $actualValue);
            $this->assertValue($key, $expectedValue, $actualValue);

            // whatever remains after the loop was not covered by the expectation
            unset($actual[$key]);
        }

        if (!empty($actual)) {
            $this->fail(
                sprintf('The following keys in the modified item are not tested: %s', json_encode(array_keys($actual)))
            );
        }
    }

    /**
     * Asserts a value.
     *
     * If either value is an object, or an array whose element at index 0 is
     * an object, the values are compared by equality (==). In all other cases
     * they are compared by identity (===).
     *
     * @param string $key
     * @param mixed  $expectedValue
     * @param mixed  $actualValue
     */
    protected function assertValue($key, $expectedValue, $actualValue)
    {
        $message = sprintf('Key "%s" is not modified properly', $key);

        if ($this->isObjectOrObjectList($expectedValue) || $this->isObjectOrObjectList($actualValue)) {
            $this->assertEquals($expectedValue, $actualValue, $message);

            return;
        }

        $this->assertSame($expectedValue, $actualValue, $message);
    }

    /**
     * Normalizes values before asserting them.
     *
     * - an expected integer is cast to float when the actual value is a float
     * - lists (arrays whose first key is numeric) are sorted, as their order
     *   does not matter
     * - ISO 8601 date/time strings are reduced to their date part
     *
     * @param string $key
     * @param mixed  $expectedValue
     * @param mixed  $actualValue
     */
    protected function normalizeValues($key, &$expectedValue, &$actualValue)
    {
        // some integers are modified to floats, this is ok though
        if (is_int($expectedValue) && is_float($actualValue)) {
            $expectedValue = (float) $expectedValue;
        }

        // the order of non-associative arrays does not matter
        if (is_array($expectedValue) && is_array($actualValue)) {
            // rewind first: key() reads the internal pointer, which may have
            // been moved by whoever built the array
            reset($expectedValue);

            if (is_numeric(key($expectedValue))) {
                sort($expectedValue);
                sort($actualValue);
            }
        }

        // only test the day of dates
        $expectedValue = $this->stripTimeFromDate($expectedValue);
        $actualValue = $this->stripTimeFromDate($actualValue);
    }

    /**
     * Combines the parsed (actual) and the expected item into a fixture.
     *
     * Marks the test as skipped when no scraper exists for the parser type.
     *
     * @param string $parserType
     * @param string $fixtureName
     *
     * @return ItemFixture
     */
    protected function getItemFixture($parserType, $fixtureName)
    {
        $scraper = $this->getScraperEntity($parserType);

        if (null === $scraper) {
            $this->markTestSkipped(sprintf('Add an origin with a %s scraper to the database first', $parserType));
        }

        $actual = $this->getActualItemFixture($scraper, $parserType, $fixtureName);
        $expected = $this->getExpectedItemFixture($scraper, $parserType, $fixtureName);

        return new ItemFixture($actual, $expected);
    }

    /**
     * Creates an item from the fixture's html file and runs the scraper's
     * parser on it.
     *
     * @param Scraper $scraper
     * @param string  $parserType
     * @param string  $fixtureName
     *
     * @return ScrapedItemBag
     */
    protected function getActualItemFixture(Scraper $scraper, $parserType, $fixtureName)
    {
        $htmlFile = sprintf('%s/%s.html', $this->getFixtureDirectory($parserType), $fixtureName);
        $html = file_get_contents($htmlFile);

        $item = new ScrapedItemBag($scraper, static::$url, $html);
        $this->getParser($scraper)->parse($item);

        return $item;
    }

    /**
     * Creates the expected item from the fixture's php file.
     *
     * The file must return an array with an "item" key holding the expected
     * values, and optionally "id", "url" and "date" keys for the original id,
     * original url and modification date.
     *
     * @param Scraper $scraper
     * @param string  $parserType
     * @param string  $fixtureName
     *
     * @return ScrapedItemBag
     */
    protected function getExpectedItemFixture(Scraper $scraper, $parserType, $fixtureName)
    {
        $phpFile = sprintf('%s/%s.php', $this->getFixtureDirectory($parserType), $fixtureName);

        // fail with a clear message, rather than an include warning followed
        // by an error about the missing expectation
        if (!is_file($phpFile)) {
            $this->fail(sprintf('Expected fixture file "%s" does not exist', $phpFile));
        }

        /** @var array $expected */
        $expected = include $phpFile;

        $item = new ScrapedItemBag($scraper, static::$url, '');
        $item->add($expected['item']);

        if (isset($expected['id'])) {
            $item->setOriginalId($expected['id']);
        }

        if (isset($expected['url'])) {
            $item->setOriginalUrl($expected['url']);
        }

        if (isset($expected['date'])) {
            $item->setDatetimeModified($expected['date']);
        }

        return $item;
    }

    /**
     * Returns the fixture directory for a parser type: "fixtures/<type>",
     * next to the file of the concrete test class.
     *
     * @param string $parserType
     *
     * @return string
     */
    private function getFixtureDirectory($parserType)
    {
        $refl = new \ReflectionClass(get_class($this));

        return sprintf('%s/fixtures/%s', dirname($refl->getFileName()), $parserType);
    }

    /**
     * Tests whether a value is an object, or an array whose element at
     * index 0 is an object.
     *
     * @param mixed $value
     *
     * @return bool
     */
    private function isObjectOrObjectList($value)
    {
        if (is_object($value)) {
            return true;
        }

        return is_array($value) && isset($value[0]) && is_object($value[0]);
    }

    /**
     * Reduces an ISO 8601 date/time string ("2015-03-04T10:00:00+0100") to
     * its date part ("2015-03-04"). Any other value is returned as is.
     *
     * @param mixed $value
     *
     * @return mixed
     */
    private function stripTimeFromDate($value)
    {
        // the time part may contain fractional seconds and a positive or
        // negative offset, or end with "Z" for UTC
        if (is_string($value) && preg_match('/^(\d{4}-\d{2}-\d{2})T[0-9:.+\-]+Z?$/', $value, $matches)) {
            return $matches[1];
        }

        return $value;
    }
}
256
|
|
|
|
Duplicated code is one of the most pungent code smells. If you need to duplicate the same code in three or more different places, we strongly encourage you to look into extracting the code into a single class or operation.
You can also find more detailed suggestions in the “Code” section of your repository.