1
|
|
|
<?php |
2
|
|
|
|
3
|
|
|
/*
 * This file is part of the Spider package.
 *
 * (c) Matthijs van den Bos <[email protected]>
 *
 * For the full copyright and license information, please view the LICENSE
 * file that was distributed with this source code.
 */
11
|
|
|
|
12
|
|
|
namespace VDB\Spider\Tests\Downloader; |
13
|
|
|
|
14
|
|
|
use VDB\Spider\Tests\TestCase; |
15
|
|
|
use VDB\Spider\PersistenceHandler\FileSerializedResourcePersistenceHandler; |
16
|
|
|
|
17
|
|
|
/**
 * Tests for FileSerializedResourcePersistenceHandler.
 *
 * Every case persists a resource built from an HTML fixture, then checks that
 * a file exists at the expected location below the system temp directory and
 * that unserializing that file yields a resource whose response body equals
 * the fixture content.
 */
class FileSerializedResourcePersistenceHandlerTest extends TestCase
{
    /**
     * Spider id given to the handler. The tests expect persisted files in a
     * directory of this name directly below the system temp directory.
     */
    private const SPIDER_ID = 'spider-UT';

    /**
     * @var FileSerializedResourcePersistenceHandler
     */
    protected $handler;

    /**
     * Directory (with trailing separator) in which persisted files are
     * expected; null until resolved for the first time.
     *
     * @var string|null
     */
    protected $persistenceRootPath;

    /**
     * Removes output left over from earlier runs and creates a fresh handler.
     */
    public function setUp(): void
    {
        $rootPath = $this->resolvePersistenceRootPath();

        // The path is escaped so that a temp directory containing spaces or
        // shell metacharacters cannot break or redirect the delete command.
        exec('rm -rf ' . escapeshellarg($rootPath));

        $this->handler = new FileSerializedResourcePersistenceHandler(sys_get_temp_dir());
        $this->handler->setSpiderId(self::SPIDER_ID);
    }

    /**
     * Persists resources whose URI path ends in a file name.
     *
     * @covers VDB\Spider\PersistenceHandler\FileSerializedResourcePersistenceHandler
     * @covers VDB\Spider\PersistenceHandler\FilePersistenceHandler
     *
     * @dataProvider persistenceProvider
     */
    public function testPersist($resource, $expectedFilePath, $expectedFileContents)
    {
        $this->assertPersistedBodyEquals($resource, $expectedFilePath, $expectedFileContents);
    }

    /**
     * Persists resources whose URI path ends in a slash; the expected file
     * name for those is 'index.html' (see buildExpectedFilePath()).
     *
     * @covers VDB\Spider\PersistenceHandler\FileSerializedResourcePersistenceHandler
     * @covers VDB\Spider\PersistenceHandler\FilePersistenceHandler
     *
     * @dataProvider persistenceWithoutFilenameProvider
     */
    public function testPersistResourcesWithoutFilename($resource, $expectedFilePath, $expectedFileContents)
    {
        $this->assertPersistedBodyEquals($resource, $expectedFilePath, $expectedFileContents);
    }

    /**
     * Data sets for URIs that end in a directory rather than a file name.
     *
     * NOTE(review): newer PHPUnit versions expect data providers to be static;
     * whether these can be converted depends on the helpers of the parent
     * TestCase, which are not visible here.
     *
     * @return array[] list of [resource, expected file path, expected body]
     */
    public function persistenceWithoutFilenameProvider()
    {
        $fixture = __DIR__ . '/../Fixtures/DownloaderTestHTMLResource.html';

        return [
            $this->buildPersistenceProviderRecord($fixture, 'http://example.org/domains/Internet/'),
            $this->buildPersistenceProviderRecord($fixture, 'http://example.org/domains/Internet/Abuse/'),
        ];
    }

    /**
     * Data sets for URIs that end in an explicit file name.
     *
     * NOTE(review): newer PHPUnit versions expect data providers to be static;
     * whether these can be converted depends on the helpers of the parent
     * TestCase, which are not visible here.
     *
     * @return array[] list of [resource, expected file path, expected body]
     */
    public function persistenceProvider()
    {
        $fixture = __DIR__ . '/../Fixtures/DownloaderTestHTMLResource.html';

        return [
            $this->buildPersistenceProviderRecord($fixture, 'http://example.org/domains/special/test1.html'),
            $this->buildPersistenceProviderRecord($fixture, 'http://example.org/domains/special/test2.html'),
            $this->buildPersistenceProviderRecord($fixture, 'http://example.org/domains/special/subdir/test3.html'),
        ];
    }

    /**
     * Builds one data-provider record for the given fixture and URI.
     *
     * @param string $fixturePath path of the fixture file used as response body
     * @param string $uriString   URI the resource is built for
     *
     * @return array [resource, expected file path, expected body]
     */
    protected function buildPersistenceProviderRecord($fixturePath, $uriString)
    {
        $resource = $this->buildResourceFromFixture($fixturePath, $uriString);

        // Read the expectation from the same fixture the resource was built
        // from, instead of a hard-coded file that ignores $fixturePath.
        $expectedFileContents = $this->getFixtureContent($fixturePath);
        $expectedFilePath = $this->buildExpectedFilePath($uriString);

        return [$resource, $expectedFilePath, $expectedFileContents];
    }

    /**
     * Computes where the file for a URI is expected: root path + host + path,
     * with 'index.html' appended when the result ends in a slash.
     *
     * @param string $uriString absolute URI containing a host and a path
     *
     * @return string
     */
    protected function buildExpectedFilePath($uriString)
    {
        $uriParts = parse_url($uriString);
        $expectedFilePath = $this->resolvePersistenceRootPath() . $uriParts['host'] . $uriParts['path'];

        if (substr($expectedFilePath, -1) === '/') {
            $expectedFilePath .= 'index.html';
        }

        return $expectedFilePath;
    }

    /**
     * Returns the expected root directory, computing it on first use.
     *
     * Resolved lazily because data providers are executed before setUp(), so
     * the property may still be unset when a provider needs it.
     *
     * @return string
     */
    private function resolvePersistenceRootPath()
    {
        if ($this->persistenceRootPath === null) {
            $this->persistenceRootPath = sys_get_temp_dir()
                . DIRECTORY_SEPARATOR . self::SPIDER_ID . DIRECTORY_SEPARATOR;
        }

        return $this->persistenceRootPath;
    }

    /**
     * Persists the resource and verifies the file written for it.
     *
     * @param mixed  $resource             resource passed to the handler's persist()
     * @param string $expectedFilePath     file that must exist afterwards
     * @param string $expectedFileContents body the stored resource's response must have
     */
    private function assertPersistedBodyEquals($resource, $expectedFilePath, $expectedFileContents)
    {
        $this->handler->persist($resource);

        $this->assertFileExists($expectedFilePath);

        // The file only contains data this test has just written, so
        // unserializing it is safe here.
        $savedResource = unserialize(file_get_contents($expectedFilePath));

        // unserialize() returns false on failure; report that as a test
        // failure rather than a fatal "member function on bool" error.
        $this->assertNotFalse($savedResource, 'Persisted file could not be unserialized');

        $this->assertEquals(
            $expectedFileContents,
            $savedResource->getResponse()->getBody()
        );
    }
}
150
|
|
|
|