|
1
|
|
|
<?php |
|
2
|
|
|
|
|
3
|
|
|
namespace whm\Smoke\Extensions\SmokeResponseRetriever\Retriever\ListRetriever; |
|
4
|
|
|
|
|
5
|
|
|
use Ivory\HttpAdapter\HttpAdapterInterface; |
|
6
|
|
|
use Ivory\HttpAdapter\MultiHttpAdapterException; |
|
7
|
|
|
use Psr\Http\Message\UriInterface; |
|
8
|
|
|
use whm\Crawler\Http\RequestFactory; |
|
9
|
|
|
use whm\Html\Uri; |
|
10
|
|
|
use whm\Smoke\Extensions\SmokeResponseRetriever\Retriever\Retriever as SmokeRetriever; |
|
11
|
|
|
use whm\Smoke\Scanner\SessionContainer; |
|
12
|
|
|
|
|
13
|
|
|
/**
 * Retriever that works through a fixed, pre-configured list of URLs.
 *
 * The list is handed over via init(); every call to next() takes one URL off
 * the internal stack, requests it through the injected HTTP client and
 * returns the response.
 */
class Retriever implements SmokeRetriever
{
    /**
     * All known URLs. Entries added by init() are keyed by the URL string;
     * entries added while following a relative redirect in next() are appended
     * with a numeric key. Each entry has the shape
     * ['url' => Uri|string, 'system' => mixed].
     *
     * @var array
     */
    private $urls = [];

    /**
     * @var HttpAdapterInterface
     */
    private $httpClient;

    /**
     * URLs that still have to be requested; consumed from the end by next().
     *
     * @var array
     */
    private $urlStack = [];

    /**
     * Maps an absolute redirect target URL to the string form of the URI that
     * was requested when the redirect was detected.
     *
     * @var string[]
     */
    private $redirects = [];

    /**
     * @var SessionContainer
     */
    private $sessionContainer;

    /**
     * Registers the URLs that next() will iterate over.
     *
     * Nothing happens when $urls is not an array.
     *
     * @param array $urls map of system key => list of URLs. A URL is either a
     *                    plain string or an array with a 'url' element and the
     *                    optional elements 'cookies' (list of name => value
     *                    arrays) and 'session' (session identifier).
     */
    public function init($urls)
    {
        if (!is_array($urls)) {
            return;
        }

        foreach ($urls as $system => $urlList) {
            foreach ($urlList as $url) {
                if (is_array($url)) {
                    $uri = new Uri($url['url']);

                    if (array_key_exists('cookies', $url)) {
                        foreach ($url['cookies'] as $cookie) {
                            // Dedicated variable names: reusing the outer loop key here
                            // would overwrite the system key with the cookie name.
                            foreach ($cookie as $cookieName => $cookieValue) {
                                $uri->addCookie($cookieName, $cookieValue);
                            }
                        }
                    }

                    if (array_key_exists('session', $url)) {
                        $uri->setSessionIdentifier($url['session']);
                    }

                    $this->urls[$url['url']] = ['url' => $uri, 'system' => $system];
                } else {
                    $this->urls[$url] = ['url' => new Uri($url), 'system' => $system];
                }
            }
        }

        $this->urlStack = $this->urls;
    }

    /**
     * Builds the GET request for the given URI.
     *
     * If the URI carries a session identifier, the cookies of that session are
     * added to the URI object itself before the Cookie header is created.
     *
     * @param Uri $uri
     *
     * @return \Ivory\HttpAdapter\Message\Request
     */
    private function createRequest(Uri $uri)
    {
        $headers = ['Accept-Encoding' => 'gzip', 'Connection' => 'keep-alive'];

        if ($uri->getSessionIdentifier()) {
            $session = $this->sessionContainer->getSession($uri->getSessionIdentifier());

            foreach ($session->getCookies() as $key => $value) {
                $uri->addCookie($key, $value);
            }
        }

        if ($uri->hasCookies()) {
            $headers['Cookie'] = $uri->getCookieString();
        }

        return RequestFactory::getRequest($uri, 'GET', 'php://memory', $headers);
    }

    /**
     * Requests the next URL on the stack and returns its response.
     *
     * When the HTTP client raises a MultiHttpAdapterException whose message
     * starts with 'An error occurred when fetching the URI', the quoted URI is
     * extracted from the message. If it starts with '/', it is turned into an
     * absolute URL on the host of the current request, remembered as a
     * redirect, pushed onto the stack and requested right away. Otherwise the
     * message is echoed and the method moves on to the next URL.
     *
     * @return mixed false when no URL is left, otherwise the first response
     *               returned by the HTTP client
     *
     * @throws \RuntimeException with the collected messages of all other exceptions
     */
    public function next()
    {
        if (empty($this->urlStack)) {
            return false;
        }

        $url = array_pop($this->urlStack);

        // Entries created by init() hold Uri objects, redirect entries hold plain strings.
        if ($url['url'] instanceof UriInterface) {
            $urlObject = $url['url'];
        } else {
            $urlObject = new Uri($url['url']);
        }

        $request = $this->createRequest($urlObject);

        try {
            $responses = $this->httpClient->sendRequests([$request]);
        } catch (MultiHttpAdapterException $e) {
            /* @var \Exception[] $exceptions */
            $exceptions = $e->getExceptions();
            $errorMessages = '';

            foreach ($exceptions as $exception) {
                // @fixme this must be part of the http client
                $message = $exception->getMessage();

                if (strpos($message, 'An error occurred when fetching the URI') === 0) {
                    // 41 is the length of the message prefix including the opening quote
                    // ('An error occurred when fetching the URI "'); the URI runs up to the
                    // next double quote.
                    $corruptUrl = substr($message, 41, strpos($message, '"', 41) - 41);

                    if (strpos($corruptUrl, '/') === 0) {
                        /* @var \Ivory\HttpAdapter\HttpAdapterException $exception */
                        $mainUri = $request->getUri();
                        $absoluteUrl = $mainUri->getScheme() . '://' . $mainUri->getHost() . $corruptUrl;

                        $this->redirects[$absoluteUrl] = (string)$mainUri;

                        $redirectEntry = ['url' => $absoluteUrl, 'system' => $url['system']];
                        $this->urls[] = $redirectEntry;
                        $this->urlStack[] = $redirectEntry;

                        return $this->next();
                    }

                    // the error handling should be done within the calling class
                    echo "\n " . $exception->getMessage() . "\n";

                    return $this->next();
                } else {
                    $errorMessages .= $exception->getMessage() . "\n";
                }
            }

            if ($errorMessages !== '') {
                throw new \RuntimeException($errorMessages);
            }
            // NOTE(review): if the exception carries no nested exceptions, execution
            // falls through with $responses undefined - confirm whether that can happen.
        }

        return $responses[0];
    }

    /**
     * Resolves a URI that was reached via a recorded redirect back to the URL
     * it originated from.
     *
     * NOTE(review): redirect targets are stored in $urls under numeric keys, so
     * a redirect that itself starts at a redirect target has no entry under the
     * looked-up key - verify whether chained redirects need to be supported.
     *
     * @param UriInterface $uri
     *
     * @return mixed the 'url' element of the originating entry, or $uri itself
     *               when no redirect is recorded for it
     */
    public function getOriginUri(UriInterface $uri)
    {
        $uriString = (string)$uri;

        if (array_key_exists($uriString, $this->redirects)) {
            return $this->urls[$this->redirects[$uriString]]['url'];
        }

        return $uri;
    }

    /**
     * Returns the given URI unchanged.
     *
     * @param UriInterface $uri
     *
     * @return UriInterface
     */
    public function getComingFrom(UriInterface $uri)
    {
        return $uri;
    }

    /**
     * Returns the system key the given URI was registered under; for a
     * recorded redirect target the system of the originating URL is used.
     *
     * @param UriInterface $uri
     *
     * @return mixed
     */
    public function getSystem(UriInterface $uri)
    {
        $uriString = (string)$uri;

        if (array_key_exists($uriString, $this->redirects)) {
            return $this->urls[$this->redirects[$uriString]]['system'];
        }

        return $this->urls[$uriString]['system'];
    }

    /**
     * Returns the system key of every known URL entry, one element per entry
     * (duplicates are not removed).
     *
     * @return array
     */
    public function getSystems()
    {
        $systems = [];

        foreach ($this->urls as $url) {
            $systems[] = $url['system'];
        }

        return $systems;
    }

    /**
     * Sets the HTTP client used by next() to send requests.
     *
     * @param HttpAdapterInterface $httpClient
     */
    public function setHttpClient(HttpAdapterInterface $httpClient)
    {
        $this->httpClient = $httpClient;
    }

    /**
     * Sets the container that createRequest() reads session cookies from.
     *
     * @param SessionContainer $sessionContainer
     */
    public function setSessionContainer(SessionContainer $sessionContainer)
    {
        $this->sessionContainer = $sessionContainer;
    }
}
|
182
|
|
|
|
If you return a value from a function or method, it should be a sub-type of the type declared by the parent type, e.g. an interface or an abstract method. This is more formally defined by the Liskov substitution principle, and guarantees that classes that depend on the parent type can use any instance of a child type interchangeably. This principle is also one of the SOLID principles of object-oriented design.
Let’s take a look at an example:
Our function `my_function` expects a `Post` object and outputs the author of the post. The base class `Post` returns a simple string, and outputting a simple string will work just fine. However, the child class `BlogPost`, which is a sub-type of `Post`, instead returns an object and therefore violates the SOLID principles. If a `BlogPost` were passed to `my_function`, PHP would not complain, but would ultimately fail when executing the `strtoupper` call in its body.