1
|
|
|
<?php |
2
|
|
|
namespace VDB\Spider; |
3
|
|
|
|
4
|
|
|
use Guzzle\Http\Message\Response; |
5
|
|
|
use Symfony\Component\DomCrawler\Crawler; |
6
|
|
|
use VDB\Spider\Uri\DiscoveredUri; |
7
|
|
|
|
8
|
|
|
/**
 * Pairs a discovered URI with the HTTP response that was fetched for it and
 * offers a lazily built DomCrawler over the response content.
 *
 * Instances can be serialized: the crawler is dropped on sleep and the response
 * body is kept in a separate property so it can be restored on wakeup.
 *
 * @author Matthijs van den Bos
 * @copyright 2013 Matthijs van den Bos
 */
class Resource
{
    /** @var DiscoveredUri The URI this resource belongs to */
    protected $uri;

    /** @var Response The response that was received for the URI */
    protected $response;

    /** @var Crawler|null Built on the first call to getCrawler(); excluded from serialization */
    protected $crawler;

    /** @var string Copy of the response body, kept so that it survives serialization */
    protected $body;

    /**
     * @param DiscoveredUri $uri
     * @param Response $response
     */
    public function __construct(DiscoveredUri $uri, Response $response)
    {
        $this->uri = $uri;
        $this->response = $response;

        // we store the response manually, because otherwise it will not get serialized. It is a php://temp stream
        $this->body = $response->getBody(true);
    }

    /**
     * Lazy loads a Crawler object based on the Response.
     *
     * The crawler is created once, filled with the response body and the
     * response's Content-Type, and then reused on subsequent calls.
     *
     * @return Crawler
     */
    public function getCrawler()
    {
        if (!$this->crawler instanceof Crawler) {
            $response = $this->getResponse();

            // NOTE(review): depending on the installed Guzzle version the header may
            // come back as a header object instead of a plain string - confirm.
            // Normalise it to string|null before handing it to the crawler.
            $contentType = $response->getHeader('Content-Type', true);
            if (null !== $contentType) {
                $contentType = (string) $contentType;
            }

            $crawler = new Crawler('', $this->getUri()->toString());
            $crawler->addContent($response->getBody(true), $contentType);

            // Only keep the crawler once it has its content, so a failure above
            // does not leave an empty crawler behind for later calls.
            $this->crawler = $crawler;
        }

        return $this->crawler;
    }

    /**
     * @return DiscoveredUri
     */
    public function getUri()
    {
        return $this->uri;
    }

    /**
     * @return Response
     */
    public function getResponse()
    {
        return $this->response;
    }

    /**
     * Lists the properties that take part in serialization.
     *
     * @return array
     */
    public function __sleep()
    {
        /*
         * Because the Crawler isn't serialized correctly, we exclude it from serialization
         * It will be available again after wakeup through lazy loading with getCrawler()
         */
        return array(
            'uri',
            'response',
            'body'
        );
    }

    /**
     * We need to set the body again after deserialization because it was a stream that didn't get serialized
     */
    public function __wakeup()
    {
        $this->response->setBody($this->body);
    }
}
92
|
|
|
|
This check looks at variables that are passed out again to other methods.
If the outgoing method call has stricter type requirements than the method itself, an issue is raised.
An additional type check may prevent trouble.