mvdbos /
php-spider
| 1 | <?php |
||||
| 2 | namespace VDB\Spider; |
||||
| 3 | |||||
| 4 | use GuzzleHttp\Psr7; |
||||
| 5 | use Psr\Http\Message\ResponseInterface; |
||||
| 6 | use Symfony\Component\DomCrawler\Crawler; |
||||
| 7 | use VDB\Spider\Uri\DiscoveredUri; |
||||
| 8 | |||||
/**
 * A fetched resource: the URI it was discovered at plus the HTTP response received for it.
 *
 * A DomCrawler over the response body is created on first use by getCrawler().
 * For serialization the response is flattened to its raw HTTP message string and
 * rebuilt from that string on wakeup; the crawler itself is never serialized.
 *
 * @author Matthijs van den Bos
 * @copyright 2013 Matthijs van den Bos
 */
class Resource
{
    /** @var DiscoveredUri */
    protected $uri;

    /** @var ResponseInterface */
    protected $response;

    /** @var Crawler|null Created lazily by getCrawler(); excluded from serialization */
    protected $crawler;

    /** @var string|null Raw HTTP message of the response; only set by __sleep() for serialization */
    protected $body;

    /**
     * @param DiscoveredUri $uri
     * @param ResponseInterface $response
     */
    public function __construct(DiscoveredUri $uri, ResponseInterface $response)
    {
        $this->uri = $uri;
        $this->response = $response;
    }

    /**
     * Lazy loads a Crawler object based on the ResponseInterface.
     *
     * The crawler is built once from the response body and its Content-Type header,
     * using this resource's URI as the crawler's base URI, and is then reused.
     *
     * @return Crawler
     */
    public function getCrawler()
    {
        if ($this->crawler instanceof Crawler) {
            return $this->crawler;
        }

        $response = $this->getResponse();

        $this->crawler = new Crawler('', $this->getUri()->toString());
        $this->crawler->addContent(
            (string) $response->getBody(),
            $response->getHeaderLine('Content-Type')
        );

        return $this->crawler;
    }

    /**
     * @return DiscoveredUri
     */
    public function getUri()
    {
        return $this->uri;
    }

    /**
     * @return ResponseInterface
     */
    public function getResponse()
    {
        return $this->response;
    }

    /**
     * Prepares the resource for serialization.
     *
     * Because the Crawler isn't serialized correctly, we exclude it from serialization.
     * It will be available again after wakeup through lazy loading with getCrawler().
     *
     * @return string[] names of the properties to serialize
     */
    public function __sleep()
    {
        // We store the response manually as a raw HTTP message string, because
        // otherwise it will not get serialized.
        // Message::toString() replaces the namespaced function Psr7\str(), which
        // was deprecated in guzzlehttp/psr7 1.7 and removed in 2.0.
        $this->body = Psr7\Message::toString($this->response);

        return ['uri', 'body'];
    }

    /**
     * We need to set the response again after deserialization, because its body was
     * a stream that didn't get serialized. It is rebuilt from the raw HTTP message
     * string stored by __sleep().
     */
    public function __wakeup()
    {
        // Message::parseResponse() replaces the removed function Psr7\parse_response().
        $this->response = Psr7\Message::parseResponse($this->body);
    }
}
||||
| 93 |