1 | <?php |
||
8 | abstract class BaseReporter implements CrawlObserver |
||
9 | { |
||
10 | const UNRESPONSIVE_HOST = 'Host did not respond'; |
||
11 | |||
12 | /** |
||
13 | * @var array |
||
14 | */ |
||
15 | protected $urlsGroupedByStatusCode = []; |
||
16 | |||
17 | /** |
||
18 | * Called when the crawler will crawl the url. |
||
19 | * |
||
20 | * @param \Spatie\Crawler\Url $url |
||
21 | */ |
||
22 | public function willCrawl(Url $url) |
||
25 | |||
/**
 * Called when the crawler has crawled the given url.
 *
 * Resolves a status for the crawl: the status code reported by the
 * response, or the UNRESPONSIVE_HOST marker when no response was
 * received. Unless that status is excluded, the url is stored under
 * it in $urlsGroupedByStatusCode.
 *
 * @param \Spatie\Crawler\Url $url
 * @param \Psr\Http\Message\ResponseInterface|null $response
 * @param \Spatie\Crawler\Url|null $foundOnUrl Not used by this implementation.
 *
 * @return int|string The response status code, or the UNRESPONSIVE_HOST
 *                    marker when there was no response.
 */
public function hasBeenCrawled(Url $url, $response, Url $foundOnUrl = null)
{
    // No response means the host never answered; fall back to the marker.
    if ($response) {
        $resolvedStatus = $response->getStatusCode();
    } else {
        $resolvedStatus = static::UNRESPONSIVE_HOST;
    }

    // Excluded statuses are still returned to the caller, just not recorded.
    if ($this->isExcludedStatusCode($resolvedStatus)) {
        return $resolvedStatus;
    }

    $this->urlsGroupedByStatusCode[$resolvedStatus][] = $url;

    return $resolvedStatus;
}
||
45 | |||
/**
 * Determine if the status code concerns a successful or
 * redirect response, i.e. whether it starts with "2" or "3".
 *
 * Non-numeric values such as the UNRESPONSIVE_HOST marker are
 * never considered successful.
 *
 * @param int|string $statusCode
 *
 * @return bool
 */
protected function isSuccessOrRedirect($statusCode): bool
{
    // Inspect the leading character with plain PHP rather than the
    // framework-provided `starts_with()` helper, which is not available
    // in every environment. The cast keeps integer codes working.
    $leadingCharacter = substr((string) $statusCode, 0, 1);

    // Strict comparison: an empty status (substr yields '' or false,
    // depending on the PHP version) must not match either prefix.
    return in_array($leadingCharacter, ['2', '3'], true);
}
||
57 | |||
58 | /** |
||
59 | * Determine if the crawler saw some bad urls. |
||
60 | */ |
||
61 | protected function crawledBadUrls(): bool |
||
67 | |||
68 | /** |
||
69 | * Determine if the status code should be excluded |
||
70 | * from the reporter. |
||
71 | * |
||
72 | * @param int|string $statusCode |
||
73 | * |
||
74 | * @return bool |
||
75 | */ |
||
76 | protected function isExcludedStatusCode($statusCode): bool |
||
82 | } |
||
83 |