@@ -2,9 +2,9 @@ |
||
2 | 2 | |
3 | 3 | namespace Spatie\Crawler; |
4 | 4 | |
5 | -use Psr\Http\Message\UriInterface; |
|
6 | -use Psr\Http\Message\ResponseInterface; |
|
7 | 5 | use GuzzleHttp\Exception\RequestException; |
6 | +use Psr\Http\Message\ResponseInterface; |
|
7 | +use Psr\Http\Message\UriInterface; |
|
8 | 8 | |
9 | 9 | abstract class CrawlObserver |
10 | 10 | { |
@@ -2,9 +2,9 @@ |
||
2 | 2 | |
3 | 3 | namespace Spatie\Crawler; |
4 | 4 | |
5 | -use Spatie\Robots\RobotsMeta; |
|
6 | -use Spatie\Robots\RobotsHeaders; |
|
7 | 5 | use Psr\Http\Message\ResponseInterface; |
6 | +use Spatie\Robots\RobotsHeaders; |
|
7 | +use Spatie\Robots\RobotsMeta; |
|
8 | 8 | |
9 | 9 | class CrawlerRobots |
10 | 10 | { |
@@ -3,20 +3,20 @@ |
||
3 | 3 | namespace Spatie\Crawler; |
4 | 4 | |
5 | 5 | use Generator; |
6 | -use Tree\Node\Node; |
|
7 | -use GuzzleHttp\Pool; |
|
8 | 6 | use GuzzleHttp\Client; |
9 | -use GuzzleHttp\Psr7\Uri; |
|
7 | +use GuzzleHttp\Pool; |
|
10 | 8 | use GuzzleHttp\Psr7\Request; |
11 | -use Spatie\Robots\RobotsTxt; |
|
9 | +use GuzzleHttp\Psr7\Uri; |
|
12 | 10 | use GuzzleHttp\RequestOptions; |
13 | 11 | use Psr\Http\Message\UriInterface; |
14 | 12 | use Spatie\Browsershot\Browsershot; |
13 | +use Spatie\Crawler\CrawlQueue\CollectionCrawlQueue; |
|
15 | 14 | use Spatie\Crawler\CrawlQueue\CrawlQueue; |
15 | +use Spatie\Crawler\Exception\InvalidCrawlRequestHandler; |
|
16 | 16 | use Spatie\Crawler\Handlers\CrawlRequestFailed; |
17 | 17 | use Spatie\Crawler\Handlers\CrawlRequestFulfilled; |
18 | -use Spatie\Crawler\CrawlQueue\CollectionCrawlQueue; |
|
19 | -use Spatie\Crawler\Exception\InvalidCrawlRequestHandler; |
|
18 | +use Spatie\Robots\RobotsTxt; |
|
19 | +use Tree\Node\Node; |
|
20 | 20 | |
21 | 21 | class Crawler |
22 | 22 | { |
@@ -340,6 +340,9 @@ |
||
340 | 340 | } |
341 | 341 | } |
342 | 342 | |
343 | + /** |
|
344 | + * @return ?Node |
|
345 | + */ |
|
343 | 346 | public function addToDepthTree(UriInterface $url, UriInterface $parentUrl, Node $node = null): ?Node |
344 | 347 | { |
345 | 348 | if (is_null($this->maximumDepth)) { |
@@ -74,6 +74,9 @@ |
||
74 | 74 | $this->crawler->getCrawlObservers()->crawled($crawlUrl, $response); |
75 | 75 | } |
76 | 76 | |
77 | + /** |
|
78 | + * @param int $readMaximumBytes |
|
79 | + */ |
|
77 | 80 | protected function convertBodyToString(StreamInterface $bodyStream, $readMaximumBytes = 1024 * 1024 * 2): string |
78 | 81 | { |
79 | 82 | $bodyStream->rewind(); |
@@ -3,15 +3,15 @@ |
||
3 | 3 | namespace Spatie\Crawler\Handlers; |
4 | 4 | |
5 | 5 | use GuzzleHttp\Psr7\Uri; |
6 | -use Spatie\Crawler\Crawler; |
|
7 | -use Spatie\Crawler\CrawlUrl; |
|
8 | -use Spatie\Crawler\LinkAdder; |
|
9 | -use Spatie\Crawler\CrawlerRobots; |
|
10 | 6 | use GuzzleHttp\RedirectMiddleware; |
7 | +use Psr\Http\Message\ResponseInterface; |
|
8 | +use Psr\Http\Message\StreamInterface; |
|
11 | 9 | use Psr\Http\Message\UriInterface; |
12 | 10 | use Spatie\Crawler\CrawlSubdomains; |
13 | -use Psr\Http\Message\StreamInterface; |
|
14 | -use Psr\Http\Message\ResponseInterface; |
|
11 | +use Spatie\Crawler\CrawlUrl; |
|
12 | +use Spatie\Crawler\Crawler; |
|
13 | +use Spatie\Crawler\CrawlerRobots; |
|
14 | +use Spatie\Crawler\LinkAdder; |
|
15 | 15 | use function GuzzleHttp\Psr7\stream_for; |
16 | 16 | |
17 | 17 | class CrawlRequestFulfilled |