@@ 7-23 (lines=17) @@ | ||
4 | ||
5 | use Symfony\Component\DomCrawler\Crawler; |
|
6 | ||
/**
 * Scraper for pages hosted on flux.no.
 *
 * Collects the text of the paragraphs that are direct children of the
 * `.productPageBody` element and hands it to returnResult() together
 * with the source label 'Flux forlag'.
 */
class FluxScraper extends Scraper implements ScraperInterface
{
    /**
     * Tell whether this scraper is responsible for the given URL.
     *
     * @param string $url URL of the page about to be scraped.
     *
     * @return bool True when the URL contains 'flux.no', false otherwise.
     */
    public function recognizes($url)
    {
        // strpos() yields the match offset (which may be 0, a falsy value)
        // or false; compare strictly so callers always get a real boolean.
        return strpos($url, 'flux.no') !== false;
    }

    /**
     * Extract the descriptive text from a loaded page.
     *
     * @param Crawler $crawler Crawler positioned on the fetched document.
     *
     * @return mixed Whatever returnResult() produces; that helper is not
     *               defined in this class (presumably inherited from
     *               Scraper - confirm there).
     */
    public function scrape(Crawler $crawler)
    {
        $texts = $crawler->filter('.productPageBody > p')->each(function (Crawler $node) {
            return $node->text();
        });

        // Double quotes are required here: a single-quoted '\n\n' is the
        // literal four characters backslash-n-backslash-n, not two newlines.
        $text = implode("\n\n", $texts);

        return $this->returnResult($text, 'Flux forlag');
    }
}
|
24 |
@@ 7-22 (lines=16) @@ | ||
4 | ||
5 | use Symfony\Component\DomCrawler\Crawler; |
|
6 | ||
/**
 * Scraper for pages hosted on loc.gov.
 *
 * Takes the text of the document body, splits it into chunks and passes
 * the longest chunk to returnResult() together with the source label
 * 'Library of Congress'.
 */
class LocScraper extends Scraper implements ScraperInterface
{
    /**
     * Tell whether this scraper is responsible for the given URL.
     *
     * @param string $url URL of the page about to be scraped.
     *
     * @return bool True when the URL contains 'loc.gov', false otherwise.
     */
    public function recognizes($url)
    {
        // strpos() yields the match offset (which may be 0, a falsy value)
        // or false; compare strictly so callers always get a real boolean.
        return strpos($url, 'loc.gov') !== false;
    }

    /**
     * Extract the descriptive text from a loaded page.
     *
     * @param Crawler $crawler Crawler positioned on the fetched document.
     *
     * @return mixed Whatever returnResult() produces; that helper is not
     *               defined in this class (presumably inherited from
     *               Scraper - confirm there).
     */
    public function scrape(Crawler $crawler)
    {
        $text = $crawler->filter('body')->first()->text();

        // Break the body text apart at CRLF line ends or at blank lines
        // (two consecutive LF characters).
        $texts = preg_split("/\r\n|\n\n/", $text);

        // getLongestText() is not defined in this class; judging by its
        // name it selects the longest chunk - confirm against Scraper.
        $text = $this->getLongestText($texts);

        return $this->returnResult($text, 'Library of Congress');
    }
}
|
23 |