Bouss /
boussimmo
| 1 | <?php |
||||
| 2 | |||||
| 3 | namespace App\Parser; |
||||
| 4 | |||||
| 5 | use App\DataProvider\ProviderProvider; |
||||
| 6 | use App\DTO\Property; |
||||
| 7 | use App\DTO\PropertyAd; |
||||
| 8 | use App\Enum\PropertyFilter; |
||||
| 9 | use App\Exception\ParseException; |
||||
| 10 | use App\Util\NumericUtil; |
||||
| 11 | use DateTime; |
||||
| 12 | use Exception; |
||||
| 13 | use Psr\Log\LoggerInterface; |
||||
| 14 | use Symfony\Component\DomCrawler\Crawler; |
||||
| 15 | use function array_filter; |
||||
| 16 | use function Symfony\Component\String\u; |
||||
| 17 | |||||
/**
 * Base implementation shared by the provider-specific HTML parsers.
 *
 * A child class redefines the PROVIDER and SELECTOR_* constants (and, when
 * needed, overrides the protected parse*() hooks); this class walks every ad
 * wrapper found in the HTML and turns it into a Property.
 */
abstract class AbstractParser implements ParserInterface
{
    // Redefined in the child classes
    protected const PROVIDER = null;

    protected const SELECTOR_AD_WRAPPER = null;
    protected const SELECTOR_PRICE = null;
    protected const SELECTOR_AREA = null;
    protected const SELECTOR_ROOMS_COUNT = null;
    protected const SELECTOR_LOCATION = null;
    protected const SELECTOR_BUILDING_NAME = null;
    protected const SELECTOR_TITLE = null;
    protected const SELECTOR_DESCRIPTION = null;
    protected const SELECTOR_PHOTO = 'img:first-child';
    protected const SELECTOR_URL = 'a:first-child';

    // Words whose presence in the title/description marks an ad as a new build
    private const NEW_BUILD_KEYWORDS = ['neuf', 'livraison', 'programme', 'neuve', 'nouveau', 'nouvelle', 'remise'];

    public function __construct(
        private ProviderProvider $providerProvider,
        protected LoggerInterface $logger
    ) {}

    /**
     * {@inheritDoc}
     *
     * $params['date'] must hold the DateTime used as publication date of every
     * parsed ad. $params is also passed as context to the logger when an ad
     * cannot be parsed.
     *
     * @return Property[] Re-indexed list of the parsed (and filtered) properties
     *
     * @throws ParseException When $params['date'] is missing or is not a
     *                        DateTime, or when no property could be parsed
     */
    public function parse(string $html, array $filters = [], array $params = []): array
    {
        // Validate once up front: a missing date would otherwise surface as a
        // TypeError for every ad, which the per-ad catch below does not handle.
        $publishedAt = $params['date'] ?? null;
        if (!$publishedAt instanceof DateTime) {
            throw new ParseException('Missing or invalid "date" parameter');
        }

        $properties = [];

        // Iterate over all DOM elements wrapping a property ad
        $this->createCrawler($html)->filter(static::SELECTOR_AD_WRAPPER)->each(
            function (Crawler $node) use (&$properties, $params, $publishedAt): void {
                try {
                    $properties[] = $this->parseOne($node, $publishedAt);
                } catch (Exception $e) {
                    // A broken ad must not prevent the other ads from being parsed
                    $this->logger->warning('Error while parsing a property: ' . $e->getMessage(), $params);
                }
            }
        );

        if (empty($properties)) {
            throw new ParseException('No property parsed');
        }

        // Filter the properties
        if (!isset($filters[PropertyFilter::NEW_BUILD])) {
            return $properties;
        }

        // array_filter() preserves keys; re-index so that callers get a plain list
        return array_values(array_filter($properties, static fn (Property $ad) => $ad->isNewBuild()));
    }

    /**
     * Hook allowing the child classes to alter the DOM before parsing.
     *
     * @param string $html
     *
     * @return Crawler
     */
    protected function createCrawler(string $html): Crawler
    {
        return new Crawler($html);
    }

    /**
     * Parses the price from the SELECTOR_PRICE node, or from the whole ad HTML
     * when no selector is defined. Returns null when the selector lookup fails.
     */
    protected function parsePrice(Crawler $crawler): ?float
    {
        $raw = $this->extractNumericSource($crawler, static::SELECTOR_PRICE);

        return null === $raw ? null : NumericUtil::parsePrice($raw);
    }

    /**
     * Parses the area from the SELECTOR_AREA node, or from the whole ad HTML
     * when no selector is defined. Returns null when the selector lookup fails.
     */
    protected function parseArea(Crawler $crawler): ?float
    {
        $raw = $this->extractNumericSource($crawler, static::SELECTOR_AREA);

        return null === $raw ? null : NumericUtil::parseArea($raw);
    }

    /**
     * Parses the rooms count from the SELECTOR_ROOMS_COUNT node, or from the
     * whole ad HTML when no selector is defined. Returns null when the selector
     * lookup fails.
     */
    protected function parseRoomsCount(Crawler $crawler): ?int
    {
        $raw = $this->extractNumericSource($crawler, static::SELECTOR_ROOMS_COUNT);

        return null === $raw ? null : NumericUtil::parseRoomsCount($raw);
    }

    /**
     * Returns the trimmed text of the SELECTOR_LOCATION node, or null when the
     * selector is undefined or the lookup fails.
     */
    protected function parseLocation(Crawler $crawler): ?string
    {
        return $this->extractText($crawler, static::SELECTOR_LOCATION);
    }

    /**
     * Returns the trimmed text of the SELECTOR_BUILDING_NAME node, or null when
     * the selector is undefined or the lookup fails.
     */
    protected function parseBuildingName(Crawler $crawler): ?string
    {
        return $this->extractText($crawler, static::SELECTOR_BUILDING_NAME);
    }

    /**
     * Returns the trimmed text of the SELECTOR_TITLE node, or null when the
     * selector is undefined or the lookup fails.
     */
    protected function parseTitle(Crawler $crawler): ?string
    {
        return $this->extractText($crawler, static::SELECTOR_TITLE);
    }

    /**
     * Returns the trimmed text of the SELECTOR_DESCRIPTION node, or null when
     * the selector is undefined or the lookup fails.
     */
    protected function parseDescription(Crawler $crawler): ?string
    {
        return $this->extractText($crawler, static::SELECTOR_DESCRIPTION);
    }

    /**
     * Returns the "src" attribute of the SELECTOR_PHOTO node.
     *
     * NOTE(review): attr() yields null when the matched element has no "src"
     * attribute, while static analysis reports that PropertyAd::setPhoto()
     * expects a string - confirm whether a null photo has to be rejected here.
     *
     * @throws ParseException
     */
    protected function parsePhoto(Crawler $crawler): ?string
    {
        try {
            return $crawler->filter(static::SELECTOR_PHOTO)->attr('src');
        } catch (Exception $e) {
            throw new ParseException('Error while parsing the photo: ' . $e->getMessage());
        }
    }

    /**
     * Returns the "href" attribute of the SELECTOR_URL node.
     *
     * @throws ParseException When the node cannot be found or has no "href"
     */
    protected function parseUrl(Crawler $crawler): string
    {
        try {
            $url = $crawler->filter(static::SELECTOR_URL)->attr('href');
        } catch (Exception $e) {
            throw new ParseException('Error while parsing the URL: ' . $e->getMessage());
        }

        // attr() returns null for a missing attribute; returning it would raise
        // a TypeError (declared return type is string) instead of a ParseException.
        if (null === $url) {
            throw new ParseException('Error while parsing the URL: the matched element has no "href" attribute');
        }

        return $url;
    }

    /**
     * Returns the trimmed text of the first node matching the selector.
     *
     * @return string|null Null when no selector is given or when the lookup fails
     */
    private function extractText(Crawler $crawler, ?string $selector): ?string
    {
        if (null === $selector) {
            return null;
        }

        try {
            return trim($crawler->filter($selector)->text());
        } catch (Exception) {
            return null;
        }
    }

    /**
     * Returns the string a numeric value has to be searched in: the whole ad
     * HTML when no selector is given, the trimmed text of the selected node
     * otherwise.
     *
     * @return string|null Null when a selector is given but the lookup fails
     */
    private function extractNumericSource(Crawler $crawler, ?string $selector): ?string
    {
        if (null === $selector) {
            return $crawler->html();
        }

        return $this->extractText($crawler, $selector);
    }

    /**
     * Builds a Property (and its PropertyAd) from the DOM element wrapping one ad.
     *
     * @throws ParseException
     */
    private function parseOne(Crawler $crawler, DateTime $publishedAt): Property
    {
        $propertyAd = (new PropertyAd)
            ->setProvider(static::PROVIDER)
            ->setTitle($this->parseTitle($crawler))
            ->setDescription($this->parseDescription($crawler))
            ->setPhoto($this->parsePhoto($crawler))
            ->setUrl($this->parseUrl($crawler))
            ->setPublishedAt($publishedAt);

        $property = (new Property)
            ->setPrice($this->parsePrice($crawler))
            ->setArea($this->parseArea($crawler))
            ->setRoomsCount($this->parseRoomsCount($crawler))
            ->setLocation($this->parseLocation($crawler))
            ->setBuildingName($this->parseBuildingName($crawler))
            ->setAd($propertyAd);

        $provider = $this->providerProvider->find(static::PROVIDER);

        if (null !== $provider && $provider->isNewBuildOnly()) {
            // Every ad of such a provider is a new build
            $property->setNewBuild(true);
        } else {
            // Otherwise rely on the keywords found in the title and description.
            // NOTE(review): the lookup appears to be case-sensitive (no ignoreCase()),
            // so "Neuf" or "NEUF" would not match - confirm this is intended.
            $adText = u($propertyAd->getTitle() . $propertyAd->getDescription());
            $property->setNewBuild($adText->containsAny(self::NEW_BUILD_KEYWORDS));
        }

        return $property;
    }
}
||||
| 228 |