1
|
|
|
<?php |
2
|
|
|
/* |
3
|
|
|
* This file is part of dispositif/wikibot application (@github) |
4
|
|
|
* 2019-2023 © Philippe M./Irønie <[email protected]> |
5
|
|
|
* For the full copyright and MIT license information, view the license file. |
6
|
|
|
*/ |
7
|
|
|
|
8
|
|
|
declare(strict_types=1); |
9
|
|
|
|
10
|
|
|
namespace App\Domain\ExternLink; |
11
|
|
|
|
12
|
|
|
use Psr\Log\LoggerInterface; |
13
|
|
|
use Psr\Log\NullLogger; |
14
|
|
|
|
15
|
|
|
/**
 * Decides what to return for an external URL whose HTTP request failed,
 * based on the text of the error message.
 *
 * Depending on the error, the URL is either returned unchanged or handed to
 * DeadLinkTransformer::formatFromUrl() (the "{lien brisé}" case).
 *
 * Doc : https://developer.mozilla.org/fr/docs/Web/HTTP/Status/503
 */
class ExternHttpErrorLogic
{
    /**
     * Path of the external-request error log file.
     * NOTE(review): nothing in this class writes to it at present — confirm it is still used elsewhere.
     */
    final public const LOG_REQUEST_ERROR = __DIR__ . '/../../Application/resources/external_request_error.log';

    /**
     * When true, the server-side/network failures listed in LOOSE_DEAD_LINK_PATTERNS
     * are also handed to the dead-link transformer.
     */
    protected const LOOSE = true;

    /**
     * Error-message regex => notice text logged when it matches.
     * Only consulted when LOOSE is enabled; entries are tested in declaration order.
     *
     * Known false positive that is deliberately NOT listed:
     * "cURL error 7: Failed to receive SOCKS5 connect request ack".
     */
    private const LOOSE_DEAD_LINK_PATTERNS = [
        '#500 Internal Server Error#i' => '500 Internal Server Error',
        '#502 Bad Gateway#i' => '502 Bad Gateway',
        '#cURL error 52: Empty reply from server#i' => 'cURL error 52: Empty reply from server',
        '#cURL error 6: Could not resolve host#i' => 'cURL error 6: Could not resolve host',
        // Remote endpoint connection failure.
        "#cURL error 97: Can't complete SOCKS5 connection#i" => "Can't complete SOCKS5 connection",
        // Dots are escaped so that only the literal address 0.0.0.0:0 matches.
        "#cURL error 7: Can't complete SOCKS5 connection to 0\.0\.0\.0:0#i" => "Can't complete SOCKS5 connection",
    ];

    /**
     * @param DeadLinkTransformer $deadLinkTransformer Produces the replacement for URLs considered dead.
     * @param LoggerInterface     $log                 Receives a notice/warning for every error examined;
     *                                                 defaults to a NullLogger.
     */
    public function __construct(
        protected DeadLinkTransformer $deadLinkTransformer,
        private readonly LoggerInterface $log = new NullLogger()
    ) {
    }

    /**
     * Maps an HTTP/cURL error message to the string that should stand in for the URL.
     *
     * - 410 / 404: dead-link output when ExternRefTransformer::REPLACE_410 / REPLACE_404 is truthy,
     *   otherwise the URL unchanged.
     * - 400 / 403 / 401: URL unchanged (logged only).
     * - LOOSE failures (500, 502, cURL 52, cURL 6, SOCKS5): dead-link output when LOOSE is enabled.
     * - Anything else: URL unchanged.
     *
     * @param string $errorMessage Error text of the failed request (matched case-insensitively).
     * @param string $url          The URL that was requested.
     *
     * @return string Either $url itself or the result of DeadLinkTransformer::formatFromUrl($url).
     */
    public function manageByHttpErrorMessage(string $errorMessage, string $url): string
    {
        // "410 Gone" => {lien brisé}
        if (preg_match('#410 Gone#i', $errorMessage)) {
            $this->log->notice('410 Gone');

            return ExternRefTransformer::REPLACE_410
                ? $this->deadLinkTransformer->formatFromUrl($url)
                : $url;
        }

        if (preg_match('#400 Bad Request#i', $errorMessage)) {
            $this->log->warning('400 Bad Request : ' . $url);

            return $url;
        }

        if (preg_match('#(403 Forbidden|403 Access Forbidden)#i', $errorMessage)) {
            $this->log->warning('403 Forbidden : ' . $url);

            // TODO return blankLienWeb without "consulté le=..."
            return $url;
        }

        if (preg_match('#404 Not Found#i', $errorMessage)) {
            $this->log->notice('404 Not Found');

            return ExternRefTransformer::REPLACE_404
                ? $this->deadLinkTransformer->formatFromUrl($url)
                : $url;
        }

        if (preg_match('#401 (Unauthorized|Authorization Required)#i', $errorMessage)) {
            $this->log->notice('401 Unauthorized : skip ' . $url);

            return $url;
        }

        if (self::LOOSE) {
            foreach (self::LOOSE_DEAD_LINK_PATTERNS as $pattern => $notice) {
                if (preg_match($pattern, $errorMessage)) {
                    $this->log->notice($notice);

                    return $this->deadLinkTransformer->formatFromUrl($url);
                }
            }
        }

        // DEFAULT (not filtered)
        // Any other error: do not generate {lien brisé}, because the failure may be temporary.
        // Examples seen:
        // "URL rejected: No host part in the URL (see https://curl.haxx.se/libcurl/c/libcurl-errors.html)
        // "cURL error 28: Connection timed out after 20005 milliseconds (see https://curl.haxx.se/libcurl/c/libcurl-errors.html)
        $this->log->notice('erreur non gérée sur extractWebData: "' . $errorMessage . "\" URL: " . $url);

        return $url;
    }
}