1
|
|
|
<?php |
2
|
|
|
/* |
3
|
|
|
* This file is part of dispositif/wikibot application (@github) |
4
|
|
|
* 2019-2023 © Philippe M./Irønie <[email protected]> |
5
|
|
|
* For the full copyright and MIT license information, view the license file. |
6
|
|
|
*/ |
7
|
|
|
|
8
|
|
|
declare(strict_types=1); |
9
|
|
|
|
10
|
|
|
namespace App\Domain\ExternLink; |
11
|
|
|
|
12
|
|
|
use App\Infrastructure\Monitor\NullLogger; |
13
|
|
|
use Psr\Log\LoggerInterface; |
14
|
|
|
|
15
|
|
|
/**
 * Decides what to do with an external URL whose HTTP request failed, based on the text
 * of the error message.
 *
 * Depending on which known error is recognized in the message, the URL is either returned
 * unchanged (the link is skipped) or passed to DeadLinkTransformer::formatFromUrl() and the
 * result of that call is returned instead.
 *
 * todo Refac
 * Doc : https://developer.mozilla.org/fr/docs/Web/HTTP/Status/503
 */
class ExternHttpErrorLogic
{
    /** Path of the external-request error log file (not written to by this class at the moment). */
    final public const LOG_REQUEST_ERROR = __DIR__ . '/../../Application/resources/external_request_error.log';

    /**
     * When true, server-side and connection failures (500, 502, cURL errors 52 and 6,
     * SOCKS5 connection failures) are also converted through the dead-link transformer.
     *
     * NOTE: the constant is read through self::, so a redefinition in a subclass is not taken into account.
     */
    protected const LOOSE = true;

    /**
     * @param DeadLinkTransformer $deadLinkTransformer Builds the replacement text for a URL considered dead.
     * @param LoggerInterface     $log                 Receives one notice/warning per handled error;
     *                                                 defaults to a NullLogger.
     */
    public function __construct(
        protected DeadLinkTransformer $deadLinkTransformer,
        private readonly LoggerInterface $log = new NullLogger(),
    ) {
    }

    /**
     * Maps an HTTP/cURL error message to the text that should replace the URL.
     *
     * The checks are evaluated in order and the first match wins.
     *
     * @param string $errorMessage Error message produced by the failed request.
     * @param string $url          URL that was requested.
     *
     * @return string The result of DeadLinkTransformer::formatFromUrl() when the link is treated
     *                as dead, otherwise $url unchanged.
     */
    public function manageByHttpErrorMessage(string $errorMessage, string $url): string
    {
        // "410 Gone" => {lien brisé} (dead link), when enabled by ExternRefTransformer::REPLACE_410.
        if ($this->messageMatches('#410 Gone#i', $errorMessage)) {
            $this->log->notice('410 Gone', ['stats' => 'externHttpErrorLogic.410']);

            return ExternRefTransformer::REPLACE_410
                ? $this->deadLinkTransformer->formatFromUrl($url)
                : $url;
        }

        if ($this->messageMatches('#400 Bad Request#i', $errorMessage)) {
            $this->log->warning('400 Bad Request : ' . $url, ['stats' => 'externHttpErrorLogic.400']);

            return $url;
        }

        if ($this->messageMatches('#(403 Forbidden|403 Access Forbidden)#i', $errorMessage)) {
            $this->log->warning('403 Forbidden : ' . $url, ['stats' => 'externHttpErrorLogic.403']);
            // TODO return blankLienWeb without consulté le=...

            return $url;
        }

        // "404 Not Found" => {lien brisé} (dead link), when enabled by ExternRefTransformer::REPLACE_404.
        if ($this->messageMatches('#404 Not Found#i', $errorMessage)) {
            $this->log->notice('404 Not Found', ['stats' => 'externHttpErrorLogic.404']);

            return ExternRefTransformer::REPLACE_404
                ? $this->deadLinkTransformer->formatFromUrl($url)
                : $url;
        }

        if ($this->messageMatches('#401 (Unauthorized|Authorization Required)#i', $errorMessage)) {
            $this->log->notice('401 Unauthorized : skip ' . $url, ['stats' => 'externHttpErrorLogic.401']);

            return $url;
        }

        // From here on, errors are only treated as dead links in LOOSE mode.
        if (self::LOOSE && $this->messageMatches('#500 Internal Server Error#i', $errorMessage)) {
            $this->log->notice('500 Internal Server Error', ['stats' => 'externHttpErrorLogic.500']);

            return $this->deadLinkTransformer->formatFromUrl($url);
        }

        if (self::LOOSE && $this->messageMatches('#502 Bad Gateway#i', $errorMessage)) {
            $this->log->notice('502 Bad Gateway', ['stats' => 'externHttpErrorLogic.502']);

            return $this->deadLinkTransformer->formatFromUrl($url);
        }

        if (self::LOOSE && $this->messageMatches('#cURL error 52: Empty reply from server#i', $errorMessage)) {
            $this->log->notice(
                'cURL error 52: Empty reply from server',
                ['stats' => 'externHttpErrorLogic.curl52_emptyReply']
            );

            return $this->deadLinkTransformer->formatFromUrl($url);
        }

        if (self::LOOSE && $this->messageMatches('#cURL error 6: Could not resolve host#i', $errorMessage)) {
            $this->log->notice(
                'cURL error 6: Could not resolve host',
                ['stats' => 'externHttpErrorLogic.curl6_resolveHost']
            );

            return $this->deadLinkTransformer->formatFromUrl($url);
        }

        if (self::LOOSE
            && (
                $this->messageMatches("#cURL error 97: Can't complete SOCKS5 connection#i", $errorMessage)
                // The dots of the address are escaped so they match literally and not "any character".
                || $this->messageMatches(
                    "#cURL error 7: Can't complete SOCKS5 connection to 0\\.0\\.0\\.0:0#i",
                    $errorMessage
                )
            )
        ) {
            // Remote endpoint connection failure.
            $this->log->notice("Can't complete SOCKS5 connection", ['stats' => 'externHttpErrorLogic.SOCKS5failure']);

            return $this->deadLinkTransformer->formatFromUrl($url);
        }

        // DEFAULT (not filtered).
        // Anything else: do not generate a {lien brisé} (dead link), because the failure
        // may be temporary (e.g. a transient 404).
        //
        // Messages that must stay in this default branch (false positives for a dead link):
        //  - "cURL error 7: Failed to receive SOCKS5 connect request ack"
        //  - "URL rejected: No host part in the URL (see https://curl.haxx.se/libcurl/c/libcurl-errors.html)"
        //  - "cURL error 28: Connection timed out after 20005 milliseconds
        //     (see https://curl.haxx.se/libcurl/c/libcurl-errors.html)"
        $this->log->notice(
            'erreur non gérée sur extractWebData: "' . $errorMessage . '" URL: ' . $url,
            ['stats' => 'externHttpErrorLogic.defaultSkip']
        );
        // NOTE: appending unhandled errors to self::LOG_REQUEST_ERROR is currently disabled.

        return $url;
    }

    /**
     * Tells whether the error message matches the given PCRE pattern.
     *
     * A preg_match() failure (false) is treated like "no match".
     */
    private function messageMatches(string $pattern, string $errorMessage): bool
    {
        return preg_match($pattern, $errorMessage) === 1;
    }
}