ExternRefWorker::processRefContent()   C
last analyzed

Complexity

Conditions 12
Paths 58

Size

Total Lines 66
Code Lines 36

Duplication

Lines 0
Ratio 0 %

Importance

Changes 0
Metric Value
eloc 36
c 0
b 0
f 0
dl 0
loc 66
rs 6.9666
cc 12
nc 58
nop 1

How to fix   Long Method    Complexity   

Long Method

Small methods make your code easier to understand, in particular if combined with a good name. Besides, if your method is small, finding a good name is usually much easier.

For example, if you find yourself adding comments to a method's body, this is usually a good sign to extract the commented part to a new method, and use the comment as a starting point when coming up with a good name for this new method.

Commonly applied refactorings include:

1
<?php
2
3
/*
4
 * This file is part of dispositif/wikibot application (@github)
5
 * 2019-2023 © Philippe M./Irønie  <[email protected]>
6
 * For the full copyright and MIT license information, view the license file.
7
 */
8
declare(strict_types=1);
9
10
namespace App\Application\ExternLink;
11
12
use App\Application\AbstractRefBotWorker;
13
use App\Application\InfrastructurePorts\PageListForAppInterface as PageListInterface;
14
use App\Application\WikiBotConfig;
15
use App\Domain\Exceptions\ConfigException;
16
use App\Domain\ExternLink\ExternRefTransformer;
17
use Mediawiki\Api\MediawikiFactory;
18
use Throwable;
19
20
/**
 * Bot worker that runs every reference (or bulleted external link) of an article
 * through an ExternRefTransformer and keeps track, in the edit summary, of what was changed.
 *
 * TODO add construct arguments for TOR-Enabled
 */
class ExternRefWorker extends AbstractRefBotWorker
{
    // NOTE(review): most of the public constants below are not read in this class;
    // presumably they are consumed by the parent worker or by the transformer — confirm.
    public const TOR_ENABLED_FOR_WEB_CRAWL = true;
    public const TASK_BOT_FLAG = true;
    public const MAX_REFS_PROCESSED_IN_ARTICLE = 30;
    public const SLEEP_AFTER_EDITION = 5; // sec
    public const MINUTES_DELAY_AFTER_LAST_HUMAN_EDIT = 10; // minutes
    public const CHECK_EDIT_CONFLICT = true;
    public const ARTICLE_ANALYZED_FILENAME = __DIR__ . '/../resources/article_externRef_edited.txt';
    public const SKIP_ADQ = false;
    public const SKIP_LASTEDIT_BY_BOT = false;
    /** Citation count from which the edit summary gets the "on fire" marker. */
    public const CITATION_NUMBER_ON_FIRE = 15;
    /** Citation count from which the edit is no longer flagged as a bot edit. */
    public const CITATION_NUMBER_NO_BOTFLAG = 20;
    /** Number of dead-link / Wikiwix / Wayback rewrites from which the bot flag is dropped. */
    public const DEAD_LINK_NO_BOTFLAG = 5;
    /** Copied onto the transformer's skipSiteBlacklisted property by the constructor. */
    public const SKIP_SITE_BLACKLISTED = true;
    /** Copied onto the transformer's skipRobotNoIndex property by the constructor. */
    public const SKIP_ROBOT_NOINDEX = true;
    /** Scheme deliberately omitted (original note: "not httpS in 2023"). */
    protected const STRING_WAYBACK_URL = '://web.archive.org/web/';
    protected const STRING_WIKIWIX_URL = 'https://archive.wikiwix.com/cache/';

    protected $modeAuto = true;

    // Declared nullable, but the constructor throws unless a transformer is supplied,
    // so it is always set once construction has succeeded.
    protected ?ExternRefTransformer $transformer;
    protected array $webArchivers = [];

    /**
     * Configures the transformer, then hands over to the parent constructor.
     *
     * The transformer parameter is optional only because it follows an optional parameter;
     * omitting it is a configuration error.
     *
     * @throws ConfigException when no transformer is provided
     */
    public function __construct(
        WikiBotConfig         $bot,
        MediawikiFactory      $wiki,
        ?PageListInterface    $pagesGen = null,
        ?ExternRefTransformer $transformer = null
    )
    {
        if ($transformer === null) {
            throw new ConfigException('ExternRefTransformer not set');
        }

        $transformer->skipSiteBlacklisted = self::SKIP_SITE_BLACKLISTED;
        $transformer->skipRobotNoIndex = self::SKIP_ROBOT_NOINDEX;
        $this->transformer = $transformer;

        // Kept as the last statement, as in the original: the transformer is fully
        // configured before the parent constructor runs.
        parent::__construct($bot, $wiki, $pagesGen);
    }

    /**
     * Processes the content of a <ref>, or an external link preceded by a bullet.
     *
     * The original text is returned untouched when the reference points to Google Books,
     * when the transformer throws, when the transformation changes nothing (ignoring
     * surrounding whitespace), or when the change is not confirmed.
     *
     * @param string $refContent raw reference content
     *
     * @return string the transformed content, or $refContent when nothing is applied
     */
    public function processRefContent(string $refContent): string
    {
        // todo // hack Temporary Skip URL
        if (preg_match('#books\.google#', $refContent)) {
            $this->incrementExternRefStat('externref.skip.booksgoogle');

            return $refContent;
        }

        try {
            $result = $this->transformer->process($refContent, $this->summary);
        } catch (Throwable $e) {
            // Best effort on purpose: a failing reference is logged and left as it was.
            $this->log->critical(
                sprintf('Error patate34 %s %s:%d', $e->getMessage(), $e->getFile(), $e->getLine()),
                ['stats' => 'externref.exception.patate34']
            );
            // TODO : parse $e->message -> variable process, taskName, botflag...

            return $refContent;
        }

        if (trim($result) === trim($refContent)) {
            $this->incrementExternRefStat('externref.transform.same');

            return $refContent;
        }

        // Semi-automatic handling. todo: original note says the condition on
        // $this->transformer->skipUnauthorised is broken/false.
        $this->printDiff($refContent, $result, 'echo');
        if (!$this->autoOrYesConfirmation('Conserver cette modif ?')) {
            return $refContent;
        }

        $this->registerAcceptedTransformation($result);

        return $result;
    }

    /**
     * Records statistics and summary counters for a transformation that is being kept,
     * and drops the bot flag when one of the thresholds is reached.
     */
    private function registerAcceptedTransformation(string $result): void
    {
        if (preg_match('#{{lien brisé#i', $result)) {
            $this->countAndCheckBotFlag('externref.transform.lienbrisé', 'count lien brisé');
        }

        if (str_contains($result, self::STRING_WIKIWIX_URL)) {
            $this->countAndCheckBotFlag('externref.transform.wikiwix', 'wikiwix');
        }

        if (str_contains($result, self::STRING_WAYBACK_URL)) {
            $this->countAndCheckBotFlag('externref.transform.wayback', 'wayback');
        }

        if ($this->summary->citationNumber >= self::CITATION_NUMBER_NO_BOTFLAG) {
            $this->summary->setBotFlag(false);
        }

        $this->incrementExternRefStat('externref.transform.total');
        $this->incrementSummaryMemo('count URL');
    }

    /**
     * Increments a stat and the matching summary counter, then removes the bot flag
     * once that counter reaches DEAD_LINK_NO_BOTFLAG.
     */
    private function countAndCheckBotFlag(string $statTag, string $memoKey): void
    {
        $this->incrementExternRefStat($statTag);

        if ($this->incrementSummaryMemo($memoKey) >= self::DEAD_LINK_NO_BOTFLAG) {
            $this->summary->setBotFlag(false);
        }
    }

    /**
     * Adds one to a counter stored in the summary memo (starting from 0 when absent).
     *
     * @return int the counter value after the increment
     */
    private function incrementSummaryMemo(string $memoKey): int
    {
        $count = 1 + ($this->summary->memo[$memoKey] ?? 0);
        $this->summary->memo[$memoKey] = $count;

        return $count;
    }

    /**
     * Single access point to the logger's stats collector.
     *
     * NOTE(review): static analysis reports that `stats` is not part of
     * Psr\Log\LoggerInterface, so this relies on the concrete logger injected into the
     * worker. No guard is added here because the logger class is not visible from this
     * file — confirm before adding an instanceof check.
     */
    private function incrementExternRefStat(string $statTag): void
    {
        $this->log->stats->increment($statTag);
    }

    /**
     * Builds the edit summary: optional "bot " prefix, task name, then one marker per
     * kind of change recorded in the summary memo.
     *
     * Side effect: increments an "externref.count.*" stat for several of the markers.
     *
     * todo move to a Summary child ?
     * Rewriting default Summary::serialize()
     */
    protected function generateSummaryText(): string
    {
        $memo = $this->summary->memo;
        $prefix = $this->summary->isBotFlag() ? 'bot ' : '';
        $suffix = '';

        if (isset($memo['count article'])) {
            $this->incrementExternRefStat('externref.count.article');
            $suffix .= ' ' . $memo['count article'] . 'x {article}';
        }

        if (isset($memo['count lien web'])) {
            $this->incrementExternRefStat('externref.count.lienweb');
            $suffix .= ' ' . $memo['count lien web'] . 'x {lien web}';
        }

        if (isset($memo['presse'])) {
            $this->incrementExternRefStat('externref.count.presse');
            $suffix .= ' 🗞️'; // 🗞️ 📰
        }

        if (isset($memo['science'])) {
            $this->incrementExternRefStat('externref.count.science');
            $suffix .= ' 🧪'; // 🧪 🔬
        }

        if (isset($memo['count lien brisé'])) {
            $this->incrementExternRefStat('externref.count.lienbrisé');
            $suffix .= ' ⚠️️️lien brisé'; //⚠️💩
            // The multiplier is only shown from the second dead link on.
            if ($memo['count lien brisé'] > 1) {
                $suffix .= ' x' . $memo['count lien brisé'];
            }
        }

        if (isset($memo['wikiwix'])) {
            $suffix .= ' ';
            if ($memo['wikiwix'] > 1) {
                $suffix .= $memo['wikiwix'] . 'x ';
            }
            $suffix .= 'Wikiwix';
        }

        if (isset($memo['wayback'])) {
            $suffix .= ' ';
            if ($memo['wayback'] > 1) {
                $suffix .= $memo['wayback'] . 'x ';
            }
            $suffix .= 'InternetArchive';
        }

        if (isset($memo['accès url non libre'])) {
            $suffix .= ' 🔒';
        }

        if ($this->summary->citationNumber >= self::CITATION_NUMBER_ON_FIRE) {
            $suffix .= ' 🔥';
        }

        return $prefix . $this->summary->taskName . $suffix;
    }
}
194