Passed
Push — master ( ac79bd...625dce )
by Dispositif
13:46 queued 16s
created

ScanWiki2DB::insertDB()   B

Complexity

Conditions 8
Paths 12

Size

Total Lines 35
Code Lines 20

Duplication

Lines 0
Ratio 0 %

Importance

Changes 0
Metric Value
eloc 20
dl 0
loc 35
rs 8.4444
c 0
b 0
f 0
cc 8
nc 12
nop 2
1
<?php
2
/*
3
 * This file is part of dispositif/wikibot application (@github)
4
 * 2019-2023 © Philippe M./Irønie  <[email protected]>
5
 * For the full copyright and MIT license information, view the license file.
6
 */
7
8
declare(strict_types=1);
9
10
namespace App\Application\OuvrageScan;
11
12
use App\Application\InfrastructurePorts\DbAdapterInterface;
13
use App\Application\InfrastructurePorts\PageListForAppInterface as PageListInterface;
14
use App\Application\WikiBotConfig;
15
use App\Application\WikiPageAction;
16
use App\Domain\Utils\TemplateParser;
17
use Exception;
18
use Mediawiki\Api\MediawikiFactory;
19
use Psr\Log\LoggerInterface;
20
21
22
/**
 * From a list of page titles, scan the wiki and add the {ouvrage} citations into the database.
 *
 * Note: constructing this object immediately runs the whole scan, because the
 * constructor calls {@see ScanWiki2DB::process()}.
 */
class ScanWiki2DB
{
    protected LoggerInterface $logger;

    /**
     * Stores the collaborators, takes the logger from the bot config and starts the scan.
     *
     * @param MediawikiFactory   $wiki     wiki API factory handed to each WikiPageAction
     * @param DbAdapterInterface $db       adapter receiving the extracted citations
     * @param WikiBotConfig      $bot      bot configuration; only its logger is used here
     * @param PageListInterface  $pageList provider of the page titles to scan
     * @param int                $priority value stored in the 'priority' field of every inserted row
     *
     * @throws Exception
     */
    public function __construct(
        private readonly MediawikiFactory   $wiki,
        private readonly DbAdapterInterface $db,
        private readonly WikiBotConfig      $bot,
        private readonly PageListInterface  $pageList,
        private readonly int                $priority = 0
    )
    {
        $this->logger = $this->bot->getLogger();
        $this->process();
    }

    /**
     * Scans every title returned by the page list, one after the other.
     *
     * Logs an info message and returns early when the list is empty.
     *
     * @throws Exception
     */
    public function process(): void
    {
        $titles = $this->pageList->getPageTitles();
        if ($titles === []) {
            $this->logger->info("pageList vide.");

            return;
        }
        foreach ($titles as $title) {
            $this->pageScan($title);
            // Pause between two pages (presumably to throttle wiki requests — confirm).
            sleep(4);
        }
    }

    /**
     * Fetches one page, extracts its {ouvrage} templates and inserts them into the database.
     *
     * The page is skipped (false is returned) when it is not in namespace 0, when its text
     * is empty, when template parsing throws, or when no 'ouvrage' templates are found.
     *
     * @param string $title title of the wiki page to scan
     *
     * @return bool true when insertDB() returned a non-empty result, false otherwise
     *
     * @throws Exception
     */
    public function pageScan(string $title): bool
    {
        sleep(2);
        $this->logger->notice(sprintf('%s - %s', date("Y-m-d H:i:s"), $title));

        $page = new WikiPageAction($this->wiki, $title); // todo injection
        $ns = $page->getNs();
        if ($ns !== 0) {
            $this->logger->debug("SKIP : namespace $ns");

            return false;
        }
        $text = $page->getText();
        if (empty($text)) {
            $this->logger->debug("SKIP : empty text");

            return false;
        }

        try {
            $parsedTemplates = TemplateParser::parseAllTemplateByName('ouvrage', $text);
        } catch (Exception $e) {
            $this->logger->error("SKIP : parse error " . $e->getMessage());

            return false;
        }

        if (empty($parsedTemplates)) {
            return false;
        }

        // The parser result is expected to be keyed by template name; guard against a
        // missing or non-array 'ouvrage' entry instead of passing null to insertDB().
        $ouvrages = $parsedTemplates['ouvrage'] ?? null;
        if (!is_array($ouvrages)) {
            return false;
        }

        $result = $this->insertDB($ouvrages, $title);

        return !empty($result);
    }

    /**
     * Builds one row per distinct citation and hands the rows to the database adapter.
     *
     * Each row has the keys 'page', 'raw' and 'priority'. A citation is skipped (with a
     * warning) when the title is longer than 250 bytes, or when its raw text is missing,
     * not a string, empty, or longer than 2500 bytes. Exact duplicates are inserted once.
     *
     * @param array  $ouvrages parsed templates; each entry is expected to carry a 'raw' string
     * @param string $title    title of the page the citations come from
     *
     * @return bool|array false when there is nothing to insert or the insert failed;
     *                    otherwise whatever the adapter's insertPageOuvrages() returned
     */
    protected function insertDB(array $ouvrages, string $title): bool|array
    {
        $data = [];
        foreach ($ouvrages as $res) {
            // A malformed entry (no 'raw' key, or not an array) is treated as an empty raw.
            $raw = is_array($res) ? ($res['raw'] ?? null) : null;

            $oneData = [
                'page' => $title,
                'raw' => $raw,
                'priority' => $this->priority,
            ];

            // is_string() comes first so strlen() never receives a non-string under strict_types.
            if ((strlen($title) > 250) || !is_string($raw) || empty($raw) || strlen($raw) > 2500) {
                $this->logger->warning("Skip : string to long : ", $oneData);
                continue;
            }

            // filter duplicates (strict comparison: "1e3" and "1000" are different citations)
            if (!in_array($oneData, $data, true)) {
                $data[] = $oneData;
            }
        }

        if (empty($data)) {
            $this->logger->notice("Skip : empty data");

            return false;
        }

        $result = $this->db->insertPageOuvrages($data);

        if ($result === false) {
            $this->logger->error("Insert DB failed");
        } else {
            // The logger context must be an array; wrap a non-array result (e.g. true).
            $context = is_array($result) ? $result : ['result' => $result];
            $this->logger->notice("Insert DB : ", $context);
        }

        return $result;
    }
}
134