Completed
Push — main ( 27fe00...4991e0 )
by Stefano
17s queued 15s
created

ImportSitemapCommand   A

Complexity

Total Complexity 20

Size/Duplication

Total Lines 166
Duplicated Lines 0 %

Importance

Changes 10
Bugs 2 Features 0
Metric Value
eloc 88
c 10
b 2
f 0
dl 0
loc 166
rs 10
wmc 20

4 Methods

Rating   Name   Duplication   Size   Complexity  
A buildOptionParser() 0 21 1
A initialize() 0 4 1
A linkOptions() 0 14 2
D execute() 0 80 16
1
<?php
2
declare(strict_types=1);
3
4
/**
5
 * BEdita Brevia plugin
6
 *
7
 * Copyright 2024 Atlas Srl
8
 */
9
namespace Brevia\BEdita\Command;
10
11
use BEdita\Core\Utility\LoggedUser;
12
use Brevia\BEdita\Client\BreviaClient;
13
use Brevia\BEdita\Utility\ReadCSVTrait;
14
use Cake\Command\Command;
15
use Cake\Console\Arguments;
16
use Cake\Console\ConsoleIo;
17
use Cake\Console\ConsoleOptionParser;
18
use Cake\Log\LogTrait;
19
use Cake\ORM\Table;
20
use Cake\Utility\Hash;
21
22
/**
 * Import links from a sitemap into a collection.
 *
 * The command reads a sitemap from a local file or an HTTP(S) URL, creates a
 * link object for every listed URL that is not already attached to the target
 * collection (and is not black-listed or outside the optional prefix), then
 * relates the newly created links to the collection through `has_documents`.
 *
 * @property \BEdita\Core\Model\Table\ObjectsTable $Collections
 */
class ImportSitemapCommand extends Command
{
    use LogTrait;
    use ReadCSVTrait;

    /**
     * Brevia API client
     *
     * @var \Brevia\BEdita\Client\BreviaClient
     */
    protected BreviaClient $client;

    /**
     * Links Table
     *
     * @var \Cake\ORM\Table
     */
    protected Table $Links;

    /**
     * @inheritDoc
     */
    public $defaultTable = 'Collections';

    /**
     * Declare the command line options: `sitemap` and `collection` are
     * mandatory, `prefix` and `black-list` are optional filters.
     *
     * @param \Cake\Console\ConsoleOptionParser $parser Option parser to configure
     * @return \Cake\Console\ConsoleOptionParser
     */
    protected function buildOptionParser(ConsoleOptionParser $parser): ConsoleOptionParser
    {
        return $parser->addOption('sitemap', [
                'help' => 'File path or URL of sitemap to import',
                'short' => 's',
                'required' => true,
            ])
            ->addOption('prefix', [
                'help' => 'Optional path prefix of URLs to import',
                'short' => 'p',
                'required' => false,
            ])
            ->addOption('black-list', [
                'help' => 'Path to a black list file containing URLs to exclude - txt file with one URL per line',
                'short' => 'b',
                'required' => false,
            ])
            ->addOption('collection', [
                'help' => 'Collection used to index (use the unique collection name)',
                'short' => 'c',
                'required' => true,
            ]);
    }

    /**
     * Create the Brevia API client and load the `Links` table.
     *
     * @return void
     */
    public function initialize(): void
    {
        $this->client = new BreviaClient();
        $this->Links = $this->fetchTable('Links');
    }

    /**
     * Run the import.
     *
     * Steps, in order: read the sitemap, resolve the collection by name via
     * the Brevia API, collect the URLs already attached to it, load the
     * optional black list, parse the sitemap and create one link per
     * remaining URL. The command aborts when the sitemap or black list cannot
     * be read, when the collection is not found, or when the sitemap yields
     * no URL.
     *
     * @param \Cake\Console\Arguments $args Command arguments
     * @param \Cake\Console\ConsoleIo $io Console I/O
     * @return int|null
     */
    public function execute(Arguments $args, ConsoleIo $io)
    {
        $content = $this->readSitemap((string)$args->getOption('sitemap'), $io);
        $collection = $this->loadCollection((string)$args->getOption('collection'), $io);
        $known = $this->knownUrls($collection);
        $prefix = (string)$args->getOption('prefix');
        $blackList = $this->readBlackList((string)$args->getOption('black-list'), $io);

        $urls = $this->parseSitemapUrls($content);
        if (empty($urls)) {
            $io->abort('No URLs found in sitemap');
        }

        // Loop invariant: read the per-collection load options only once.
        $linkLoadOptions = (array)$collection->get('link_load_options');
        $entities = [];
        LoggedUser::setUserAdmin();
        foreach ($urls as $url) {
            if (
                isset($known[$url]) ||
                isset($known[urldecode($url)]) ||
                isset($blackList[$url]) ||
                ($prefix !== '' && strpos($url, $prefix) !== 0)
            ) {
                continue;
            }
            // Remember the URL so a duplicate entry in the same sitemap is not created twice.
            $known[$url] = true;
            $io->info('Adding link: ' . $url);
            $entity = $this->Links->newEntity([
                'status' => 'on',
                'title' => $url,
                'url' => $url,
                'extra' => [
                    'brevia' => [
                        'metadata' => [
                            'type' => 'links',
                            'url' => $url,
                        ],
                        'options' => $this->linkOptions($url, $linkLoadOptions),
                    ],
                ],
            ]);
            $entities[] = $this->Links->saveOrFail($entity);
        }
        // @phpstan-ignore-next-line
        $this->Collections->addRelated($collection, 'has_documents', $entities);

        $io->out('Done. Link added successfully: ' . count($entities));

        return null;
    }

    /**
     * Read the raw sitemap content from a local path or an HTTP(S) URL.
     *
     * Aborts when a local path does not exist or when reading fails.
     *
     * @param string $sitemap File path or URL; an empty string yields empty content
     * @param \Cake\Console\ConsoleIo $io Console I/O used to abort
     * @return string
     */
    protected function readSitemap(string $sitemap, ConsoleIo $io): string
    {
        if ($sitemap === '') {
            return '';
        }
        $isRemote = strpos($sitemap, 'http://') === 0 || strpos($sitemap, 'https://') === 0;
        if (!$isRemote && !file_exists($sitemap)) {
            $io->abort(sprintf('File not found: %s', $sitemap));
        }
        $content = file_get_contents($sitemap);
        if ($content === false) {
            $io->abort(sprintf('Error reading sitemap URL: %s', $sitemap));
        }

        return $content;
    }

    /**
     * Resolve a collection by name through the Brevia API and load the
     * matching object together with its `HasDocuments` relation.
     *
     * Aborts when the API response carries no collection id.
     *
     * @param string $name Unique collection name
     * @param \Cake\Console\ConsoleIo $io Console I/O used to abort
     * @return \Cake\Datasource\EntityInterface
     */
    protected function loadCollection(string $name, ConsoleIo $io)
    {
        $response = $this->client->get('/collections', compact('name'));
        // Cast so a non-JSON body ends up in the "not found" branch below.
        $collectionId = Hash::get((array)$response->getJson(), '0.cmetadata.id');
        if (empty($collectionId)) {
            $io->abort(sprintf('Collection not found: %s', $name));
        }

        return $this->Collections->get($collectionId, ['contain' => ['HasDocuments']]);
    }

    /**
     * Build a lookup set of the URLs of the objects already related to the
     * collection via `has_documents`.
     *
     * Each related object is reloaded through its own table before its `url`
     * is read; objects with an empty `url` are ignored.
     *
     * @param \Cake\Datasource\EntityInterface $collection Collection entity
     * @return array<string, bool> URLs as keys
     */
    protected function knownUrls($collection): array
    {
        $known = [];
        foreach ((array)$collection->get('has_documents') as $link) {
            $url = $link->getTable()->get($link->id)->get('url');
            if (!empty($url)) {
                $known[(string)$url] = true;
            }
        }

        return $known;
    }

    /**
     * Load the optional black list file (one URL per line) as a lookup set.
     *
     * Lines are trimmed so files with CRLF line endings match as expected.
     * Aborts when the file does not exist or cannot be read.
     *
     * @param string $path Black list file path; an empty string means no black list
     * @param \Cake\Console\ConsoleIo $io Console I/O used to abort
     * @return array<string, bool> Black-listed URLs as keys
     */
    protected function readBlackList(string $path, ConsoleIo $io): array
    {
        if ($path === '') {
            return [];
        }
        if (!file_exists($path)) {
            $io->abort(sprintf('Blacklist file not found: %s', $path));
        }
        $lines = file($path, FILE_IGNORE_NEW_LINES | FILE_SKIP_EMPTY_LINES);
        if ($lines === false) {
            $io->abort(sprintf('Error reading blacklist file: %s', $path));
        }
        $blackList = [];
        foreach ($lines as $line) {
            $line = trim($line);
            if ($line !== '') {
                $blackList[$line] = true;
            }
        }

        return $blackList;
    }

    /**
     * Extract the `<loc>` value of every `<url>` entry of a sitemap document.
     *
     * Entries are read directly from the parsed XML, so a sitemap with a
     * single `<url>` entry and `<loc>` values wrapped in CDATA are handled.
     * Empty or invalid XML yields an empty list.
     *
     * @param string $content Raw sitemap XML
     * @return string[]
     */
    protected function parseSitemapUrls(string $content): array
    {
        if (trim($content) === '') {
            return [];
        }
        // Collect libxml errors internally instead of emitting PHP warnings, then restore the previous mode.
        $previous = libxml_use_internal_errors(true);
        $xml = simplexml_load_string($content);
        libxml_clear_errors();
        libxml_use_internal_errors($previous);
        if ($xml === false) {
            return [];
        }
        $urls = [];
        foreach ($xml->url as $node) {
            $loc = trim((string)$node->loc);
            if ($loc !== '') {
                $urls[] = $loc;
            }
        }

        return $urls;
    }

    /**
     * Get link options
     *
     * Looks up the selector to use for a URL: the first option whose `url`
     * equals the given URL wins when it has a non-empty selector, otherwise
     * the selector of the first option whose `url` is a prefix of the given
     * URL is used. Options without a string `url` are ignored.
     *
     * @param string $url URL
     * @param array $linkLoadOptions Link load options
     * @return array Array with a single `selector` key, `null` when nothing matches
     */
    protected function linkOptions(string $url, array $linkLoadOptions): array
    {
        $candidates = array_filter($linkLoadOptions, function ($option) {
            return is_array($option) && is_string($option['url'] ?? null);
        });

        // array_filter() keeps the original keys: reindex before reading the first match.
        $exact = array_values(array_filter($candidates, function ($option) use ($url) {
            return $option['url'] === $url;
        }));
        $selector = $exact[0]['selector'] ?? null;
        if (!empty($selector)) {
            return compact('selector');
        }

        $prefixed = array_values(array_filter($candidates, function ($option) use ($url) {
            return strpos($url, $option['url']) === 0;
        }));

        return ['selector' => $prefixed[0]['selector'] ?? null];
    }
}
195