Passed
Pull Request — main (#16)
by Dante
03:06 queued 01:46
created

Import::findImported()   A

Complexity

Conditions 1
Paths 1

Size

Total Lines 16
Code Lines 10

Duplication

Lines 0
Ratio 0 %

Importance

Changes 0
Metric Value
eloc 10
c 0
b 0
f 0
dl 0
loc 16
rs 9.9332
cc 1
nc 1
nop 3
1
<?php
2
declare(strict_types=1);
3
4
/**
5
 * BEdita, API-first content management framework
6
 * Copyright 2023 Atlas Srl, Chialab Srl
7
 *
8
 * This file is part of BEdita: you can redistribute it and/or modify
9
 * it under the terms of the GNU Lesser General Public License as published
10
 * by the Free Software Foundation, either version 3 of the License, or
11
 * (at your option) any later version.
12
 *
13
 * See LICENSE.LGPL or <http://gnu.org/licenses/lgpl-3.0.html> for more details.
14
 */
15
16
namespace BEdita\ImportTools\Utility;
17
18
use BEdita\Core\Model\Action\SaveEntityAction;
19
use BEdita\Core\Model\Action\SetRelatedObjectsAction;
20
use BEdita\Core\Model\Entity\ObjectEntity;
21
use BEdita\Core\Model\Entity\Translation;
22
use BEdita\Core\Model\Table\ObjectsTable;
23
use BEdita\Core\Model\Table\TranslationsTable;
24
use Cake\Database\Expression\FunctionExpression;
25
use Cake\Database\Expression\QueryExpression;
26
use Cake\Datasource\EntityInterface;
27
use Cake\Http\Exception\BadRequestException;
28
use Cake\Log\LogTrait;
29
use Cake\ORM\Locator\LocatorAwareTrait;
30
use Cake\ORM\Query;
31
use Cake\ORM\Table;
32
use Cake\Utility\Hash;
33
use DOMDocument;
34
use DOMXPath;
35
36
/**
 * Import utility
 *
 * This class provides functions to import data from source files (csv by default, xml also supported)
 * into BEdita.
 *
 * Public methods are:
 *
 * - `saveMedia`: save a media object and its stream
 * - `saveObjects`: read data from the source file and save objects
 * - `saveObject`: save a single object
 * - `setRelated`: set related objects to an entity by relation
 * - `saveTranslations`: read data from the source file and save translations
 * - `saveTranslation`: save a single translation
 * - `transform`: map source data into BEdita object data
 * - `translatedFields`: get translated fields for a given object
 * - `findImported`: find objects by a key stored in the `extra` field
 * - `cleanHtml`: remove attributes from HTML, preserving some of them
 *
 * Usage example:
 * ```php
 * use BEdita\ImportTools\Utility\Import;
 *
 * class MyImporter
 * {
 *     public function import(string $filename, string $type, ?string $parent, ?bool $dryrun): void
 *     {
 *         $import = new Import($filename, $type, $parent, $dryrun);
 *         $import->saveObjects();
 *     }
 * }
 * ```
 */
63
class Import
64
{
65
    use LocatorAwareTrait;
66
    use LogTrait;
67
    use ReadTrait;
68
    use TreeTrait;
69
70
    /**
71
     * @inheritDoc
72
     */
73
    protected $_defaultConfig = [
74
        'defaults' => [
75
            'status' => 'on',
76
        ],
77
        'csv' => [
78
            'delimiter' => ',',
79
            'enclosure' => '"',
80
            'escape' => '"',
81
        ],
82
    ];
83
84
    /**
85
     * Dry run mode flag
86
     *
87
     * @var bool
88
     */
89
    public bool $dryrun = false;
90
91
    /**
92
     * Full filename path
93
     *
94
     * @var string|null
95
     */
96
    public ?string $filename = '';
97
98
    /**
99
     * Parent uname or ID
100
     *
101
     * @var string|null
102
     */
103
    public ?string $parent = '';
104
105
    /**
106
     * Number of processed entities
107
     *
108
     * @var int
109
     */
110
    public int $processed = 0;
111
112
    /**
113
     * Number of saved entities
114
     *
115
     * @var int
116
     */
117
    public int $saved = 0;
118
119
    /**
120
     * Number of errors
121
     *
122
     * @var int
123
     */
124
    public int $errors = 0;
125
126
    /**
127
     * Errors details
128
     *
129
     * @var array
130
     */
131
    public array $errorsDetails = [];
132
133
    /**
134
     * Number of skipped
135
     *
136
     * @var int
137
     */
138
    public int $skipped = 0;
139
140
    /**
141
     * Entity type
142
     *
143
     * @var string
144
     */
145
    public string $type = '';
146
147
    /**
148
     * Source type
149
     *
150
     * @var string
151
     */
152
    public string $sourceType = 'csv';
153
154
    /**
155
     * Source mapping
156
     *
157
     * @var array
158
     */
159
    public array $sourceMapping = [];
160
161
    /**
162
     * Objects table
163
     *
164
     * @var \BEdita\Core\Model\Table\ObjectsTable
165
     */
166
    protected ObjectsTable $objectsTable;
167
168
    /**
169
     * Type table
170
     *
171
     * @var \BEdita\Core\Model\Table\ObjectsTable
172
     */
173
    protected ObjectsTable $typeTable;
174
175
    /**
176
     * Translations table
177
     *
178
     * @var \BEdita\Core\Model\Table\TranslationsTable
179
     */
180
    protected TranslationsTable $translationsTable;
181
182
    /**
183
     * Assoc flag, for csv import
184
     *
185
     * @var bool
186
     */
187
    protected bool $assoc = true;
188
189
    /**
190
     * Element name, for xml import
191
     *
192
     * @var string
193
     */
194
    protected string $element = 'post';
195
196
    /**
     * Constructor
     *
     * Stores the import settings, resets all counters and resolves the tables used by the import.
     * `null` arguments fall back to the documented defaults, so that nullable parameters
     * never end up assigned to non-nullable properties.
     *
     * @param string|null $filename Full filename path
     * @param string|null $type Entity type, `objects` when null
     * @param string|null $parent Parent uname or ID
     * @param bool|null $dryrun Dry run mode flag, `false` when null
     * @param array|null $options Options (`mapping`, `type`, `assoc`, `element`), defaults are used when null
     * @return void
     */
    public function __construct(
        ?string $filename = null,
        ?string $type = 'objects',
        ?string $parent = null,
        ?bool $dryrun = false,
        ?array $options = ['mapping' => [], 'type' => 'csv', 'assoc' => true, 'element' => 'post']
    ) {
        // `Hash::get` needs an array: a null `$options` means "use defaults for everything"
        $options = $options ?? [];
        $this->filename = $filename;
        // `$type` and `$dryrun` are non-nullable properties: normalize null to the default value
        $this->type = $type ?? 'objects';
        $this->parent = $parent;
        $this->dryrun = $dryrun ?? false;
        $this->sourceMapping = Hash::get($options, 'mapping', []);
        $this->sourceType = Hash::get($options, 'type', 'csv');
        $this->assoc = Hash::get($options, 'assoc', true);
        $this->element = Hash::get($options, 'element', 'post');
        $this->processed = 0;
        $this->saved = 0;
        $this->errors = 0;
        $this->skipped = 0;
        $this->errorsDetails = [];
        /** @var \BEdita\Core\Model\Table\ObjectsTable $objectsTable */
        $objectsTable = $this->fetchTable('objects');
        $this->objectsTable = $objectsTable;
        // the table resolved for the type is used only when it is an objects table,
        // otherwise the generic objects table is used
        $typesTable = $this->fetchTable($this->type);
        $this->typeTable = $typesTable instanceof ObjectsTable ? $typesTable : $objectsTable;
        /** @var \BEdita\Core\Model\Table\TranslationsTable $translationsTable */
        $translationsTable = $this->fetchTable('translations');
        $this->translationsTable = $translationsTable;
    }
236
237
    /**
     * Save media
     *
     * Creates a media entity from `$mediaData`, then creates a stream from `$streamData`
     * whose `object_id` is the ID of the saved media.
     * In dry run mode nothing is saved: the `skipped` counter is incremented and
     * the new (unsaved) media entity is returned.
     *
     * @param \Cake\ORM\Table $mediaTable Media table
     * @param array $mediaData Media data
     * @param array $streamData Stream data
     * @return \Cake\Datasource\EntityInterface|bool Result of the stream save action, or the unsaved media in dry run mode
     */
    public function saveMedia($mediaTable, array $mediaData, array $streamData): EntityInterface|bool
    {
        $media = $mediaTable->newEntity($mediaData);
        if ($this->dryrun === true) {
            $this->skipped++;

            return $media;
        }

        // save media
        $saveMedia = new SaveEntityAction(['table' => $mediaTable]);
        $savedMedia = $saveMedia(['entity' => $media, 'data' => $mediaData]);
        $mediaId = $savedMedia->id;

        // save stream, attached to the media via `object_id`
        $streamsTable = $this->fetchTable('Streams');
        $stream = $streamsTable->newEmptyEntity();
        $saveStream = new SaveEntityAction(['table' => $streamsTable]);
        $stream->set('object_id', $mediaId);

        return $saveStream(['entity' => $stream, 'data' => $streamData]);
    }
270
271
    /**
     * Save objects
     *
     * Reads items from the source file, transforms each one using the source mapping and saves it.
     * Exceptions are not propagated: the message is collected in `errorsDetails` and
     * the `errors` counter is incremented. `processed` is incremented for every item.
     *
     * @return void
     */
    public function saveObjects(): void
    {
        $items = $this->readItem($this->sourceType, $this->filename, $this->assoc, $this->element);
        foreach ($items as $item) {
            try {
                $this->saveObject($this->transform($item, $this->sourceMapping));
            } catch (\Exception $exception) {
                $this->errorsDetails[] = $exception->getMessage();
                $this->errors++;
            } finally {
                $this->processed++;
            }
        }
    }
290
291
    /**
     * Save object
     *
     * When `$obj` has a non-empty `uname` (or, failing that, `id`) matching an existing object,
     * that object is loaded and patched; otherwise a new entity is created.
     * In dry run mode the patched entity is returned without saving and `skipped` is incremented.
     * After a successful save the parent is set (when configured) and `saved` is incremented.
     *
     * @param array $obj Object data
     * @return \BEdita\Core\Model\Entity\ObjectEntity
     * @throws \Cake\Http\Exception\BadRequestException When the existing object has a different type
     */
    public function saveObject(array $obj): ObjectEntity
    {
        $entity = $this->typeTable->newEmptyEntity();
        $hasIdentifier = !empty($obj['uname']) || !empty($obj['id']);
        if ($hasIdentifier) {
            // `uname` takes precedence over `id` when both are set
            $uname = (string)Hash::get($obj, 'uname');
            $field = empty($uname) ? 'id' : 'uname';
            $conditions = [$field => (string)Hash::get($obj, $field)];
            if ($this->objectsTable->exists($conditions)) {
                /** @var \BEdita\Core\Model\Entity\ObjectEntity $existing */
                $existing = $this->objectsTable->find()->where($conditions)->firstOrFail();
                if ($existing->type !== $this->type) {
                    $message = sprintf(
                        'Object "%s" already present with another type "%s"',
                        $conditions[$field],
                        $existing->type
                    );
                    throw new BadRequestException($message);
                }
                // reload the object through its own table, using the `type` finder
                $entity = $existing->getTable()
                    ->find('type', [$this->type])
                    ->where($conditions)
                    ->firstOrFail();
            }
        }
        $entity = $this->typeTable->patchEntity($entity, $obj);
        $entity->set('type', $this->type);
        if ($this->dryrun === true) {
            $this->skipped++;

            return $entity;
        }
        $this->typeTable->saveOrFail($entity);
        if (!empty($this->parent)) {
            $this->setParent($entity, $this->parent);
        }
        $this->saved++;

        return $entity;
    }
334
335
    /**
     * Set related objects to an entity by relation
     *
     * The association is looked up by property name on the entity table and
     * passed to `SetRelatedObjectsAction`.
     *
     * @param string $relation Relation name
     * @param \BEdita\Core\Model\Entity\ObjectEntity $entity Entity
     * @param array $relatedEntities Related entities
     * @return array|int|false Action result, `false` when there are no related entities
     */
    public function setRelated(string $relation, ObjectEntity $entity, array $relatedEntities): array|int|false
    {
        if ($relatedEntities === []) {
            return false;
        }
        $associations = $entity->getTable()->associations();
        $setRelated = new SetRelatedObjectsAction([
            'association' => $associations->getByProperty($relation),
        ]);

        return $setRelated(['entity' => $entity, 'relatedEntities' => $relatedEntities]);
    }
353
354
    /**
     * Save translations
     *
     * Reads items from the source file and saves each one as a translation.
     * Exceptions are not propagated: the message is collected in `errorsDetails` and
     * the `errors` counter is incremented. `processed` is incremented for every item.
     *
     * @return void
     */
    public function saveTranslations(): void
    {
        $items = $this->readItem($this->sourceType, $this->filename, $this->assoc, $this->element);
        foreach ($items as $item) {
            try {
                $this->saveTranslation($item);
            } catch (\Exception $exception) {
                $this->errorsDetails[] = $exception->getMessage();
                $this->errors++;
            } finally {
                $this->processed++;
            }
        }
    }
372
373
    /**
     * Save translation
     *
     * Looks up the object by `object_uname`, then creates or updates its translation
     * for the language in `lang`. Status is taken from the `defaults` configuration.
     * In dry run mode the entity is returned without saving and `skipped` is incremented.
     *
     * @param array $data Translation data
     * @return \BEdita\Core\Model\Entity\Translation
     * @throws \Cake\Http\Exception\BadRequestException When no object with the given uname exists
     */
    public function saveTranslation(array $data): Translation
    {
        $uname = (string)Hash::get($data, 'object_uname');
        $byUname = ['uname' => $uname];
        if (!$this->objectsTable->exists($byUname)) {
            throw new BadRequestException(sprintf('Object "%s" not found', $uname));
        }
        /** @var \BEdita\Core\Model\Entity\ObjectEntity $object */
        $object = $this->objectsTable->find()->where($byUname)->firstOrFail();
        $objectId = $object->id;

        // an existing translation for the same object and language is updated, not duplicated
        /** @var \BEdita\Core\Model\Entity\Translation|null $existing */
        $existing = $this->translationsTable->find()
            ->where([
                'object_id' => $objectId,
                'lang' => $data['lang'],
            ])
            ->first();
        $translation = ['object_id' => $objectId];
        $entity = $existing === null
            ? $this->translationsTable->newEntity($translation)
            : $this->translationsTable->patchEntity($existing, $translation);

        $entity->set('translated_fields', $this->translatedFields($data));
        $entity->set('status', $this->getConfig('defaults')['status']);
        $entity->set('lang', $data['lang']);
        if ($this->dryrun === true) {
            $this->skipped++;

            return $entity;
        }
        $this->translationsTable->saveOrFail($entity);
        $this->saved++;

        return $entity;
    }
417
418
    /**
     * Transform data into BEdita object data
     *
     * Each mapping entry maps a source key to a destination path in the result.
     * Source keys missing from `$obj` are ignored. With an empty mapping
     * the source data is returned unchanged.
     *
     * @param array $obj The source data
     * @param array $mapping The mapping, as `source key => destination path`
     * @return array
     */
    public function transform(array $obj, array $mapping): array
    {
        if ($mapping === []) {
            return $obj;
        }
        $result = [];
        foreach ($mapping as $sourceKey => $targetPath) {
            if (array_key_exists($sourceKey, $obj)) {
                $result = Hash::insert($result, $targetPath, Hash::get($obj, $sourceKey));
            }
        }

        return $result;
    }
440
441
    /**
     * Get translated fields
     *
     * When `translated_fields` is set and not empty it is decoded as JSON and returned.
     * Otherwise fields are built from the source data: `id`, `object_uname` and `lang`
     * are skipped, and the `translation_` prefix is removed from the remaining keys.
     *
     * @param array $source Source data
     * @return array
     * @throws \Cake\Http\Exception\BadRequestException When `translated_fields` is not a JSON array or object
     */
    public function translatedFields(array $source): array
    {
        $json = (string)Hash::get($source, 'translated_fields');
        if (!empty($json)) {
            $decoded = json_decode($json, true);
            // invalid JSON (null) or a JSON scalar would break the `array` return type with a TypeError,
            // which is not an `\Exception`: raise an exception that callers can handle instead
            if (!is_array($decoded)) {
                throw new BadRequestException(
                    sprintf('Invalid "translated_fields" value: %s', json_last_error_msg())
                );
            }

            return $decoded;
        }
        $fields = [];
        foreach ($source as $key => $value) {
            // keys may be integers for non associative rows: string functions need a string
            $key = (string)$key;
            if (in_array($key, ['id', 'object_uname', 'lang'], true)) {
                continue;
            }
            $subkey = str_starts_with($key, 'translation_') ? substr($key, 12) : $key;
            $fields[$subkey] = $value;
        }

        return $fields;
    }
464
465
    /**
     * Find object by key and identifier.
     *
     * Builds a query on the `available` finder, filtered on a non null `extra` field and
     * on `JSON_UNQUOTE(JSON_EXTRACT(extra, '$.<extraKey>'))` being equal to the given value.
     *
     * @param \Cake\ORM\Table $table Table instance.
     * @param string $extraKey Extra key.
     * @param string $extraValue Extra value.
     * @return \Cake\ORM\Query|null
     * @codeCoverageIgnore as JSON_UNQUOTE and JSON_EXTRACT are not available for sqlite
     */
    public function findImported(Table $table, string $extraKey, string $extraValue): ?Query
    {
        $filter = function (QueryExpression $exp) use ($table, $extraKey, $extraValue): QueryExpression {
            // value stored in `extra` at the given key
            $jsonPath = sprintf('$.%s', $extraKey);
            $extracted = new FunctionExpression('JSON_EXTRACT', ['extra' => 'identifier', $jsonPath]);
            $stored = new FunctionExpression('JSON_UNQUOTE', [$extracted]);
            // value to look for, JSON encoded and then unquoted like the stored one
            $expected = new FunctionExpression('JSON_UNQUOTE', [json_encode($extraValue)]);

            return $exp->and([
                $exp->isNotNull($table->aliasField('extra')),
                $exp->eq($stored, $expected),
            ]);
        };

        return $table->find('available')->where($filter);
    }
494
495
    /**
     * Clean HTML from attributes, preserve some (using xpath expression)
     *
     * Every attribute node matched by `$expression` is removed from its element;
     * by default all attributes except `href`, `id` and `src` are removed.
     * The returned string is the content of the parsed `<body>`, without the `<body>` tag itself.
     * Blank input, or input that produces no `<body>` element, returns an empty string.
     *
     * @param string $html HTML content
     * @param string $expression XPath expression selecting the attributes to remove
     * @return string
     */
    public function cleanHtml(string $html, string $expression = "//@*[local-name() != 'href' and local-name() != 'id' and local-name() != 'src']"): string
    {
        // with blank input the parsed document contains only the `<meta>` added below:
        // return early instead of leaking `<head>` markup
        if (trim($html) === '') {
            return '';
        }
        $dom = new DOMDocument();
        // force UTF-8 parsing of the fragment
        $metaUtf8 = '<meta http-equiv="content-type" content="text/html; charset=utf-8">';
        // LIBXML_NOERROR: tags unknown to libxml (e.g. HTML5 ones) are reported as errors, not warnings
        $dom->loadHTML($metaUtf8 . $html, LIBXML_NOERROR | LIBXML_NOWARNING);
        $xpath = new DOMXPath($dom);
        $nodes = $xpath->query($expression);
        // `query` returns false on a malformed expression: nothing to remove in that case
        foreach ($nodes === false ? [] : $nodes as $node) {
            $element = $node->parentNode;
            // a custom expression may match nodes whose parent is not an element
            if ($element instanceof \DOMElement) {
                $element->removeAttribute($node->nodeName);
            }
        }
        // look up `<body>` explicitly: the last child of the root element may be something else
        $body = $dom->getElementsByTagName('body')->item(0);
        if ($body === null) {
            return '';
        }
        $content = (string)$dom->saveHTML($body);

        return (string)preg_replace('/<\\/?body(\\s+.*?>|>)/', '', $content);
    }
520
}
521