Import   A
last analyzed

Complexity

Total Complexity 37

Size/Duplication

Total Lines 466
Duplicated Lines 0 %

Importance

Changes 3
Bugs 0 Features 0
Metric Value
wmc 37
eloc 180
c 3
b 0
f 0
dl 0
loc 466
rs 9.44

11 Methods

Rating   Name   Duplication   Size   Complexity  
A findImported() 0 17 1
A saveMedia() 0 26 2
A translatedFields() 0 16 5
A saveTranslations() 0 10 3
B saveObject() 0 38 8
A transform() 0 16 5
A saveObjects() 0 11 3
A cleanHtml() 0 19 2
A setRelated() 0 9 2
A __construct() 0 29 2
A saveTranslation() 0 36 4
1
<?php
2
declare(strict_types=1);
3
4
/**
5
 * BEdita, API-first content management framework
6
 * Copyright 2023 Atlas Srl, Chialab Srl
7
 *
8
 * This file is part of BEdita: you can redistribute it and/or modify
9
 * it under the terms of the GNU Lesser General Public License as published
10
 * by the Free Software Foundation, either version 3 of the License, or
11
 * (at your option) any later version.
12
 *
13
 * See LICENSE.LGPL or <http://gnu.org/licenses/lgpl-3.0.html> for more details.
14
 */
15
16
namespace BEdita\ImportTools\Utility;
17
18
use BEdita\Core\Model\Action\SaveEntityAction;
19
use BEdita\Core\Model\Action\SetRelatedObjectsAction;
20
use BEdita\Core\Model\Entity\ObjectEntity;
21
use BEdita\Core\Model\Entity\Translation;
22
use BEdita\Core\Model\Table\ObjectsBaseTable;
23
use BEdita\Core\Model\Table\ObjectsTable;
24
use BEdita\Core\Model\Table\TranslationsTable;
25
use Cake\Core\InstanceConfigTrait;
26
use Cake\Database\Expression\FunctionExpression;
27
use Cake\Database\Expression\QueryExpression;
28
use Cake\Datasource\EntityInterface;
29
use Cake\Http\Exception\BadRequestException;
30
use Cake\Log\LogTrait;
31
use Cake\ORM\Locator\LocatorAwareTrait;
32
use Cake\ORM\Query;
33
use Cake\ORM\Table;
34
use Cake\Utility\Hash;
35
use DOMDocument;
36
use DOMXPath;
37
use Exception;
38
39
/**
40
 * Import utility
41
 *
42
 * This class provides functions to import data from source files (csv by default, or xml) into BEdita.
43
 *
44
 * Public methods are:
45
 *
46
 * - `saveObjects`: read data from csv and save objects
47
 * - `saveObject`: save a single object
48
 * - `saveTranslations`: read data from csv and save translations
49
 * - `saveTranslation`: save a single translation
50
 * - `translatedFields`: get translated fields for a given object
51
 *
52
 * Usage example:
53
 * ```php
54
 * use BEdita\ImportTools\Utility\Import;
55
 *
56
 * class MyImporter
57
 * {
58
 *     public function import(string $filename, string $type, ?string $parent, ?bool $dryrun): void
59
 *     {
60
 *         $import = new Import($filename, $type, $parent, $dryrun);
61
 *         $import->saveObjects();
62
 *     }
63
 * }
64
 * ```
65
 */
66
class Import
67
{
68
    use InstanceConfigTrait;
69
    use LocatorAwareTrait;
70
    use LogTrait;
71
    use ReadTrait;
0 ignored issues
show
introduced by
The trait BEdita\ImportTools\Utility\ReadTrait requires some properties which are not provided by BEdita\ImportTools\Utility\Import: $name, $nodeType
Loading history...
72
    use TreeTrait;
73
74
    /**
75
     * @inheritDoc
76
     */
77
    protected $_defaultConfig = [
78
        'defaults' => [
79
            'status' => 'on',
80
        ],
81
        'csv' => [
82
            'delimiter' => ',',
83
            'enclosure' => '"',
84
            'escape' => '"',
85
        ],
86
    ];
87
88
    /**
89
     * Dry run mode flag
90
     *
91
     * @var bool
92
     */
93
    public bool $dryrun = false;
94
95
    /**
96
     * Full filename path
97
     *
98
     * @var string|null
99
     */
100
    public ?string $filename = '';
101
102
    /**
103
     * Parent uname or ID
104
     *
105
     * @var string|null
106
     */
107
    public ?string $parent = '';
108
109
    /**
110
     * Number of processed entities
111
     *
112
     * @var int
113
     */
114
    public int $processed = 0;
115
116
    /**
117
     * Number of saved entities
118
     *
119
     * @var int
120
     */
121
    public int $saved = 0;
122
123
    /**
124
     * Number of errors
125
     *
126
     * @var int
127
     */
128
    public int $errors = 0;
129
130
    /**
131
     * Errors details
132
     *
133
     * @var array
134
     */
135
    public array $errorsDetails = [];
136
137
    /**
138
     * Number of skipped
139
     *
140
     * @var int
141
     */
142
    public int $skipped = 0;
143
144
    /**
145
     * Entity type
146
     *
147
     * @var string
148
     */
149
    public string $type = '';
150
151
    /**
152
     * Source type
153
     *
154
     * @var string
155
     */
156
    public string $sourceType = 'csv';
157
158
    /**
159
     * Source mapping
160
     *
161
     * @var array
162
     */
163
    public array $sourceMapping = [];
164
165
    /**
166
     * Objects table
167
     *
168
     * @var \BEdita\Core\Model\Table\ObjectsTable
169
     */
170
    protected ObjectsTable $objectsTable;
171
172
    /**
173
     * Type table
174
     *
175
     * @var \BEdita\Core\Model\Table\ObjectsTable|\BEdita\Core\Model\Table\ObjectsBaseTable
176
     */
177
    protected ObjectsTable|ObjectsBaseTable $typeTable;
178
179
    /**
180
     * Translations table
181
     *
182
     * @var \BEdita\Core\Model\Table\TranslationsTable
183
     */
184
    protected TranslationsTable $translationsTable;
185
186
    /**
187
     * Assoc flag, for csv import
188
     *
189
     * @var bool
190
     */
191
    protected bool $assoc = true;
192
193
    /**
194
     * Element name, for xml import
195
     *
196
     * @var string
197
     */
198
    protected string $element = 'post';
199
200
    /**
     * Constructor
     *
     * Stores the import settings, resets the counters and resolves the tables used by the import.
     * Arguments explicitly passed as `null` fall back to the declared default values, so they can
     * be safely assigned to the non-nullable typed properties.
     *
     * @param string|null $filename Full filename path
     * @param string|null $type Entity type
     * @param string|null $parent Parent uname or ID
     * @param bool|null $dryrun Dry run mode flag
     * @param array|null $options Options; recognized keys are `mapping`, `type`, `assoc` and `element`
     * @return void
     */
    public function __construct(
        ?string $filename = null,
        ?string $type = 'objects',
        ?string $parent = null,
        ?bool $dryrun = false,
        ?array $options = ['mapping' => [], 'type' => 'csv', 'assoc' => true, 'element' => 'post'],
    ) {
        $this->setConfig($this->_defaultConfig);
        // `null` is an accepted argument value but not an accepted property value: normalize it
        $options ??= [];
        $this->filename = $filename;
        $this->type = $type ?? 'objects';
        $this->parent = $parent;
        $this->dryrun = $dryrun ?? false;
        // cast option values to the property types: under strict types a mismatch is a TypeError
        $this->sourceMapping = (array)Hash::get($options, 'mapping', []);
        $this->sourceType = (string)Hash::get($options, 'type', 'csv');
        $this->assoc = (bool)Hash::get($options, 'assoc', true);
        $this->element = (string)Hash::get($options, 'element', 'post');
        $this->processed = 0;
        $this->saved = 0;
        $this->errors = 0;
        $this->skipped = 0;
        $this->errorsDetails = [];
        /** @var \BEdita\Core\Model\Table\ObjectsTable $objectsTable */
        $objectsTable = $this->fetchTable('objects');
        $this->objectsTable = $objectsTable;
        // use the generic objects table when the table fetched for the type is not an objects-based table
        $typesTable = $this->fetchTable($this->type);
        $this->typeTable = $typesTable instanceof ObjectsBaseTable ? $typesTable : $objectsTable;
        /** @var \BEdita\Core\Model\Table\TranslationsTable $translationsTable */
        $translationsTable = $this->fetchTable('translations');
        $this->translationsTable = $translationsTable;
    }
240
241
    /**
     * Save a media entity together with its stream.
     *
     * In dry run mode nothing is persisted: the new (unsaved) media entity is returned
     * and the skipped counter is incremented.
     *
     * @param \Cake\ORM\Table $mediaTable Media table
     * @param array $mediaData Media data
     * @param array $streamData Stream data
     * @return \Cake\Datasource\EntityInterface|bool
     */
    public function saveMedia(Table $mediaTable, array $mediaData, array $streamData): EntityInterface|bool
    {
        $newMedia = $mediaTable->newEntity($mediaData);
        if ($this->dryrun === true) {
            $this->skipped++;

            return $newMedia;
        }

        // persist the media
        $saveMedia = new SaveEntityAction(['table' => $mediaTable]);
        $media = $saveMedia(['entity' => $newMedia, 'data' => $mediaData]);
        $mediaId = $media->id;

        // persist the stream, bound to the media through `object_id`
        $streamsTable = $this->fetchTable('Streams');
        $stream = $streamsTable->newEmptyEntity();
        $saveStream = new SaveEntityAction(['table' => $streamsTable]);
        $stream->set('object_id', $mediaId);
        $saveStream(['entity' => $stream, 'data' => $streamData]);

        // load the stream just created into the returned media
        $mediaTable->loadInto($media, ['Streams']);

        return $media;
    }
276
277
    /**
     * Save objects
     *
     * Reads the items from the configured source, applies the source mapping to each of them
     * and saves the result. An exception raised while transforming or saving an item is recorded
     * in `$errorsDetails` and counted in `$errors`; every item read is counted in `$processed`.
     *
     * @return void
     */
    public function saveObjects(): void
    {
        // `$filename` is nullable, while the reader is reported to accept only a string path:
        // cast explicitly so that a missing filename is handled by the reader instead of
        // raising a TypeError under strict types.
        $path = (string)$this->filename;
        foreach ($this->readItem($this->sourceType, $path, $this->assoc, $this->element) as $obj) {
            try {
                $data = $this->transform($obj, $this->sourceMapping);
                $this->saveObject($data);
            } catch (Exception $e) {
                $this->errorsDetails[] = $e->getMessage();
                $this->errors++;
            } finally {
                $this->processed++;
            }
        }
    }
296
297
    /**
     * Save a single object.
     *
     * When the data carries a `uname` (preferred) or an `id` matching an existing object,
     * that object is updated, provided it has the same type as the import; otherwise a new
     * entity is created. In dry run mode the patched entity is returned without saving and
     * the skipped counter is incremented.
     *
     * @param array $obj Object data
     * @return \BEdita\Core\Model\Entity\ObjectEntity
     */
    public function saveObject(array $obj): ObjectEntity
    {
        /** @var \BEdita\Core\Model\Entity\ObjectEntity $entity */
        $entity = $this->typeTable->newEmptyEntity();
        $identified = !empty($obj['uname']) || !empty($obj['id']);
        if ($identified) {
            // `uname` wins over `id` when both are set
            $field = empty((string)Hash::get($obj, 'uname')) ? 'id' : 'uname';
            $lookup = [$field => (string)Hash::get($obj, $field)];
            if ($this->objectsTable->exists($lookup)) {
                /** @var \BEdita\Core\Model\Entity\ObjectEntity $existing */
                $existing = $this->objectsTable->find()->where($lookup)->firstOrFail();
                if ($existing->type !== $this->type) {
                    $message = sprintf(
                        'Object "%s" already present with another type "%s"',
                        $lookup[$field],
                        $existing->type,
                    );

                    throw new BadRequestException($message);
                }
                // reload the object through the table of the existing entity, using the `type` finder
                $entity = $existing->getTable()->find('type', [$this->type])->where($lookup)->firstOrFail();
            }
        }
        /** @var \BEdita\Core\Model\Entity\ObjectEntity $entity */
        $entity = $this->typeTable->patchEntity($entity, $obj);
        $entity->set('type', $this->type);
        if ($this->dryrun !== true) {
            $this->typeTable->saveOrFail($entity);
            if (!empty($this->parent)) {
                $this->setParent($entity, $this->parent);
            }
            $this->saved++;

            return $entity;
        }
        $this->skipped++;

        return $entity;
    }
342
343
    /**
     * Set the objects related to an entity through a relation.
     *
     * Nothing is done, and `false` is returned, when there are no related entities.
     *
     * @param string $relation Relation name
     * @param \BEdita\Core\Model\Entity\ObjectEntity $entity Entity
     * @param array $relatedEntities Related entities
     * @return array|int|false
     */
    public function setRelated(string $relation, ObjectEntity $entity, array $relatedEntities): array|int|false
    {
        if ($relatedEntities === []) {
            return false;
        }
        // the association is looked up by property name on the entity's own table
        $association = $entity->getTable()->associations()->getByProperty($relation);
        $setRelatedObjects = new SetRelatedObjectsAction(['association' => $association]);

        return $setRelatedObjects(compact('entity', 'relatedEntities'));
    }
361
362
    /**
     * Save translations
     *
     * Reads the items from the configured source and saves each of them as a translation.
     * An exception raised while saving an item is recorded in `$errorsDetails` and counted
     * in `$errors`; every item read is counted in `$processed`.
     *
     * @return void
     */
    public function saveTranslations(): void
    {
        // `$filename` is nullable, while the reader is reported to accept only a string path:
        // cast explicitly so that a missing filename is handled by the reader instead of
        // raising a TypeError under strict types.
        $path = (string)$this->filename;
        foreach ($this->readItem($this->sourceType, $path, $this->assoc, $this->element) as $translation) {
            try {
                $this->saveTranslation($translation);
            } catch (Exception $e) {
                $this->errorsDetails[] = $e->getMessage();
                $this->errors++;
            } finally {
                $this->processed++;
            }
        }
    }
380
381
    /**
     * Save translation
     *
     * The translated object is identified by `object_uname`, the language by `lang`.
     * An existing translation of the object in that language is updated, otherwise a new
     * one is created. In dry run mode the entity is returned without saving and the
     * skipped counter is incremented.
     *
     * @param array $data Translation data
     * @return \BEdita\Core\Model\Entity\Translation
     * @throws \Cake\Http\Exception\BadRequestException When the object is not found or `lang` is missing
     */
    public function saveTranslation(array $data): Translation
    {
        $uname = (string)Hash::get($data, 'object_uname');
        // single lookup: a missing object is detected from the empty result
        /** @var \BEdita\Core\Model\Entity\ObjectEntity|null $o */
        $o = $this->objectsTable->find()->where(compact('uname'))->first();
        if ($o === null) {
            throw new BadRequestException(sprintf('Object "%s" not found', $uname));
        }
        // fail with a clear message instead of looking up (and saving) a translation without language
        $lang = (string)Hash::get($data, 'lang');
        if ($lang === '') {
            throw new BadRequestException(sprintf('Missing "lang" for translation of object "%s"', $uname));
        }
        $objectId = $o->id;
        /** @var \BEdita\Core\Model\Entity\Translation|null $entity */
        $entity = $this->translationsTable->find()
            ->where([
                'object_id' => $objectId,
                'lang' => $lang,
            ])
            ->first();
        $translation = [
            'object_id' => $objectId,
        ];
        if ($entity !== null) {
            $entity = $this->translationsTable->patchEntity($entity, $translation);
        } else {
            $entity = $this->translationsTable->newEntity($translation);
        }
        $entity->set('translated_fields', $this->translatedFields($data));
        $entity->set('status', $this->getConfig('defaults')['status']);
        $entity->set('lang', $lang);
        if ($this->dryrun === true) {
            $this->skipped++;

            return $entity;
        }
        $this->translationsTable->saveOrFail($entity);
        $this->saved++;

        return $entity;
    }
425
426
    /**
     * Transform data into BEdita object data
     *
     * Each mapping entry maps a source key to a destination path: the destination is written
     * with `Hash::insert`, so it may be a dotted path building nested data. Source keys missing
     * from the data are skipped; string values are trimmed. With an empty mapping the source
     * data is returned unchanged.
     *
     * @param array $obj The source data
     * @param array $mapping The mapping, as `source key => destination path`
     * @return array
     */
    public function transform(array $obj, array $mapping): array
    {
        if (empty($mapping)) {
            return $obj;
        }
        $data = [];
        foreach ($mapping as $key => $value) {
            if (!array_key_exists($key, $obj)) {
                continue;
            }
            // read the value with the same literal key just checked: a path-based read would
            // split a key containing dots (e.g. a column named "a.b") and miss the value
            $content = $obj[$key];
            $content = is_string($content) ? trim($content) : $content;
            $data = Hash::insert($data, $value, $content);
        }

        return $data;
    }
450
451
    /**
     * Get translated fields
     *
     * When the source has a non-empty `translated_fields` value, that value is used: an array
     * is returned as is, a string is decoded as a JSON object. Otherwise the fields are built
     * from the source itself: `id`, `object_uname` and `lang` are skipped and the `translation_`
     * prefix is stripped from the remaining keys.
     *
     * @param array $source Source data
     * @return array
     * @throws \Cake\Http\Exception\BadRequestException When `translated_fields` is not valid JSON for an array/object
     */
    public function translatedFields(array $source): array
    {
        $raw = $source['translated_fields'] ?? null;
        if (is_array($raw) && $raw !== []) {
            return $raw;
        }
        $json = is_array($raw) ? '' : (string)$raw;
        if (!empty($json)) {
            $decoded = json_decode($json, true);
            if (!is_array($decoded)) {
                // invalid JSON decodes to null, which would break the declared return type
                throw new BadRequestException(
                    sprintf('Invalid "translated_fields" value: %s', json_last_error_msg()),
                );
            }

            return $decoded;
        }
        $fields = [];
        foreach ($source as $key => $value) {
            // keys of non-associative rows are integers: string functions need a string under strict types
            $name = (string)$key;
            if (in_array($name, ['id', 'object_uname', 'lang'], true)) {
                continue;
            }
            $subkey = str_starts_with($name, 'translation_') ? substr($name, 12) : $name;
            $fields[$subkey] = $value;
        }

        return $fields;
    }
474
475
    /**
     * Find objects by a key/value pair stored in the `extra` JSON field.
     *
     * The query uses the `available` finder and requires a non-null `extra` whose value at
     * `$.<extraKey>` equals the given value.
     *
     * @param \Cake\ORM\Table $table Table instance.
     * @param string $extraKey Extra key.
     * @param string $extraValue Extra value.
     * @return \Cake\ORM\Query|null
     * @codeCoverageIgnore as JSON_UNQUOTE and JSON_EXTRACT are not available for sqlite
     */
    public function findImported(Table $table, string $extraKey, string $extraValue): ?Query
    {
        $query = $table->find('available');
        $matchExtra = function (QueryExpression $exp) use ($table, $extraKey, $extraValue): QueryExpression {
            $hasExtra = $exp->isNotNull($table->aliasField('extra'));

            // left side: value stored in the `extra` column at the given key
            $jsonPath = sprintf('$.%s', $extraKey);
            $extracted = new FunctionExpression('JSON_EXTRACT', ['extra' => 'identifier', $jsonPath]);
            $stored = new FunctionExpression('JSON_UNQUOTE', [$extracted]);

            // right side: the expected value, JSON-encoded and then unquoted by the database
            $expected = new FunctionExpression('JSON_UNQUOTE', [json_encode($extraValue)]);
            $sameValue = $exp->eq($stored, $expected);

            return $exp->and([$hasExtra, $sameValue]);
        };

        return $query->where($matchExtra);
    }
505
506
    /**
     * Clean HTML from attributes, preserve some (using xpath expression)
     *
     * Every attribute node matched by the XPath expression is removed from its element; with the
     * default expression all attributes except `href`, `id` and `src` are removed. The content of
     * the parsed `<body>` is returned, without the `<body>` tag itself. An empty string is returned
     * when the HTML produces no body (e.g. empty input).
     *
     * @param string $html HTML content
     * @param string $expression XPath expression selecting the attribute nodes to remove
     * @return string
     */
    public function cleanHtml(
        string $html,
        string $expression = "//@*[local-name() != 'href' and local-name() != 'id' and local-name() != 'src']",
    ): string {
        $dom = new DOMDocument();
        // the meta tag is prepended so that the parser reads the content as UTF-8
        $metaUtf8 = '<meta http-equiv="content-type" content="text/html; charset=utf-8">';
        // silence parser errors too (e.g. tags unknown to libxml), not only warnings
        $dom->loadHTML($metaUtf8 . $html, LIBXML_NOWARNING | LIBXML_NOERROR);
        $xpath = new DOMXPath($dom);
        $nodes = $xpath->query($expression);
        // `query` returns false on a malformed expression: nothing to remove in that case
        if ($nodes !== false) {
            foreach ($nodes as $node) {
                /** @var \DOMElement $element */
                $element = $node->parentNode;
                $element->removeAttribute($node->nodeName);
            }
        }
        // look for the body explicitly: without body content the last child of the document
        // element is the head, and the injected meta tag would leak into the result
        $body = $dom->getElementsByTagName('body')->item(0);
        if ($body === null) {
            return '';
        }
        $content = $dom->saveHTML($body);
        if ($content === false) {
            return '';
        }

        // strip the opening and closing body tags; keep the unstripped content if the regex fails
        return preg_replace('/<\\/?body(\\s+.*?>|>)/', '', $content) ?? $content;
    }
533
}
534