Import   A
last analyzed

Complexity

Total Complexity 37

Size/Duplication

Total Lines 461
Duplicated Lines 0 %

Importance

Changes 3
Bugs 0 Features 0
Metric Value
wmc 37
eloc 177
c 3
b 0
f 0
dl 0
loc 461
rs 9.44

11 Methods

Rating   Name   Duplication   Size   Complexity  
A saveMedia() 0 26 2
A saveObjects() 0 11 3
A __construct() 0 28 2
A findImported() 0 16 1
A translatedFields() 0 16 5
A saveTranslations() 0 10 3
A transform() 0 16 5
A cleanHtml() 0 17 2
A setRelated() 0 9 2
A saveTranslation() 0 36 4
B saveObject() 0 38 8
1
<?php
2
declare(strict_types=1);
3
4
/**
5
 * BEdita, API-first content management framework
6
 * Copyright 2023 Atlas Srl, Chialab Srl
7
 *
8
 * This file is part of BEdita: you can redistribute it and/or modify
9
 * it under the terms of the GNU Lesser General Public License as published
10
 * by the Free Software Foundation, either version 3 of the License, or
11
 * (at your option) any later version.
12
 *
13
 * See LICENSE.LGPL or <http://gnu.org/licenses/lgpl-3.0.html> for more details.
14
 */
15
16
namespace BEdita\ImportTools\Utility;
17
18
use BEdita\Core\Model\Action\SaveEntityAction;
19
use BEdita\Core\Model\Action\SetRelatedObjectsAction;
20
use BEdita\Core\Model\Entity\ObjectEntity;
21
use BEdita\Core\Model\Entity\Translation;
22
use BEdita\Core\Model\Table\ObjectsBaseTable;
23
use BEdita\Core\Model\Table\ObjectsTable;
24
use BEdita\Core\Model\Table\TranslationsTable;
25
use Cake\Database\Expression\FunctionExpression;
26
use Cake\Database\Expression\QueryExpression;
27
use Cake\Datasource\EntityInterface;
28
use Cake\Http\Exception\BadRequestException;
29
use Cake\Log\LogTrait;
30
use Cake\ORM\Locator\LocatorAwareTrait;
31
use Cake\ORM\Query;
32
use Cake\ORM\Table;
33
use Cake\Utility\Hash;
34
use DOMDocument;
35
use DOMXPath;
36
37
/**
38
 * Import utility
39
 *
40
 * This class provides functions to import data from source files (csv by default) into BEdita.
41
 *
42
 * Public methods are:
43
 *
44
 * - `saveObjects`: read data from csv and save objects
45
 * - `saveObject`: save a single object
46
 * - `saveTranslations`: read data from csv and save translations
47
 * - `saveTranslation`: save a single translation
48
 * - `translatedFields`: get translated fields for a given object
49
 *
50
 * Usage example:
51
 * ```php
52
 * use BEdita\ImportTools\Utility\Import;
53
 *
54
 * class MyImporter
55
 * {
56
 *     public function import(string $filename, string $type, ?string $parent, ?bool $dryrun): void
57
 *     {
58
 *         $import = new Import($filename, $type, $parent, $dryrun);
59
 *         $import->saveObjects();
60
 *     }
61
 * }
62
 * ```
63
 */
64
class Import
65
{
66
    use LocatorAwareTrait;
    use LogTrait;
    use ReadTrait;
    use TreeTrait;

    /**
     * Default configuration.
     *
     * `defaults.status` is read through `getConfig('defaults')` when a translation is saved.
     * The `csv` keys are presumably consumed by the csv reader (not visible in this class).
     *
     * @var array
     */
    protected $_defaultConfig = [
        'defaults' => [
            'status' => 'on',
        ],
        'csv' => [
            'delimiter' => ',',
            'enclosure' => '"',
            'escape' => '"',
        ],
    ];

    /**
     * When true nothing is persisted: entities are only built and counted as skipped.
     *
     * @var bool
     */
    public bool $dryrun = false;

    /**
     * Full path of the source file.
     *
     * @var string|null
     */
    public ?string $filename = '';

    /**
     * Uname or ID of the parent that saved objects are attached to, if any.
     *
     * @var string|null
     */
    public ?string $parent = '';

    /**
     * Counter of processed source items.
     *
     * @var int
     */
    public int $processed = 0;

    /**
     * Counter of entities actually saved.
     *
     * @var int
     */
    public int $saved = 0;

    /**
     * Counter of items that raised an exception.
     *
     * @var int
     */
    public int $errors = 0;

    /**
     * Messages of the exceptions raised while importing.
     *
     * @var array
     */
    public array $errorsDetails = [];

    /**
     * Counter of entities not saved because of dry run mode.
     *
     * @var int
     */
    public int $skipped = 0;

    /**
     * Object type of the imported entities.
     *
     * @var string
     */
    public string $type = '';

    /**
     * Type of the source, `csv` by default.
     *
     * @var string
     */
    public string $sourceType = 'csv';

    /**
     * Mapping from source keys to destination paths, used by `transform()`.
     *
     * @var array
     */
    public array $sourceMapping = [];

    /**
     * Table of all objects.
     *
     * @var \BEdita\Core\Model\Table\ObjectsTable
     */
    protected ObjectsTable $objectsTable;

    /**
     * Table of the imported object type.
     *
     * @var \BEdita\Core\Model\Table\ObjectsTable|\BEdita\Core\Model\Table\ObjectsBaseTable
     */
    protected ObjectsTable|ObjectsBaseTable $typeTable;

    /**
     * Table of translations.
     *
     * @var \BEdita\Core\Model\Table\TranslationsTable
     */
    protected TranslationsTable $translationsTable;

    /**
     * Assoc flag, for csv import.
     *
     * @var bool
     */
    protected bool $assoc = true;

    /**
     * Element name, for xml import.
     *
     * @var string
     */
    protected string $element = 'post';
196
197
    /**
     * Constructor: store the import settings, reset the counters and load the tables.
     *
     * Every argument is nullable. A `null` value falls back to the default of that
     * argument, so the non-nullable typed properties are never assigned `null`.
     *
     * @param string|null $filename Full filename path
     * @param string|null $type Entity type, `objects` when null
     * @param string|null $parent Parent uname or ID
     * @param bool|null $dryrun Dry run mode flag, `false` when null
     * @param array|null $options Options: `mapping`, `type` (source type), `assoc` (csv), `element` (xml)
     */
    public function __construct(
        ?string $filename = null,
        ?string $type = 'objects',
        ?string $parent = null,
        ?bool $dryrun = false,
        ?array $options = ['mapping' => [], 'type' => 'csv', 'assoc' => true, 'element' => 'post']
    ) {
        // `Hash::get()` rejects non-array data, so normalize a null options argument first
        $options = $options ?? [];
        $this->filename = $filename;
        $this->type = $type ?? 'objects';
        $this->parent = $parent;
        $this->dryrun = $dryrun ?? false;
        // casts keep loosely typed option values compatible with the typed properties
        $this->sourceMapping = (array)Hash::get($options, 'mapping', []);
        $this->sourceType = (string)Hash::get($options, 'type', 'csv');
        $this->assoc = (bool)Hash::get($options, 'assoc', true);
        $this->element = (string)Hash::get($options, 'element', 'post');
        $this->processed = 0;
        $this->saved = 0;
        $this->errors = 0;
        $this->skipped = 0;
        $this->errorsDetails = [];
        /** @var \BEdita\Core\Model\Table\ObjectsTable $objectsTable */
        $objectsTable = $this->fetchTable('objects');
        $this->objectsTable = $objectsTable;
        // fall back to the generic objects table when the type has no objects-based table
        $typesTable = $this->fetchTable($this->type);
        $this->typeTable = $typesTable instanceof ObjectsBaseTable ? $typesTable : $objectsTable;
        /** @var \BEdita\Core\Model\Table\TranslationsTable $translationsTable */
        $translationsTable = $this->fetchTable('translations');
        $this->translationsTable = $translationsTable;
    }
236
237
    /**
     * Create a media entity together with its stream.
     *
     * In dry run mode the media entity is only built, the skipped counter is
     * incremented and nothing is persisted.
     *
     * @param \Cake\ORM\Table $mediaTable Media table
     * @param array $mediaData Media data
     * @param array $streamData Stream data
     * @return \Cake\Datasource\EntityInterface|bool
     */
    public function saveMedia($mediaTable, array $mediaData, array $streamData): EntityInterface|bool
    {
        $newMedia = $mediaTable->newEntity($mediaData);
        if ($this->dryrun === true) {
            $this->skipped++;

            return $newMedia;
        }

        // persist the media
        $saveMedia = new SaveEntityAction(['table' => $mediaTable]);
        $media = $saveMedia(['entity' => $newMedia, 'data' => $mediaData]);
        $mediaId = $media->id;

        // persist the stream, bound to the media through `object_id`
        $streamsTable = $this->fetchTable('Streams');
        $newStream = $streamsTable->newEmptyEntity();
        $saveStream = new SaveEntityAction(['table' => $streamsTable]);
        $newStream->set('object_id', $mediaId);
        $saveStream(['entity' => $newStream, 'data' => $streamData]);

        // reload the streams so that the returned media carries the new one
        $mediaTable->loadInto($media, ['Streams']);

        return $media;
    }
272
273
    /**
     * Read every item from the source, map it and save it as an object.
     *
     * An exception raised by a single item is recorded in `errorsDetails` and
     * counted in `errors`; the loop then moves on. Each item is counted as processed.
     *
     * @return void
     */
    public function saveObjects(): void
    {
        $items = $this->readItem($this->sourceType, $this->filename, $this->assoc, $this->element);
        foreach ($items as $item) {
            try {
                $this->saveObject($this->transform($item, $this->sourceMapping));
            } catch (\Exception $exception) {
                $this->errors++;
                $this->errorsDetails[] = $exception->getMessage();
            } finally {
                $this->processed++;
            }
        }
    }
292
293
    /**
     * Save a single object, creating it or updating the one matching `uname` (or `id`).
     *
     * The lookup uses `uname` when it is not empty, `id` otherwise. In dry run mode
     * the entity is patched but not saved, and the skipped counter is incremented.
     *
     * @param array $obj Object data
     * @return \BEdita\Core\Model\Entity\ObjectEntity
     * @throws \Cake\Http\Exception\BadRequestException When the object exists with a different type
     */
    public function saveObject(array $obj): ObjectEntity
    {
        /** @var \BEdita\Core\Model\Entity\ObjectEntity $entity */
        $entity = $this->typeTable->newEmptyEntity();
        if (!empty($obj['uname']) || !empty($obj['id'])) {
            $uname = (string)Hash::get($obj, 'uname');
            $identifier = empty($uname) ? 'id' : 'uname';
            $conditions = [$identifier => (string)Hash::get($obj, $identifier)];
            // single lookup: avoids a separate existence query and the gap between check and fetch
            /** @var \BEdita\Core\Model\Entity\ObjectEntity|null $existing */
            $existing = $this->objectsTable->find()->where($conditions)->first();
            if ($existing !== null) {
                if ($existing->type !== $this->type) {
                    throw new BadRequestException(
                        sprintf(
                            'Object "%s" already present with another type "%s"',
                            $conditions[$identifier],
                            $existing->type
                        )
                    );
                }
                // reload through the table of the object so that the typed entity is patched
                $entity = $existing->getTable()->find('type', [$this->type])->where($conditions)->firstOrFail();
            }
        }
        /** @var \BEdita\Core\Model\Entity\ObjectEntity $entity */
        $entity = $this->typeTable->patchEntity($entity, $obj);
        $entity->set('type', $this->type);
        if ($this->dryrun === true) {
            $this->skipped++;

            return $entity;
        }
        $this->typeTable->saveOrFail($entity);
        if (!empty($this->parent)) {
            $this->setParent($entity, $this->parent);
        }
        $this->saved++;

        return $entity;
    }
338
339
    /**
     * Link a list of entities to an object through one of its relations.
     *
     * @param string $relation Relation name, matched against the association property
     * @param \BEdita\Core\Model\Entity\ObjectEntity $entity Entity
     * @param array $relatedEntities Related entities
     * @return array|int|false False when there is nothing to relate, the action result otherwise
     */
    public function setRelated(string $relation, ObjectEntity $entity, array $relatedEntities): array|int|false
    {
        if ($relatedEntities === []) {
            return false;
        }

        $setRelatedObjects = new SetRelatedObjectsAction([
            'association' => $entity->getTable()->associations()->getByProperty($relation),
        ]);

        return $setRelatedObjects(compact('entity', 'relatedEntities'));
    }
357
358
    /**
     * Read every item from the source and save it as a translation.
     *
     * An exception raised by a single item is recorded in `errorsDetails` and
     * counted in `errors`; the loop then moves on. Each item is counted as processed.
     *
     * @return void
     */
    public function saveTranslations(): void
    {
        $items = $this->readItem($this->sourceType, $this->filename, $this->assoc, $this->element);
        foreach ($items as $item) {
            try {
                $this->saveTranslation($item);
            } catch (\Exception $exception) {
                $this->errors++;
                $this->errorsDetails[] = $exception->getMessage();
            } finally {
                $this->processed++;
            }
        }
    }
376
377
    /**
     * Save a single translation for the object identified by `object_uname`.
     *
     * An existing translation for the same object and language is updated,
     * otherwise a new one is created. In dry run mode the entity is built but
     * not saved, and the skipped counter is incremented.
     *
     * @param array $data Translation data, `object_uname` and `lang` are required
     * @return \BEdita\Core\Model\Entity\Translation
     * @throws \Cake\Http\Exception\BadRequestException When the object is not found or `lang` is missing
     */
    public function saveTranslation(array $data): Translation
    {
        $uname = (string)Hash::get($data, 'object_uname');
        // single lookup: avoids a separate existence query and the gap between check and fetch
        /** @var \BEdita\Core\Model\Entity\ObjectEntity|null $object */
        $object = $this->objectsTable->find()->where(compact('uname'))->first();
        if ($object === null) {
            throw new BadRequestException(sprintf('Object "%s" not found', $uname));
        }
        // fail with an explicit message instead of building a translation without language
        $lang = Hash::get($data, 'lang');
        if ($lang === null || $lang === '') {
            throw new BadRequestException(sprintf('Missing "lang" for translation of object "%s"', $uname));
        }
        $objectId = $object->id;
        /** @var \BEdita\Core\Model\Entity\Translation|null $entity */
        $entity = $this->translationsTable->find()
            ->where([
                'object_id' => $objectId,
                'lang' => $lang,
            ])
            ->first();
        $translation = [
            'object_id' => $objectId,
        ];
        if ($entity !== null) {
            $entity = $this->translationsTable->patchEntity($entity, $translation);
        } else {
            $entity = $this->translationsTable->newEntity($translation);
        }
        $entity->set('translated_fields', $this->translatedFields($data));
        $entity->set('status', $this->getConfig('defaults')['status']);
        $entity->set('lang', $lang);
        if ($this->dryrun === true) {
            $this->skipped++;

            return $entity;
        }
        $this->translationsTable->saveOrFail($entity);
        $this->saved++;

        return $entity;
    }
421
422
    /**
     * Transform source data into BEdita object data using a mapping.
     *
     * Each mapping entry is `source key => destination path`. Source keys are
     * matched literally; destination paths use `Hash::insert()` dot notation.
     * Mapped string values are trimmed, and source keys missing from `$obj` are
     * ignored. With an empty mapping the source data is returned untouched.
     *
     * @param array $obj The source data
     * @param array $mapping The mapping
     * @return array
     */
    public function transform(array $obj, array $mapping): array
    {
        if (empty($mapping)) {
            return $obj;
        }
        $data = [];
        foreach ($mapping as $key => $value) {
            if (!array_key_exists($key, $obj)) {
                continue;
            }
            // read the literal key: a path lookup would lose values of source keys containing dots
            $content = $obj[$key];
            $content = is_string($content) ? trim($content) : $content;
            $data = Hash::insert($data, $value, $content);
        }

        return $data;
    }
446
447
    /**
     * Get the translated fields from a source item.
     *
     * When the item has a non-empty `translated_fields` value, that value is
     * used: an array is returned as is, a string is decoded as JSON. Otherwise
     * every key except `id`, `object_uname` and `lang` becomes a translated
     * field, with a leading `translation_` prefix removed from the key.
     *
     * @param array $source Source data
     * @return array
     * @throws \JsonException When `translated_fields` is not valid JSON
     * @throws \Cake\Http\Exception\BadRequestException When `translated_fields` does not decode to an array
     */
    public function translatedFields(array $source): array
    {
        $raw = $source['translated_fields'] ?? null;
        if (is_array($raw)) {
            return $raw;
        }
        $json = (string)$raw;
        if (!empty($json)) {
            // throw a catchable exception rather than violating the array return type
            $decoded = json_decode($json, true, 512, JSON_THROW_ON_ERROR);
            if (!is_array($decoded)) {
                throw new BadRequestException('Field "translated_fields" must contain a JSON object');
            }

            return $decoded;
        }
        $fields = [];
        foreach ($source as $key => $value) {
            // keys are integers for non associative sources, string functions need strings
            $key = (string)$key;
            if (in_array($key, ['id', 'object_uname', 'lang'], true)) {
                continue;
            }
            $subkey = str_starts_with($key, 'translation_') ? substr($key, 12) : $key;
            $fields[$subkey] = $value;
        }

        return $fields;
    }
470
471
    /**
     * Find available objects whose `extra` JSON holds a given value under a given key.
     *
     * The query keeps rows where `extra` is not null and the unquoted JSON value
     * at `$.{extraKey}` equals `$extraValue`.
     *
     * @param \Cake\ORM\Table $table Table instance.
     * @param string $extraKey Extra key.
     * @param string $extraValue Extra value.
     * @return \Cake\ORM\Query|null
     * @codeCoverageIgnore as JSON_UNQUOTE and JSON_EXTRACT are not available for sqlite
     */
    public function findImported(Table $table, string $extraKey, string $extraValue): ?Query
    {
        // same aliased column in both conditions, so the field stays unambiguous
        $extraField = $table->aliasField('extra');

        return $table->find('available')->where(
            function (QueryExpression $exp) use ($extraField, $extraKey, $extraValue): QueryExpression {
                $stored = new FunctionExpression(
                    'JSON_UNQUOTE',
                    [
                        new FunctionExpression(
                            'JSON_EXTRACT',
                            [$extraField => 'identifier', sprintf('$.%s', $extraKey)]
                        ),
                    ]
                );
                $expected = new FunctionExpression('JSON_UNQUOTE', [json_encode($extraValue)]);

                // expression methods add to `$exp` and return it: chaining adds each condition once
                return $exp
                    ->isNotNull($extraField)
                    ->eq($stored, $expected);
            }
        );
    }
500
501
    /**
     * Clean HTML from attributes, preserving some of them.
     *
     * Every attribute node matched by the XPath expression is removed; the
     * default expression matches all attributes except `href`, `id` and `src`.
     * The content is returned without the wrapping `<body>` tag. Input that
     * produces no body, such as an empty string, gives an empty string.
     *
     * @param string $html HTML content
     * @param string $expression XPath expression selecting the attributes to remove
     * @return string
     */
    public function cleanHtml(string $html, string $expression = "//@*[local-name() != 'href' and local-name() != 'id' and local-name() != 'src']"): string
    {
        $dom = new DOMDocument();
        // the meta tag makes libxml read the content as UTF-8
        $metaUtf8 = '<meta http-equiv="content-type" content="text/html; charset=utf-8">';
        // unknown tags are reported by libxml as errors, silence them along with warnings
        $dom->loadHTML($metaUtf8 . $html, LIBXML_NOWARNING | LIBXML_NOERROR);
        $xpath = new DOMXPath($dom);
        $nodes = $xpath->query($expression);
        if ($nodes !== false) {
            foreach ($nodes as $node) {
                /** @var \DOMElement $element */
                $element = $node->parentNode;
                $element->removeAttribute($node->nodeName);
            }
        }
        // look the body up explicitly: without content the last child of the document is the head
        $body = $dom->getElementsByTagName('body')->item(0);
        if ($body === null) {
            return '';
        }
        $content = (string)$dom->saveHTML($body);

        return (string)preg_replace('/<\\/?body(\\s+.*?>|>)/', '', $content);
    }
526
}
527