Issues (281)

Branch: master

Backend/Modules/Blog/Actions/ImportWordpress.php (1 issue)

1
<?php
2
3
namespace Backend\Modules\Blog\Actions;
4
5
use SimpleXMLElement;
6
use Symfony\Component\Filesystem\Filesystem;
7
use Backend\Core\Engine\Base\ActionEdit as BackendBaseActionEdit;
8
use Backend\Core\Engine\Exception;
9
use Backend\Core\Engine\Model as BackendModel;
10
use Backend\Core\Language\Language as BL;
11
use Backend\Core\Engine\Form;
12
use Backend\Modules\Blog\Engine\Model;
13
14
/**
15
 * This import-action will let you import a wordpress blog
16
 */
17
class ImportWordpress extends BackendBaseActionEdit
18
{
19
    /**
20
     * @var array
21
     */
22
    private $authors = [];
23
24
    /**
25
     * @var array
26
     */
27
    private $attachments = [];
28
29
    /**
30
     * @var Filesystem
31
     */
32
    private $filesystem;
33
34
    /**
     * Run the import action: build the form, handle a submission if there is one,
     * then parse and display the page.
     */
    public function execute(): void
    {
        parent::execute();

        // An import may run for a long time, so remove the execution time limit
        set_time_limit(0);

        $this->filesystem = new Filesystem();

        // Build the form first; validateForm() redirects after a successful import
        $this->loadForm();
        $this->validateForm();

        $this->parse();
        $this->display();
    }
44
45
    /**
     * Build the import form: a file field for the WordPress export and a text
     * field holding the url filter (pre-filled with SITE_URL).
     */
    private function loadForm(): void
    {
        $importForm = new Form('import');
        $importForm->addFile('wordpress');
        $importForm->addText('filter', SITE_URL);

        $this->form = $importForm;
    }
51
52
    /**
     * Validate the submitted form and, when it is correct, run the import.
     *
     * The uploaded export is moved to a fixed location, processed and removed
     * again. The removal happens in a `finally` block so the export never stays
     * behind in the files directory when the import fails halfway.
     */
    private function validateForm(): void
    {
        // Nothing to do as long as the form is not submitted
        if (!$this->form->isSubmitted()) {
            return;
        }

        // Cleanup the submitted fields, ignore fields that were added by hackers
        $this->form->cleanupFields();

        $fileField = $this->form->getField('wordpress');

        if ($fileField->isFilled()) {
            // Only XML exports are accepted
            $fileField->isAllowedExtension(['xml'], BL::err('XMLFilesOnly'));
        } else {
            // No file was uploaded
            $fileField->addError(BL::err('FieldIsRequired'));
        }

        if (!$this->form->isCorrect()) {
            return;
        }

        // processXML() reads the export from this exact location
        $importFile = FRONTEND_FILES_PATH . '/wordpress.xml';
        $fileField->moveFile($importFile);

        try {
            $this->processXML();
        } finally {
            // Always remove the uploaded export, also when the import failed
            $this->filesystem->remove($importFile);
        }

        // Everything is saved, so redirect to the overview
        $this->redirect(BackendModel::createUrlForAction('index') . '&report=imported');
    }
86
87
    /**
     * Stream through the uploaded export and import what it contains.
     *
     * `<wp:author>` elements are collected in $this->authors (keyed by login),
     * `<item>` elements are dispatched to importPost() or importAttachment()
     * depending on their `wp:post_type`. All other nodes are ignored.
     *
     * @throws \RuntimeException When the export file cannot be opened.
     */
    private function processXML(): void
    {
        $reader = new \XMLReader();

        // Reading from a reader that failed to open only produces errors later on
        if (!$reader->open(FRONTEND_FILES_PATH . '/wordpress.xml')) {
            throw new \RuntimeException('The WordPress export could not be opened.');
        }

        try {
            // Every node is visited by this single read(); no extra read() is done
            // per iteration, so no node is ever discarded without being inspected
            while ($reader->read()) {
                // Only opening tags are of interest
                if ($reader->nodeType !== \XMLReader::ELEMENT) {
                    continue;
                }

                $nodeName = $reader->name;
                if ($nodeName !== 'item' && $nodeName !== 'wp:author') {
                    continue;
                }

                // Load the element, including its children, as a SimpleXML-object.
                // Warnings are suppressed on purpose: an invalid fragment is skipped.
                /* @var SimpleXMLElement|false $xml */
                $xml = @simplexml_load_string($reader->readOuterXml());

                // Skip element if it isn't a valid SimpleXML-object
                if ($xml === false) {
                    continue;
                }

                $wp = $xml->children('wp', true);

                if ($nodeName === 'wp:author') {
                    // Remember the author, importPost() looks it up by login
                    $login = (string) $wp->author_login;
                    $this->authors[$login] = [
                        'id' => (string) $wp->author_id,
                        'login' => $login,
                        'email' => (string) $wp->author_email,
                        'display_name' => (string) $wp->author_display_name,
                        'first_name' => (string) $wp->author_first_name,
                        'last_name' => (string) $wp->author_last_name,
                    ];

                    continue;
                }

                // What type of content are we dealing with?
                switch ((string) $wp->post_type) {
                    case 'post':
                        $this->importPost($xml);
                        break;

                    case 'attachment':
                        $this->importAttachment($xml);
                        break;

                    default:
                        // Other post types (pages, menu items, ...) are not imported
                        break;
                }
            }
        } finally {
            // Release the file handle, also when an import step throws
            $reader->close();
        }
    }
155
156
    /**
     * Import a single WordPress post, including its tags, category and comments.
     *
     * SimpleXML nodes are cast to string before they are compared: a strict
     * comparison between a SimpleXMLElement and a string can never match.
     *
     * @param SimpleXMLElement $xml The `<item>` element of the post
     *
     * @return bool True when the post was handed to the model, false when it was skipped
     */
    private function importPost(SimpleXMLElement $xml): bool
    {
        $wp = $xml->children('wp', true);

        // Are we really working with a post?
        if ((string) $wp->post_type !== 'post') {
            return false;
        }

        // This is a deleted post, don't import
        $wordpressStatus = (string) $wp->status;
        if ($wordpressStatus === 'trash') {
            return false;
        }

        // Mapping for wordpress status => fork status
        $statusses = [
            'draft' => 'draft',
            'pending' => 'draft',
            'private' => 'private',
            'publish' => 'active',
            'future' => 'publish',
        ];
        $commentStatusses = [
            'open' => true,
            'closed' => false,
        ];

        $postDate = (string) $wp->post_date;

        // Prepare item
        $item = [];
        $item['user_id'] = $this->handleUser((string) $xml->children('dc', true)->creator);
        $item['title'] = (string) $xml->title;
        $item['text'] = $this->handleUrls(
            (string) $xml->children('content', true)->encoded,
            $this->form->getField('filter')->getValue()
        );
        $item['created_on'] = $postDate;
        $item['publish_on'] = $postDate;
        $item['edited_on'] = $postDate;
        // Statuses that are not in the mapping are imported as (hidden) drafts
        $item['status'] = $statusses[$wordpressStatus] ?? 'draft';
        // Comments stay closed when the comment status is not in the mapping
        $item['allow_comments'] = $commentStatusses[(string) $wp->comment_status] ?? false;

        // Some status corrections
        if ($item['status'] === 'draft') {
            $item['hidden'] = true;
        } elseif ($item['status'] === 'private') {
            $item['status'] = 'publish';
            $item['hidden'] = true;
        }

        // Prepare meta
        $meta = [];
        $meta['url'] = (string) $wp->post_name;

        // Prepare tags
        $tags = [];

        // Walk through wp categories
        foreach ($xml->category as $category) {
            /* @var SimpleXMLElement $category */
            switch ((string) $category->attributes()->domain) {
                case 'category':
                    $item['category_id'] = $this->handleCategory((string) $category);
                    break;

                case 'post_tag':
                    $tags[] = (string) $category;
                    break;

                default:
                    // Other taxonomies are not imported
                    break;
            }
        }

        // Prepare comments
        $comments = [];

        // Walk through wp comments
        foreach ($wp->comment as $comment) {
            /* @var SimpleXMLElement $comment */
            $commentData = $comment->children('wp', true);

            // NOTE(review): FILTER_SANITIZE_STRING is deprecated as of PHP 8.1; it is
            // kept so stored comment text does not change - pick a replacement deliberately.
            $comments[] = [
                'author' => (string) $commentData->comment_author,
                'email' => (string) $commentData->comment_author_email,
                'text' => filter_var((string) $commentData->comment_content, FILTER_SANITIZE_STRING),
                'created_on' => (string) $commentData->comment_date,
                'status' => ((string) $commentData->comment_approved === '1') ? 'published' : 'moderation',
            ];
        }

        // Make the call
        Model::insertCompletePost($item, $meta, $tags, $comments);

        return true;
    }
248
249
    /**
     * Download a WordPress attachment and remember where it was stored.
     *
     * The local url is registered in $this->attachments under both the
     * attachment url and the guid, so handleUrls() can rewrite references to it.
     * Nothing is registered when the download fails, so the original urls stay
     * untouched in that case.
     *
     * @param SimpleXMLElement $xml The `<item>` element of the attachment
     *
     * @return bool True when the file was downloaded and registered, false otherwise
     */
    private function importAttachment(SimpleXMLElement $xml): bool
    {
        $wp = $xml->children('wp', true);

        // Are we really working with an attachment? The node must be cast to string,
        // a strict comparison between a SimpleXMLElement and a string never matches.
        if ((string) $wp->post_type !== 'attachment') {
            return false;
        }

        $file = (string) $wp->attachment_url;
        $guid = (string) $xml->guid;
        $fileId = (string) $wp->post_id;

        // The url comes from the uploaded export: only fetch remote http(s) resources,
        // never local paths or other stream wrappers
        $scheme = mb_strtolower((string) parse_url($file, PHP_URL_SCHEME));
        if ($scheme !== 'http' && $scheme !== 'https') {
            return false;
        }

        // Set paths
        $imagesPath = FRONTEND_FILES_PATH . '/Core/CKFinder/images/blog';
        $imagesUrl = FRONTEND_FILES_URL . '/Core/CKFinder/images/blog';

        // Set filename
        // NOTE(review): the file keeps its original name and extension and is stored
        // in a public directory - consider restricting this to known image extensions.
        $destinationFile = $fileId . '_' . basename($file);

        // Download the file; a failed download is not fatal for the import
        try {
            // Create directory if needed
            if (!is_dir($imagesPath)) {
                $this->filesystem->mkdir($imagesPath);
            }

            $contents = file_get_contents($file);
            if ($contents === false) {
                return false;
            }

            $this->filesystem->dumpFile($imagesPath . '/' . $destinationFile, $contents);
        } catch (\Exception $e) {
            // Covers filesystem errors as well as warnings converted to exceptions
            return false;
        }

        // Keep a log of downloaded files
        $localUrl = $imagesUrl . '/' . $destinationFile;
        $this->attachments[mb_strtolower($file)] = $localUrl;
        if ($guid !== '') {
            $this->attachments[mb_strtolower($guid)] = $localUrl;
        }

        return true;
    }
288
289
    /**
     * Handle the user of a post
     *
     * We'll try and match the original user with a fork user by e-mail address.
     * If the author is unknown, has no e-mail address or matches no fork user,
     * we'll assign to the main fork user.
     *
     * @param string $username The original user name
     *
     * @return int
     */
    private function handleUser(string $username = ''): int
    {
        // The creator of a post is not guaranteed to be in the list of exported authors
        $email = $this->authors[$username]['email'] ?? '';

        // Without an e-mail address there is nothing to match on: assign to main user
        if ($email === '') {
            return 1;
        }

        // Does someone with this e-mail address exist?
        /* @var \SpoonDatabase $database */
        $database = BackendModel::getContainer()->get('database');
        $id = (int) $database->getVar(
            'SELECT id FROM users WHERE email=? AND active=? AND deleted=?',
            [mb_strtolower($email), true, false]
        );

        // Use the match when we found one, the main user otherwise
        return $id > 0 ? $id : 1;
    }
317
318
    /**
     * Handle the urls inside a post
     *
     * Image sources and link targets that point to an attachment that was
     * imported earlier are replaced by the local url of that attachment.
     * Nothing is downloaded here, only urls known in $this->attachments are replaced.
     *
     * @param string $text The post text
     * @param string $filter The text that needs to be in a url before we start replacing it.
     *
     * @return string
     */
    private function handleUrls(string $text, string $filter = ''): string
    {
        // Both patterns accept regular (`>`) as well as self-closing (`/>`) tags
        $text = $this->replaceAttachmentUrls($text, '/<img\b[^>]*?\ssrc="([^"]*)"[^>]*>/i', $filter);

        return $this->replaceAttachmentUrls($text, '/<a\b[^>]*?\shref="([^"]*)"[^>]*>/i', $filter);
    }

    /**
     * Replace every url captured by the pattern with its imported attachment, when there is one.
     *
     * @param string $text The post text
     * @param string $pattern A regex whose first capture group is the url
     * @param string $filter The text that needs to be in a url before we start replacing it.
     *
     * @return string
     */
    private function replaceAttachmentUrls(string $text, string $pattern, string $filter): string
    {
        // No matches (or a regex error): leave the text as it is
        if (!preg_match_all($pattern, $text, $matches)) {
            return $text;
        }

        foreach (array_unique($matches[1]) as $url) {
            if ($url === '') {
                continue;
            }

            // Should we bother looking at this url?
            if ($filter !== '' && mb_stristr($url, $filter) === false) {
                continue;
            }

            // Resized images carry a "-<width>x<height>" suffix the attachment itself doesn't have
            $withoutSize = (string) preg_replace('/\-\d+x\d+/i', '', $url);

            $replacement = $this->attachments[mb_strtolower($url)]
                ?? $this->attachments[mb_strtolower($withoutSize)]
                ?? null;

            if ($replacement !== null) {
                $text = str_replace($url, $replacement, $text);
            }
        }

        return $text;
    }
375
376
    /**
     * Handle the category of a post
     *
     * Looks up a blog category with the given title in the working language.
     * An existing category is reused; an empty title falls back to category id 2;
     * any other title results in a new category.
     *
     * @param string $category The post category
     *
     * @return int
     */
    private function handleCategory(string $category = ''): int
    {
        $language = BL::getWorkingLanguage();

        /* @var \SpoonDatabase $database */
        $database = BackendModel::getContainer()->get('database');

        // Reuse the category when one with this title already exists
        $existingId = (int) $database->getVar(
            'SELECT id FROM blog_categories WHERE title=? AND language=?',
            [$category, $language]
        );
        if ($existingId > 0) {
            return $existingId;
        }

        // Nothing usable to create a category from: return the default
        if (trim($category) === '') {
            return 2;
        }

        // Create the category, its meta is derived from the title
        return Model::insertCategory(
            [
                'language' => BL::getWorkingLanguage(),
                'title' => $category,
            ],
            [
                'keywords' => $category,
                'description' => $category,
                'title' => $category,
                'url' => $category,
            ]
        );
    }
417
}
418