Completed
Push — 2.1 ( 3d3dcf...bebfa2 )
by Rafał
10:25 queued 20s
created

createFigureComponent()   A

Complexity

Conditions 1
Paths 1

Size

Total Lines 12

Duplication

Lines 0
Ratio 0 %

Importance

Changes 0
Metric Value
dl 0
loc 12
rs 9.8666
c 0
b 0
f 0
cc 1
nc 1
nop 1
1
<?php
2
3
declare(strict_types=1);
4
5
/*
 * This file is part of the Superdesk Web Publisher Core Bundle.
 *
 * Copyright 2020 Sourcefabric z.ú. and contributors.
 *
 * For the full copyright and license information, please see the
 * AUTHORS and LICENSE files distributed with this source code.
 *
 * @copyright 2020 Sourcefabric z.ú.
 * @license http://www.superdesk.org/license
 */
16
17
namespace SWP\Bundle\CoreBundle\AppleNews\Converter;
18
19
use SWP\Bundle\CoreBundle\AppleNews\Component\Body;
20
use SWP\Bundle\CoreBundle\AppleNews\Component\EmbedWebVideo;
21
use SWP\Bundle\CoreBundle\AppleNews\Component\FacebookPost;
22
use SWP\Bundle\CoreBundle\AppleNews\Component\Figure;
23
use SWP\Bundle\CoreBundle\AppleNews\Component\Heading;
24
use SWP\Bundle\CoreBundle\AppleNews\Component\Instagram;
25
use SWP\Bundle\CoreBundle\AppleNews\Component\Quote;
26
use SWP\Bundle\CoreBundle\AppleNews\Component\Tweet;
27
28
/**
 * Converts the HTML body of an article into a flat list of Apple News components.
 *
 * Only the direct children of the parsed <body> element are inspected:
 * headings, paragraphs, figures, embed wrappers (<div> elements carrying a
 * "class" attribute) and blockquotes. Every other node is ignored.
 */
final class ArticleBodyToComponentsConverter
{
    /**
     * Parses the given HTML and maps its top-level nodes to components.
     *
     * @param string $body raw article HTML; <script> blocks are removed before parsing
     *
     * @return array list of component objects in document order; empty when $body is ''
     *
     * @throws \InvalidArgumentException when the parsed document has no <body> element
     * @throws \RuntimeException         when the <script> blocks cannot be stripped
     */
    public function convert(string $body): array
    {
        if ('' === $body) {
            return [];
        }

        $document = new \DOMDocument();

        // Parse errors are collected internally instead of being raised as
        // warnings. The previous libxml setting is restored afterwards so this
        // call does not leak process-wide state into unrelated code.
        $previousErrorHandling = libxml_use_internal_errors(true);

        try {
            // The XML declaration prefix makes libxml read the markup as UTF-8.
            $document->loadHTML('<?xml encoding="UTF-8">'.self::stripHtmlTags($body));
            $document->encoding = 'UTF-8';
        } finally {
            libxml_clear_errors();
            libxml_use_internal_errors($previousErrorHandling);
        }

        $bodyElement = $document->getElementsByTagName('body')->item(0);
        if (null === $bodyElement) {
            throw new \InvalidArgumentException('Invalid HTML was provided');
        }

        $components = [];
        foreach ($bodyElement->childNodes as $node) {
            // Text nodes, comments, etc. never map to a component.
            if (!$node instanceof \DOMElement) {
                continue;
            }

            $component = null;

            switch ($node->nodeName) {
                case 'h1':
                case 'h2':
                case 'h3':
                case 'h4':
                case 'h5':
                case 'h6':
                    if ('' !== $node->textContent) {
                        // The heading level is the digit that follows the "h".
                        $level = (int) substr($node->nodeName, 1);
                        $component = new Heading($node->textContent, $level, 'fullMarginAboveHalfBelowLayout');
                    }

                    break;
                case 'p':
                    // NOTE(review): a paragraph without any text is skipped before
                    // the image check, so an image-only paragraph produces no
                    // component. Kept as-is; confirm this is intended.
                    if ('' === $node->textContent) {
                        break;
                    }

                    if ($node->getElementsByTagName('img')->length > 0) {
                        $component = $this->createFigureComponent($node);

                        break;
                    }

                    $component = new Body($node->ownerDocument->saveHTML($node), 'marginBetweenComponents');

                    break;
                case 'figure':
                    $component = $this->createFigureComponent($node);

                    break;
                case 'div':
                    if ($node->hasAttribute('class')) {
                        $component = $this->createEmbedComponent($node);
                    }

                    break;
                case 'blockquote':
                    if ('' !== $node->textContent) {
                        $component = new Quote('“'.$node->textContent.'”');
                    }

                    break;
            }

            if (null !== $component) {
                $components[] = $component;
            }
        }

        return $components;
    }

    /**
     * Removes every <script>...</script> block from the given HTML.
     *
     * @throws \RuntimeException when the regular expression engine fails
     *                           (preg_replace() returns null in that case)
     */
    public static function stripHtmlTags(string $html): string
    {
        $stripped = preg_replace('/<script.*>.*<\/script>/isU', '', $html);

        if (null === $stripped) {
            throw new \RuntimeException(sprintf('Unable to strip script tags from the given HTML (PCRE error code %d).', preg_last_error()));
        }

        return $stripped;
    }

    /**
     * Tells whether the URL matches the Facebook post/photo/permalink/activity pattern.
     */
    private function isValidFacebookPostUrl(string $url): bool
    {
        return 1 === preg_match('/^https:\/\/www\.facebook\.com\/(photo(\.php|s)|permalink\.php|[^\/]+\/(activity|posts))[\/?].*$/', $url);
    }

    /**
     * Builds a Figure from the first <img> found inside the node.
     *
     * @return Figure|null null when the node contains no <img> element
     */
    private function createFigureComponent(\DOMElement $node): ?Figure
    {
        $image = $node->getElementsByTagName('img')->item(0);
        if (null === $image) {
            return null;
        }

        $figcaption = $node->getElementsByTagName('figcaption')->item(0);

        // A missing <figcaption> (always the case for an <img> placed directly
        // in a paragraph) yields an empty caption instead of a null dereference.
        // NOTE(review): assumes Figure accepts an empty-string caption - confirm.
        $caption = null !== $figcaption ? $figcaption->textContent : '';

        return new Figure($image->getAttribute('src'), $caption);
    }

    /**
     * Maps an embed wrapper to a component: the first <iframe> wins, otherwise
     * the first <blockquote> is treated as an Instagram embed.
     *
     * @return object|null null when the wrapper holds nothing convertible
     */
    private function createEmbedComponent(\DOMElement $node)
    {
        $iframe = $node->getElementsByTagName('iframe')->item(0);
        if (null !== $iframe) {
            // Drops backslash-escaped quotes (\") left in the attribute value.
            return $this->createIframeComponent(str_replace('\"', '', $iframe->getAttribute('src')));
        }

        $blockquote = $node->getElementsByTagName('blockquote')->item(0);
        if (null === $blockquote) {
            return null;
        }

        $permalink = str_replace('\"', '', $blockquote->getAttribute('data-instgrm-permalink'));

        // getAttribute() returns '' for a missing attribute: a blockquote without
        // a permalink is not an Instagram embed, so no component is created.
        return '' !== $permalink ? new Instagram($permalink) : null;
    }

    /**
     * Picks the component matching the provider found in the iframe URL.
     * Providers are checked in order: twitter.com, iframe.ly, facebook.com,
     * then youtube.com / vimeo.com.
     *
     * @return object|null null for unknown providers or when the expected
     *                     query parameter is missing
     */
    private function createIframeComponent(string $url)
    {
        if (false !== strpos($url, 'twitter.com')) {
            $tweetUrl = $this->extractQueryParameter($url, 'url');

            return null !== $tweetUrl ? new Tweet($tweetUrl) : null;
        }

        if (false !== strpos($url, 'iframe.ly')) {
            $postUrl = $this->extractQueryParameter($url, 'url');

            return null !== $postUrl ? new FacebookPost($postUrl) : null;
        }

        if (false !== strpos($url, 'facebook.com')) {
            $postUrl = $this->extractQueryParameter($url, 'href');

            return null !== $postUrl && $this->isValidFacebookPostUrl($postUrl) ? new FacebookPost($postUrl) : null;
        }

        if (false !== strpos($url, 'youtube.com') || false !== strpos($url, 'vimeo.com')) {
            return new EmbedWebVideo($url);
        }

        return null;
    }

    /**
     * Reads a single parameter from the query string of a URL.
     *
     * @return string|null null when the URL has no query string or the
     *                     parameter is absent, empty or not a plain string
     */
    private function extractQueryParameter(string $url, string $name): ?string
    {
        $query = parse_url($url, PHP_URL_QUERY);
        if (!is_string($query) || '' === $query) {
            return null;
        }

        parse_str($query, $parameters);
        $value = $parameters[$name] ?? null;

        return is_string($value) && '' !== $value ? $value : null;
    }
}
162