Passed
Push — 1.11.x ( bce6cd...c146d9 )
by Angel Fernando Quiroz
12:25
created

import/src/converter/CcEntities.php (3 issues)

1
<?php
2
/* For licensing terms, see /license.txt */
3
4
/**
 * Helper routines for cleaning up text, HTML fragments and resource paths
 * while converting imported content.
 *
 * NOTE(review): judging by the `$IMS-CC-FILEBASE$` token and the `imscc:`
 * XPath prefix this is used for IMS Common Cartridge packages; confirm
 * against the callers.
 */
class CcEntities
{
    /**
     * Prepares a value for inclusion into XML.
     *
     * Existing HTML entities are decoded first and the result is re-escaped,
     * leaving single and double quotes untouched (ENT_NOQUOTES).
     *
     * @param string $value
     *
     * @return string
     */
    public static function safexml($value)
    {
        $decoded = html_entity_decode($value, ENT_QUOTES, 'UTF-8');

        return htmlspecialchars($decoded, ENT_NOQUOTES, 'UTF-8', false);
    }

    /**
     * Loads an XML file into a new DOMDocument.
     *
     * A failure is only logged; the (then empty) document is still returned,
     * so callers must be prepared for a document without content.
     *
     * @param string $path_to_file
     *
     * @return DOMDocument
     */
    public function loadXmlResource($path_to_file)
    {
        $resource = new DOMDocument();

        Cc1p3Convert::logAction('Load the XML resource file: '.$path_to_file);

        if (!$resource->load($path_to_file)) {
            Cc1p3Convert::logAction('Cannot load the XML resource file: '.$path_to_file, false);
        }

        return $resource;
    }

    /**
     * Rewrites local `img[src]` and `a[href]` references in an HTML fragment
     * so that they point below the `$@FILEPHP@$` placeholder.
     *
     * Only references to local files are rewritten. Left untouched are:
     *  - elements that do not carry the attribute at all (e.g. `<a name="x">`),
     *  - fragment-only links (`#top`),
     *  - URLs with a scheme (`http:`, `mailto:`, ...) or a host (`//host/x`).
     *
     * @param string $html
     * @param string $rootPath Directory the relative references are resolved against
     *
     * @return string The content of the resulting <body>
     */
    public function updateSources($html, $rootPath = '')
    {
        $document = $this->loadHtml($html);

        $tags = ['img' => 'src', 'a' => 'href'];

        foreach ($tags as $tag => $attribute) {
            foreach ($document->getElementsByTagName($tag) as $element) {
                $value = $element->getAttribute($attribute);

                // getAttribute() yields '' for a missing attribute; writing a
                // value back would invent an attribute that was never there.
                if ($value === '' || $value[0] === '#') {
                    continue;
                }

                // parse_url() returns false for seriously malformed URLs; those
                // keep being treated as local paths.
                $parts = parse_url($value);
                if (is_array($parts) && (!empty($parts['scheme']) || !empty($parts['host']))) {
                    continue;
                }

                $value = str_replace('$IMS-CC-FILEBASE$', '', $value);
                $value = $this->fullPath($rootPath.'/'.$value, '/');

                $element->setAttribute($attribute, '$@FILEPHP@$/'.$value);
            }
        }

        return $this->htmlInsidebody($document);
    }

    /**
     * Normalises a path: removes the `$IMS-CC-FILEBASE$` token, surrounding
     * whitespace, empty and `.` segments, and resolves `..` segments.
     *
     * The result never starts or ends with a separator, and `..` segments
     * that would climb above the first segment are dropped. A path that does
     * not contain the separator at all is returned as is (after token removal
     * and trimming).
     *
     * @param string $path
     * @param string $dir_sep Separator used to split and re-join the path
     *
     * @return string Normalised path, '' for empty or non-scalar input
     */
    public function fullPath($path, $dir_sep = DIRECTORY_SEPARATOR)
    {
        if (!is_scalar($path)) {
            return '';
        }

        $path = trim(str_replace('$IMS-CC-FILEBASE$', '', (string) $path));

        if ($path === '') {
            return '';
        }

        if ($dir_sep === '' || strpos($path, $dir_sep) === false) {
            return $path;
        }

        $segments = [];

        foreach (explode($dir_sep, $path) as $segment) {
            if ($segment === '' || $segment === '.') {
                continue;
            }

            if ($segment === '..') {
                // Popping an empty stack is a no-op: excess ".." is discarded.
                array_pop($segments);
                continue;
            }

            $segments[] = $segment;
        }

        return implode($dir_sep, $segments);
    }

    /**
     * Makes sure every image of an HTML fragment has `alt` and `title`
     * attributes, falling back to the file name (without extension) of `src`.
     *
     * @param string $html
     *
     * @return string The content of the resulting <body>
     */
    public function includeTitles($html)
    {
        $document = $this->loadHtml($html);

        foreach ($document->getElementsByTagName('img') as $image) {
            $info = pathinfo($image->getAttribute('src'));
            $filename = isset($info['filename']) ? $info['filename'] : '';

            $alt = $image->getAttribute('alt');
            $title = $image->getAttribute('title');

            // Strict comparison: a literal "0" is a valid value, not a missing one.
            $image->setAttribute('alt', $alt === '' ? $filename : $alt);
            $image->setAttribute('title', $title === '' ? $filename : $title);
        }

        return $this->htmlInsidebody($document);
    }

    /**
     * Looks up the `href` of the first file declared for a manifest resource.
     *
     * @param string $identifier Value of the resource's `identifier` attribute
     *
     * @return string The href, or '' when the resource or its file is not found
     */
    public function getExternalXml($identifier)
    {
        $xpath = Cc1p3Convert::newxPath(Cc1p3Convert::$manifest, Cc1p3Convert::$namespaces);

        $files = $xpath->query(
            '/imscc:manifest/imscc:resources/imscc:resource[@identifier='.
            self::xpathLiteral($identifier).']/imscc:file/@href'
        );

        // query() returns false on an invalid expression and an empty (but
        // truthy) node list when nothing matches; both mean "not found".
        if (empty($files) || $files->length === 0) {
            return '';
        }

        return $files->item(0)->nodeValue;
    }

    /**
     * Builds a random string of ASCII letters.
     *
     * Uses mt_rand() and therefore must not be used for anything
     * security-sensitive (tokens, passwords, ...).
     *
     * @param int $length
     *
     * @return string '' when $length is not positive
     */
    public function generateRandomString($length = 6)
    {
        $source = 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ';
        $last = strlen($source) - 1;
        $length = (int) $length;
        $response = '';

        for ($i = 0; $i < $length; $i++) {
            $response .= $source[mt_rand(0, $last)];
        }

        return $response;
    }

    /**
     * Shortens a text to at most $max characters.
     *
     * Text that already fits is returned unchanged. When $max is greater
     * than 10 the cut is marked with a trailing ' [...]' (which counts
     * towards $max); otherwise the text is simply cut. HTML is stripped
     * before measuring so that a tag is never cut in half.
     *
     * @param string $text
     * @param int    $max         Maximum length in characters (UTF-8 aware)
     * @param bool   $remove_html Whether to strip HTML tags first
     *
     * @return string
     */
    public function truncateText($text, $max, $remove_html)
    {
        $text = (string) $text;
        $max = (int) $max;

        if ($remove_html) {
            $text = strip_tags($text);
        }

        if (mb_strlen($text, 'UTF-8') <= $max) {
            return $text;
        }

        if ($max > 10) {
            return mb_substr($text, 0, $max - 6, 'UTF-8').' [...]';
        }

        return mb_substr($text, 0, $max, 'UTF-8');
    }

    /**
     * Converts HTML content to UTF-8 when needed and reduces it to what is
     * inside its <body> (or, failing that, its <html>) element.
     *
     * @param string $content
     *
     * @return string '' for empty content
     */
    protected function prepareContent($content)
    {
        $result = $content;
        if (empty($result)) {
            return '';
        }

        $encoding = null;
        // NOTE(review): never read, but must stay alive until the method
        // returns; presumably it silences libxml errors for its lifetime -
        // confirm against LibxmlErrorsMgr.
        $xml_error = new LibxmlErrorsMgr();
        $dom = new DOMDocument();
        $dom->validateOnParse = false;
        $dom->strictErrorChecking = false;
        if ($dom->loadHTML($content)) {
            $encoding = $dom->xmlEncoding;
        }
        if (empty($encoding)) {
            $encoding = mb_detect_encoding($content, 'auto', true);
        }
        if (!empty($encoding) && !mb_check_encoding($content, 'UTF-8')) {
            $result = mb_convert_encoding($content, 'UTF-8', $encoding);
        }

        // See if we can strip off body tag and anything outside of it.
        foreach (['body', 'html'] as $tagname) {
            $regex = '/<'.$tagname.'[^>]*>(.+)<\/'.$tagname.'>/is';
            if (preg_match($regex, $result, $matches)) {
                $result = $matches[1];
                break;
            }
        }

        return $result;
    }

    /**
     * Parses an HTML string, wrapping it into a minimal UTF-8 document first
     * unless it already carries the charset meta tag.
     *
     * Parsing is best-effort: markup problems reported by libxml are
     * discarded and the document is returned as far as it could be built.
     *
     * @param string $html
     *
     * @return DOMDocument
     */
    private function loadHtml($html)
    {
        // Need to make sure that the html passed has charset meta tag.
        $metatag = '<meta http-equiv="Content-Type" content="text/html; charset=utf-8" />';
        if (strpos($html, $metatag) === false) {
            $html = '<html><head>'.$metatag.'</head><body>'.$html.'</body></html>';
        }

        $document = new DOMDocument();

        // Collect libxml errors internally instead of silencing the call with
        // "@", then restore whatever error mode the caller had configured.
        $previous = libxml_use_internal_errors(true);
        $document->loadHTML($html);
        libxml_clear_errors();
        libxml_use_internal_errors($previous);

        return $document;
    }

    /**
     * Serialises the content of the document's <body> (canonical XML form),
     * without the surrounding <body> tag itself.
     *
     * @param DOMDocument $domdocument
     *
     * @return string '' when there is no body or it cannot be serialised
     */
    private function htmlInsidebody($domdocument)
    {
        $bodyitems = $domdocument->getElementsByTagName('body');
        if ($bodyitems->length === 0) {
            return '';
        }

        $canonical = $bodyitems->item(0)->C14N();
        if (!is_string($canonical)) {
            return '';
        }

        // Strip only the outer wrapper. The opening tag may carry attributes;
        // C14N always double-quotes their values, so quoted sections are
        // skipped as a whole (they may legally contain ">").
        $canonical = preg_replace('/^<body(?:[^>"]|"[^"]*")*>/i', '', $canonical, 1);
        $canonical = preg_replace('/<\/body>\z/i', '', (string) $canonical, 1);

        return (string) $canonical;
    }

    /**
     * Quotes a value as an XPath 1.0 string literal.
     *
     * XPath has no escape character, so a value containing both quote kinds
     * has to be assembled with concat().
     *
     * @param string $value
     *
     * @return string
     */
    private static function xpathLiteral($value)
    {
        $value = (string) $value;

        if (strpos($value, '"') === false) {
            return '"'.$value.'"';
        }

        if (strpos($value, "'") === false) {
            return "'".$value."'";
        }

        return 'concat("'.implode('", \'"\', "', explode('"', $value)).'")';
    }
}
247