1 | <?php |
||
2 | /* For licensing terms, see /license.txt */ |
||
3 | |||
/**
 * Helper methods for cleaning and rewriting HTML/XML fragments.
 *
 * Judging by the `imscc` manifest namespace and the `$IMS-CC-FILEBASE$`
 * token handled here, this class supports an IMS Common Cartridge import;
 * the collaborating classes (Cc1p3Convert, LibxmlErrorsMgr) live elsewhere.
 */
class CcEntities
{
    /**
     * Prepares a value for inclusion into XML.
     *
     * Existing HTML entities are decoded first, then only the XML-significant
     * characters `&`, `<` and `>` are re-encoded (quotes are left alone and
     * already-encoded entities are not encoded a second time).
     *
     * @param string $value
     *
     * @return string
     */
    public static function safexml($value)
    {
        $decoded = html_entity_decode($value, ENT_QUOTES, 'UTF-8');

        return htmlspecialchars($decoded, ENT_NOQUOTES, 'UTF-8', false);
    }

    /**
     * Loads an XML file into a new DOMDocument, logging the attempt.
     *
     * A failed load is logged but not treated as fatal: the (then empty)
     * document is returned in either case.
     *
     * @param string $path_to_file
     *
     * @return DOMDocument
     */
    public function loadXmlResource($path_to_file)
    {
        $resource = new DOMDocument();

        Cc1p3Convert::logAction('Load the XML resource file: '.$path_to_file);

        if (!$resource->load($path_to_file)) {
            Cc1p3Convert::logAction('Cannot load the XML resource file: '.$path_to_file, false);
        }

        return $resource;
    }

    /**
     * Rewrites relative `img[src]` and `a[href]` values of an HTML fragment.
     *
     * Values without a URL scheme are stripped of the `$IMS-CC-FILEBASE$`
     * token, resolved against `$rootPath` and prefixed with the
     * `$@FILEPHP@$` placeholder. Values that carry a scheme are kept as they
     * are. Elements that do not have the attribute at all are left untouched,
     * so that e.g. named anchors do not suddenly gain an `href`.
     *
     * @param string $html
     * @param string $rootPath
     *
     * @return string the markup found inside the document body
     */
    public function updateSources($html, $rootPath = '')
    {
        $document = $this->loadHtml($html);

        $tags = ['img' => 'src', 'a' => 'href'];

        foreach ($tags as $tag => $attribute) {
            $elements = $document->getElementsByTagName($tag);

            foreach ($elements as $element) {
                if (!$element->hasAttribute($attribute)) {
                    continue;
                }

                $attribute_value = $element->getAttribute($attribute);
                // parse_url() yields null (no scheme) or false (malformed URL);
                // both are treated as "relative".
                $protocol = parse_url($attribute_value, PHP_URL_SCHEME);

                if (empty($protocol)) {
                    $attribute_value = str_replace('$IMS-CC-FILEBASE$', '', $attribute_value);
                    $attribute_value = $this->fullPath($rootPath.'/'.$attribute_value, '/');
                    $attribute_value = '$@FILEPHP@$'.'/'.$attribute_value;
                }

                $element->setAttribute($attribute, $attribute_value);
            }
        }

        return $this->htmlInsidebody($document);
    }

    /**
     * Normalises a path by resolving `.` and `..` segments.
     *
     * The `$IMS-CC-FILEBASE$` token is removed first. The path is then walked
     * from its last segment to its first: `.` segments are dropped and each
     * `..` cancels the nearest preceding real segment. A leading separator is
     * not kept in the result. A path that contains no separator is returned
     * unchanged; an empty (or non-string) path yields an empty string.
     *
     * NOTE(review): only the very first extraction reads from the trimmed
     * copy of the path while all offsets come from the untrimmed one, so
     * paths with leading whitespace may resolve unexpectedly - confirm whether
     * such input can occur.
     *
     * @param string $path
     * @param string $dir_sep
     *
     * @return string
     */
    public function fullPath($path, $dir_sep = DIRECTORY_SEPARATOR)
    {
        $token = '$IMS-CC-FILEBASE$';
        $path = str_replace($token, '', $path);

        // Always defined, so empty input no longer returns an undefined variable.
        $result = '';

        if (!is_string($path) || $path === '') {
            return $result;
        }

        $dot_dir = '.';
        $up_dir = '..';
        $length = strlen($path);
        $rtemp = trim($path);
        $start = strrpos($path, $dir_sep);

        if ($start === false) {
            // Nothing to resolve without a separator.
            return $path;
        }

        // Number of pending ".." segments that still have to cancel a directory.
        $rcount = 0;
        $can_continue = true;

        while ($can_continue) {
            // $length - $start is deliberately generous: it only has to reach the end of $rtemp.
            $dir_part = ($start !== false) ? substr($rtemp, $start + 1, $length - $start) : $rtemp;

            if ($dir_part === false) {
                // Only reachable on PHP versions where substr() can return false.
                break;
            }

            if ($dir_part != $dot_dir) {
                if ($dir_part == $up_dir) {
                    $rcount++;
                } elseif ($rcount > 0) {
                    $rcount--;
                } else {
                    $result = ($result == '') ? $dir_part : $dir_part.$dir_sep.$result;
                }
            }

            // Drop the segment just handled and locate the next separator to the left.
            $rtemp = ($start === false) ? '' : substr($path, 0, $start);
            $start = strrpos($rtemp, $dir_sep);
            $can_continue = (($start !== false) || (strlen($rtemp) > 0));
        }

        return $result;
    }

    /**
     * Gives every image of an HTML fragment an `alt` and a `title`.
     *
     * Attributes that are missing or empty are filled with the file name
     * (without extension) taken from the image `src`.
     *
     * @param string $html
     *
     * @return string the markup found inside the document body
     */
    public function includeTitles($html)
    {
        $document = $this->loadHtml($html);

        $images = $document->getElementsByTagName('img');

        foreach ($images as $image) {
            $filename = pathinfo($image->getAttribute('src'), PATHINFO_FILENAME);

            $alt = $image->getAttribute('alt');
            $title = $image->getAttribute('title');

            // getAttribute() returns '' for a missing attribute; compare explicitly
            // so that a legitimate value such as "0" is preserved.
            $image->setAttribute('alt', ($alt === '') ? $filename : $alt);
            $image->setAttribute('title', ($title === '') ? $filename : $title);
        }

        return $this->htmlInsidebody($document);
    }

    /**
     * Looks up the file `href` of a manifest resource by its identifier.
     *
     * NOTE(review): Cc1p3Convert::newxPath() is presumed to return a DOMXPath
     * bound to the manifest - confirm against that class.
     *
     * @param string $identifier
     *
     * @return string the href of the first matching file, or '' when none is found
     */
    public function getExternalXml($identifier)
    {
        $xpath = Cc1p3Convert::newxPath(Cc1p3Convert::$manifest, Cc1p3Convert::$namespaces);

        $files = $xpath->query('/imscc:manifest/imscc:resources/imscc:resource[@identifier="'.
            $identifier.'"]/imscc:file/@href');

        // query() gives false on an invalid expression and an empty list when
        // nothing matches; a node list object itself is never "empty()".
        if (!$files || $files->length === 0) {
            return '';
        }

        $node = $files->item(0);

        return ($node === null) ? '' : $node->nodeValue;
    }

    /**
     * Builds a random string of ASCII letters.
     *
     * Uses mt_rand(), so the result is not suitable for security purposes.
     *
     * @param int $length
     *
     * @return string '' when $length is not positive
     */
    public function generateRandomString($length = 6)
    {
        $response = '';
        $source = 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ';

        if ($length > 0) {
            $source = str_split($source, 1);
            $last = count($source) - 1;

            for ($i = 1; $i <= $length; $i++) {
                $response .= $source[mt_rand(0, $last)];
            }
        }

        return $response;
    }

    /**
     * Shortens a text to at most `$max` bytes.
     *
     * When `$max` is greater than 10 and the text is actually too long, the
     * cut text ends with the marker ` [...]` (which counts towards `$max`).
     * Text that already fits is returned without a marker. For `$max` of 10
     * or less the text is cut hard. Lengths are measured in bytes, and tags
     * are stripped after cutting.
     *
     * @param string $text
     * @param int    $max
     * @param bool   $remove_html
     *
     * @return string
     */
    public function truncateText($text, $max, $remove_html)
    {
        if ($max > 10) {
            // Only mark the text as truncated when something was really removed.
            if (strlen($text) > $max) {
                $text = substr($text, 0, ($max - 6)).' [...]';
            }
        } else {
            $text = substr($text, 0, $max);
        }

        return $remove_html ? strip_tags($text) : $text;
    }

    /**
     * Converts content to UTF-8 and reduces it to the inside of its body.
     *
     * The encoding is taken from the parsed document when available and
     * detected otherwise; conversion only happens when the content is not
     * already valid UTF-8. Afterwards the content of the `body` element (or,
     * failing that, of the `html` element) is extracted with a regular
     * expression; content with neither tag is returned whole.
     *
     * @param string $content
     *
     * @return string
     */
    protected function prepareContent($content)
    {
        $result = $content;
        if (empty($result)) {
            return '';
        }
        $encoding = null;
        // Not referenced again on purpose. NOTE(review): presumably this object
        // captures libxml errors for as long as it is alive - confirm in LibxmlErrorsMgr.
        $xml_error = new LibxmlErrorsMgr();
        $dom = new DOMDocument();
        $dom->validateOnParse = false;
        $dom->strictErrorChecking = false;
        if ($dom->loadHTML($content)) {
            $encoding = $dom->xmlEncoding;
        }
        if (empty($encoding)) {
            $encoding = mb_detect_encoding($content, 'auto', true);
        }
        if (!empty($encoding) && !mb_check_encoding($content, 'UTF-8')) {
            $result = mb_convert_encoding($content, 'UTF-8', $encoding);
        }

        // See if we can strip off body tag and anything outside of it.
        foreach (['body', 'html'] as $tagname) {
            $regex = str_replace('##', $tagname, "/<##[^>]*>(.+)<\/##>/is");
            if (preg_match($regex, $result, $matches)) {
                $result = $matches[1];
                break;
            }
        }

        return $result;
    }

    /**
     * Parses an HTML string into a DOMDocument, forcing UTF-8 interpretation.
     *
     * @param string $html
     *
     * @return DOMDocument
     */
    private function loadHtml($html)
    {
        // Need to make sure that the html passed has charset meta tag.
        $metatag = '<meta http-equiv="Content-Type" content="text/html; charset=utf-8" />';
        if (strpos($html, $metatag) === false) {
            $html = '<html><head>'.$metatag.'</head><body>'.$html.'</body></html>';
        }

        $document = new DOMDocument();

        // Keep parser complaints about sloppy markup out of the output without
        // resorting to the @ operator.
        $previous = libxml_use_internal_errors(true);
        $document->loadHTML($html);
        if (!$previous) {
            // Only discard what was collected here; a caller that had already
            // enabled internal errors keeps its buffer.
            libxml_clear_errors();
        }
        libxml_use_internal_errors($previous);

        return $document;
    }

    /**
     * Serialises the content of the first body element of a document.
     *
     * The body is canonicalised (C14N) and its own opening and closing tags
     * are removed, whether or not the opening tag carries attributes.
     *
     * @param DOMDocument $domdocument
     *
     * @return string '' when the document has no body
     */
    private function htmlInsidebody($domdocument)
    {
        $html = '';
        $bodyitems = $domdocument->getElementsByTagName('body');
        if ($bodyitems->length > 0) {
            $serialized = $bodyitems->item(0)->C14N();

            if (is_string($serialized)) {
                // The serialisation starts with the body start tag and ends with its end tag.
                $open = strpos($serialized, '>');
                $close = strripos($serialized, '</body>');

                if ($open !== false && $close !== false && $close > $open) {
                    $html = substr($serialized, $open + 1, $close - $open - 1);
                }
            }
        }

        return $html;
    }
}
||
247 |