@@ -54,451 +54,451 @@ |
||
| 54 | 54 | class ImportField extends UploadField |
| 55 | 55 | { |
| 56 | 56 | |
| 57 | - private static $allowed_actions = ['upload']; |
|
| 58 | - |
|
| 59 | - private static $importer_class = ServiceConnector::class; |
|
| 60 | - |
|
/**
 * Upload action: converts the posted document as soon as it arrives.
 *
 * Responds through httpError() with 403 when the field is disabled or read-only
 * and with 400 when the form's security token does not validate. Otherwise the
 * document is imported into the form's record (optionally keeping the source
 * file and writing a table of contents) and a one-element JSON array describing
 * the upload, including any error, is returned.
 *
 * @param HTTPRequest $request Request carrying the 'Upload' entry and the import options.
 * @return HTTPResponse JSON response describing the upload.
 */
public function upload(HTTPRequest $request)
{
    if ($this->isDisabled() || $this->isReadonly()) {
        return $this->httpError(403);
    }

    // The import overwrites page content, so insist on a valid security token.
    if (!$this->getForm()->getSecurityToken()->checkRequest($request)) {
        return $this->httpError(400);
    }

    $tmpfile = $request->postVar('Upload');

    // Describe the upload, or report that nothing reached temporary storage.
    $return = $tmpfile
        ? [
            'name' => $tmpfile['name'],
            'size' => $tmpfile['size'],
            'type' => $tmpfile['type'],
            'error' => $tmpfile['error']
        ]
        : [
            'error' => _t(
                'SilverStripe\\AssetAdmin\\Forms\\UploadField.FIELDNOTSET',
                'File information not found'
            )
        ];

    if (!$return['error']) {
        // Import options posted alongside the file.
        $splitHeader = (int)$request->postVar('SplitHeader');
        $keepSource = (bool)$request->postVar('KeepSource');
        $chosenFolderID = (int)$request->postVar('ChosenFolderID');
        $publishPages = (bool)$request->postVar('PublishPages');
        $includeTOC = (bool)$request->postVar('IncludeTOC');

        // Keep a copy of the source document first when asked to.
        $preservedDocument = $keepSource
            ? $this->preserveSourceDocument($tmpfile, $chosenFolderID)
            : null;

        // Convert the document and write the page(s).
        $importResult = $this->importFromPOST($tmpfile, $splitHeader, $publishPages, $chosenFolderID);
        $importFailed = is_array($importResult) && isset($importResult['error']);

        if ($importFailed) {
            $return['error'] = $importResult['error'];
        } elseif ($includeTOC) {
            $this->writeTOC($publishPages, $preservedDocument);
        }
    }

    $response = HTTPResponse::create(Convert::raw2json([$return]));
    $response->addHeader('Content-Type', 'application/json');

    return $response;
}
|
| 121 | - |
|
/**
 * Preserves the source file by storing it in the chosen folder and linking it
 * to the form's record through ImportedFromFileID.
 *
 * @param array $tmpfile Temporary file data structure (PHP upload array).
 * @param int $chosenFolderID ID of the target folder; when empty or unknown the
 *                            uploader's default location is used.
 * @return File Stored file.
 */
protected function preserveSourceDocument($tmpfile, $chosenFolderID = null)
{
    // Upload::loadIntoFile() expects a folder *path* as its third argument, not a
    // Folder ID, so resolve the ID first. Passing the raw ID would store the file
    // in a folder literally named after the number.
    $folderPath = false;
    if ($chosenFolderID) {
        $folder = DataObject::get_by_id(Folder::class, (int)$chosenFolderID);
        if ($folder && $folder->exists()) {
            $folderPath = $folder->getFilename();
        }
    }

    $upload = Upload::create();

    $file = File::create();
    $upload->loadIntoFile($tmpfile, $file, $folderPath);

    // Remember on the page which file it was imported from.
    $page = $this->form->getRecord();
    $page->ImportedFromFileID = $file->ID;
    $page->write();

    return $file;
}
|
| 142 | - |
|
/**
 * Builds and writes the table of contents for the document.
 *
 * When the form's record has children, its Content is replaced by a list of
 * links to them. Otherwise the record's own Content is parsed, every top-level
 * h1/h2 gets a numbered id ("h1.N" / "h2.N") and a list of anchors to those ids
 * is put in front of the content. Nothing happens when the form has no record.
 *
 * @param bool $publishPages Should the parent page be published.
 * @param File $preservedDocument Set if the link to the original document should be added.
 */
protected function writeTOC($publishPages = false, $preservedDocument = null)
{
    $page = $this->form->getRecord();
    if (!$page) {
        return;
    }

    $content = '<ul>';

    if ($page->Children()->Count() > 0) {
        foreach ($page->Children() as $child) {
            // Titles and links are plain text here; escape them for the HTML context.
            $content .= '<li><a href="' . Convert::raw2att($child->Link()) . '">' .
                Convert::raw2xml($child->Title) . '</a></li>';
        }
        $page->Content = $content . '</ul>';
    } elseif (trim((string)$page->Content) !== '') {
        // Only parse non-empty content: DOMDocument::loadHTML() rejects an empty
        // string and would leave no <body> to walk.
        $doc = new DOMDocument();
        $doc->loadHTML($page->Content);
        $body = $doc->getElementsByTagName('body')->item(0);
        $node = $body ? $body->firstChild : null;
        $counters = ['h1' => 1, 'h2' => 1];

        while ($node) {
            if ($node instanceof DOMElement && isset($counters[$node->tagName])) {
                $level = $node->tagName;
                $anchor = $level . '.' . $counters[$level];
                $label = trim(preg_replace('/\n|\r/', '', Convert::html2raw($node->textContent)));

                $content .= ($level === 'h2' ? '<li class="menu-h2">' : '<li>') .
                    '<a href="#' . $anchor . '">' . Convert::raw2xml($label) . '</a></li>';
                $node->setAttribute('id', $anchor);
                $counters[$level]++;
            }
            $node = $node->nextSibling;
        }

        // NOTE(review): saveHTML() serialises the whole document, wrapper tags
        // included - confirm that is what should end up in Content.
        $page->Content = $content . '</ul>' . $doc->saveHTML();
    }

    // Add in the link to the original document, if provided.
    if ($preservedDocument) {
        $page->Content = '<a href="' .
            Convert::raw2att($preservedDocument->Link()) .
            '" title="download original document">download original document (' .
            Convert::raw2xml((string)$preservedDocument->getSize()) .
            ')</a>' .
            $page->Content;
    }

    // Store the result
    $page->write();
    if ($publishPages) {
        $page->publishRecursive();
    }
}
|
| 202 | - |
|
/**
 * Serialises the contents of a body node as an HTML fragment.
 *
 * The node is imported into a scratch document under a bare <html> root, the
 * document is dumped as HTML, and everything up to the opening <body> tag and
 * from the closing </body> tag onwards is stripped by two regular expressions.
 *
 * @param DOMDocument $doc Not used by this method.
 * @param DOMNode $node Node to serialise (deep copy is taken; the original is untouched).
 * @return string Markup left after stripping the wrapper tags.
 */
protected function getBodyText($doc, $node)
{
    // Scratch document: <html> root with a deep copy of the node inside it.
    $scratch = new DOMDocument();
    $root = $scratch->appendChild($scratch->createElement('html'));
    $root->appendChild($scratch->importNode($node, true));

    // Patterns are applied in order: leading wrapper first, trailing wrapper second.
    return preg_replace(
        ['/^.*<body>/', '/<\/body>.*$/'],
        '',
        $scratch->saveHTML()
    );
}
|
| 220 | - |
|
/**
 * Used only when writing the document that has been split by headers.
 * Can write both to the chapter pages as well as the master page.
 *
 * A chapter is written to the child page of the form's record that carries the
 * same title; the page is created when it does not exist yet. Pages written
 * here are removed from $this->unusedChildren.
 *
 * @param string $subtitle Title of the chapter - if missing, it will write to the master page.
 * @param DOMDocument $subdoc Document owning $subnode (passed through to getBodyText()).
 * @param DOMNode $subnode Body node holding the chapter's content.
 * @param int $sort Order of the chapter page; ignored when empty or when writing the master page.
 * @param bool $publishPages Whether to publish the resulting child/master pages.
 */
protected function writeContent($subtitle, $subdoc, $subnode, $sort = null, $publishPages = false)
{
    $record = $this->form->getRecord();
    // Serialise once; both branches store the same markup.
    $html = $this->getBodyText($subdoc, $subnode);

    if (!$subtitle) {
        // Write to the master page.
        $record->Content = $html;
        $record->write();

        if ($publishPages) {
            $record->publishRecursive();
        }

        return;
    }

    // Write the chapter page to a subpage. The title comes from the uploaded
    // document, so look it up through the ORM filter (parameterised) rather
    // than splicing it into a SQL string.
    $page = Page::get()->filter([
        'Title' => $subtitle,
        'ParentID' => (int)$record->ID,
    ])->first();
    if (!$page) {
        $page = Page::create();
        $page->ParentID = $record->ID;
        $page->Title = $subtitle;
    }

    // This child is in use, so it must survive the post-import cleanup.
    unset($this->unusedChildren[$page->ID]);

    // The former dump of each chapter to ASSETS_PATH/index-<sort>.html was a
    // debugging leftover that exposed page content in the public assets
    // directory; it has been removed.

    if ($sort) {
        $page->Sort = $sort;
    }
    $page->Content = $html;
    $page->write();
    if ($publishPages) {
        $page->publishRecursive();
    }
}
|
| 268 | - |
|
/**
 * Imports the uploaded document onto the current page and writes it.
 * CAUTION: Overwrites any existing content on the page!
 *
 * The configured importer converts the file to HTML, which is tidied,
 * sanitised and cleaned (images registered as Image records; font tags,
 * class/style/lang attributes, empty paragraphs and headings, anchors without
 * href and line breaks removed). The result is written to the form's record,
 * or split into child pages at every h1/h2 when $splitHeader is 1 or 2.
 * Children of the record that were not written during this import are deleted
 * from both the draft and the live stage.
 *
 * @param array $tmpFile Array as received from PHP's POST upload.
 * @param int|bool $splitHeader Heading level to split by (1 or 2); any other value imports onto a single page.
 * @param bool $publishPages Whether the underlying pages should be published after import.
 * @param int $chosenFolderID ID of the working folder - here the converted file and images will be stored.
 * @return array|null Array with an 'error' key on failure; nothing on success.
 */
public function importFromPOST($tmpFile, $splitHeader = false, $publishPages = false, $chosenFolderID = null)
{
    $fileDescriptor = [
        'name' => $tmpFile['name'],
        'path' => $tmpFile['tmp_name'],
        'mimeType' => $tmpFile['type']
    ];

    $sourcePage = $this->form->getRecord();
    $importerClass = $this->config()->get('importer_class');
    $importer = Injector::inst()->create($importerClass, $fileDescriptor, $chosenFolderID);
    $content = $importer->import();

    if (is_array($content) && isset($content['error'])) {
        return $content;
    }

    // Clean up with tidy (requires tidy module)
    $tidy = new Tidy();
    $tidy->parseString($content, ['output-xhtml' => true], 'utf8');
    $tidy->cleanRepair();

    // body() can be null and ->child is null for an empty body; treat both as "no content".
    $tidyBody = $tidy->body();
    $tidyChildren = ($tidyBody && $tidyBody->child) ? $tidyBody->child : [];
    $fragment = [];
    foreach ($tidyChildren as $child) {
        $fragment[] = $child->value;
    }

    $htmlValue = Injector::inst()->create(HTMLValue::class, implode("\n", $fragment));

    // Sanitise
    $santiser = Injector::inst()->create(HTMLEditorSanitiser::class, HTMLEditorConfig::get_active());
    $santiser->sanitise($htmlValue);

    // Load in the HTML
    $doc = $htmlValue->getDocument();
    $xpath = new DOMXPath($doc);

    // Make sure any images are added as Image records with a relative link to assets.
    // The folder is resolved from the $chosenFolderID argument - the same value the
    // importer received and the Image lookups below use - not from an object property.
    // NOTE(review): only the folder's own Name is used for the path, which presumably
    // breaks for nested folders - confirm where the importer stores the images.
    $chosenFolder = ($chosenFolderID) ? DataObject::get_by_id(Folder::class, $chosenFolderID) : null;
    $folderName = ($chosenFolder) ? '/' . $chosenFolder->Name : '';
    foreach ($xpath->query('//img') as $img) {
        $originalPath = 'assets/' . $folderName . '/' . $img->getAttribute('src');
        $name = FileNameFilter::create()->filter(basename($originalPath));

        $image = Image::get()->filter([
            'Name' => $name,
            'ParentID' => (int)$chosenFolderID
        ])->first();
        if (!($image && $image->exists())) {
            $image = Image::create();
            $image->ParentID = (int)$chosenFolderID;
            $image->Name = $name;
            $image->write();
        }

        // make sure it's put in place correctly so Image record knows where it is.
        // e.g. in the case of underscores being renamed to dashes.
        @rename(Director::getAbsFile($originalPath), Director::getAbsFile($image->getFilename()));

        $img->setAttribute('src', $image->getFilename());
    }

    // XPath rule => replacement tag. Entries with a numeric key have no
    // replacement: the matched element is unwrapped into its parent.
    $remove_rules = [
        // Change any headers that contain font tags (other than font face tags) into p elements
        '//h1[.//font[not(@face)]]' => 'p',
        // Remove any font tags
        '//font'
    ];

    foreach ($remove_rules as $rule => $parenttag) {
        if (is_numeric($rule)) {
            $rule = $parenttag;
            $parenttag = null;
        }

        // Snapshot the matches before the tree is modified.
        $nodes = [];
        foreach ($xpath->query($rule) as $node) {
            $nodes[] = $node;
        }

        foreach ($nodes as $node) {
            $parent = $node->parentNode;

            if ($parenttag) {
                // Insert the replacement element right after the matched node.
                $parent = $doc->createElement($parenttag);
                $node->nextSibling ?
                    $node->parentNode->insertBefore($parent, $node->nextSibling) :
                    $node->parentNode->appendChild($parent);
            }

            // Move the children across, then drop the emptied node.
            while ($node->firstChild) {
                $parent->appendChild($node->firstChild);
            }
            $node->parentNode->removeChild($node);
        }
    }

    // Strip style, class, lang attributes.
    foreach ($doc->getElementsByTagName('*') as $el) {
        $el->removeAttribute('class');
        $el->removeAttribute('style');
        $el->removeAttribute('lang');
    }

    $headingXPath = [
        'self::h1',
        'self::h2',
        'self::h3',
        'self::h4',
        'self::h5',
        'self::h6',
    ];
    // Remove a bunch of unwanted elements
    $clean = [
        // Empty paragraphs
        '//p[not(descendant-or-self::text() | descendant-or-self::img)]',
        // Empty headers
        '//*[' . implode(' | ', $headingXPath) . '][not(descendant-or-self::text() | descendant-or-self::img)]',
        // Anchors
        '//a[not(@href)]',
        // BR tags
        '//br'
    ];

    foreach ($clean as $query) {
        // First get all the nodes. Need to build array, as they'll disappear from the
        // nodelist while we're deleting them, causing the indexing to screw up.
        $nodes = [];
        foreach ($xpath->query($query) as $node) {
            $nodes[] = $node;
        }

        // Then remove them all
        foreach ($nodes as $node) {
            if ($node->parentNode) {
                $node->parentNode->removeChild($node);
            }
        }
    }

    // Now split the document into portions by the chosen heading level
    $body = $doc->getElementsByTagName('body')->item(0);

    // Every existing child starts out as "unused"; writeContent() removes the ones it writes.
    $this->unusedChildren = [];
    foreach ($sourcePage->Children() as $child) {
        $this->unusedChildren[$child->ID] = $child;
    }

    $documentImporterFieldError = false;

    // The flag is captured by reference: captured by value, the assignment inside
    // the handler would never reach this scope and failures would go unreported.
    $documentImporterFieldErrorHandler = function (
        $errno,
        $errstr,
        $errfile,
        $errline
    ) use (&$documentImporterFieldError) {
        $documentImporterFieldError = _t(
            'SilverStripe\\DocumentConverter\\ServiceConnector.PROCESSFAILED',
            'Could not process document, please double-check you uploaded a .doc or .docx format.',
            'Document Converter processes Word documents into HTML.'
        );

        // Do not cascade the error through other handlers
        return true;
    };

    // Deprecation notices say nothing about the document, so they must not abort the import.
    set_error_handler($documentImporterFieldErrorHandler, E_ALL & ~(E_DEPRECATED | E_USER_DEPRECATED));

    try {
        $subtitle = null;
        $subdoc = new DOMDocument();
        $subnode = $subdoc->createElement('body');
        $node = $body->firstChild;
        $sort = 1;
        if ($splitHeader == 1 || $splitHeader == 2) {
            while ($node && !$documentImporterFieldError) {
                if ($node instanceof DOMElement && $node->tagName == 'h' . $splitHeader) {
                    // A new chapter starts: flush what has been collected so far.
                    if ($subnode->hasChildNodes()) {
                        $this->writeContent($subtitle, $subdoc, $subnode, $sort, $publishPages);
                        $sort++;
                    }

                    $subdoc = new DOMDocument();
                    $subnode = $subdoc->createElement('body');
                    $subtitle = trim(preg_replace('/\n|\r/', '', Convert::html2raw($node->textContent)));
                } else {
                    $subnode->appendChild($subdoc->importNode($node, true));
                }

                $node = $node->nextSibling;
            }
        } else {
            $this->writeContent($subtitle, $subdoc, $body, null, $publishPages);
        }

        // Flush the trailing chapter with its sort position, like the chapters before it.
        if ($subnode->hasChildNodes() && !$documentImporterFieldError) {
            $this->writeContent($subtitle, $subdoc, $subnode, $sort, $publishPages);
        }
    } finally {
        // Always put the previous handler back, even when writing throws.
        restore_error_handler();
    }

    if ($documentImporterFieldError) {
        return ['error' => $documentImporterFieldError];
    }

    // Delete the children that this import did not write, from draft and live.
    foreach ($this->unusedChildren as $child) {
        $origStage = Versioned::current_stage();

        Versioned::set_stage(Versioned::DRAFT);
        $draft = clone $child;
        $draft->delete();

        Versioned::set_stage(Versioned::LIVE);
        $published = clone $child;
        $published->delete();

        Versioned::set_stage($origStage);
    }

    $sourcePage->write();
}
|
| 57 | + private static $allowed_actions = ['upload']; |
|
| 58 | + |
|
| 59 | + private static $importer_class = ServiceConnector::class; |
|
| 60 | + |
|
/**
 * Upload action: converts the posted document as soon as it arrives.
 *
 * Responds through httpError() with 403 when the field is disabled or read-only
 * and with 400 when the form's security token does not validate. Otherwise the
 * document is imported into the form's record (optionally keeping the source
 * file and writing a table of contents) and a one-element JSON array describing
 * the upload, including any error, is returned.
 *
 * @param HTTPRequest $request Request carrying the 'Upload' entry and the import options.
 * @return HTTPResponse JSON response describing the upload.
 */
public function upload(HTTPRequest $request)
{
    if ($this->isDisabled() || $this->isReadonly()) {
        return $this->httpError(403);
    }

    // The import overwrites page content, so insist on a valid security token.
    if (!$this->getForm()->getSecurityToken()->checkRequest($request)) {
        return $this->httpError(400);
    }

    $tmpfile = $request->postVar('Upload');

    // Describe the upload, or report that nothing reached temporary storage.
    $return = $tmpfile
        ? [
            'name' => $tmpfile['name'],
            'size' => $tmpfile['size'],
            'type' => $tmpfile['type'],
            'error' => $tmpfile['error']
        ]
        : [
            'error' => _t(
                'SilverStripe\\AssetAdmin\\Forms\\UploadField.FIELDNOTSET',
                'File information not found'
            )
        ];

    if (!$return['error']) {
        // Import options posted alongside the file.
        $splitHeader = (int)$request->postVar('SplitHeader');
        $keepSource = (bool)$request->postVar('KeepSource');
        $chosenFolderID = (int)$request->postVar('ChosenFolderID');
        $publishPages = (bool)$request->postVar('PublishPages');
        $includeTOC = (bool)$request->postVar('IncludeTOC');

        // Keep a copy of the source document first when asked to.
        $preservedDocument = $keepSource
            ? $this->preserveSourceDocument($tmpfile, $chosenFolderID)
            : null;

        // Convert the document and write the page(s).
        $importResult = $this->importFromPOST($tmpfile, $splitHeader, $publishPages, $chosenFolderID);
        $importFailed = is_array($importResult) && isset($importResult['error']);

        if ($importFailed) {
            $return['error'] = $importResult['error'];
        } elseif ($includeTOC) {
            $this->writeTOC($publishPages, $preservedDocument);
        }
    }

    $response = HTTPResponse::create(Convert::raw2json([$return]));
    $response->addHeader('Content-Type', 'application/json');

    return $response;
}
|
| 121 | + |
|
/**
 * Preserves the source file by storing it in the chosen folder and linking it
 * to the form's record through ImportedFromFileID.
 *
 * @param array $tmpfile Temporary file data structure (PHP upload array).
 * @param int $chosenFolderID ID of the target folder; when empty or unknown the
 *                            uploader's default location is used.
 * @return File Stored file.
 */
protected function preserveSourceDocument($tmpfile, $chosenFolderID = null)
{
    // Upload::loadIntoFile() expects a folder *path* as its third argument, not a
    // Folder ID, so resolve the ID first. Passing the raw ID would store the file
    // in a folder literally named after the number.
    $folderPath = false;
    if ($chosenFolderID) {
        $folder = DataObject::get_by_id(Folder::class, (int)$chosenFolderID);
        if ($folder && $folder->exists()) {
            $folderPath = $folder->getFilename();
        }
    }

    $upload = Upload::create();

    $file = File::create();
    $upload->loadIntoFile($tmpfile, $file, $folderPath);

    // Remember on the page which file it was imported from.
    $page = $this->form->getRecord();
    $page->ImportedFromFileID = $file->ID;
    $page->write();

    return $file;
}
|
| 142 | + |
|
/**
 * Builds and writes the table of contents for the document.
 *
 * When the form's record has children, its Content is replaced by a list of
 * links to them. Otherwise the record's own Content is parsed, every top-level
 * h1/h2 gets a numbered id ("h1.N" / "h2.N") and a list of anchors to those ids
 * is put in front of the content. Nothing happens when the form has no record.
 *
 * @param bool $publishPages Should the parent page be published.
 * @param File $preservedDocument Set if the link to the original document should be added.
 */
protected function writeTOC($publishPages = false, $preservedDocument = null)
{
    $page = $this->form->getRecord();
    if (!$page) {
        return;
    }

    $content = '<ul>';

    if ($page->Children()->Count() > 0) {
        foreach ($page->Children() as $child) {
            // Titles and links are plain text here; escape them for the HTML context.
            $content .= '<li><a href="' . Convert::raw2att($child->Link()) . '">' .
                Convert::raw2xml($child->Title) . '</a></li>';
        }
        $page->Content = $content . '</ul>';
    } elseif (trim((string)$page->Content) !== '') {
        // Only parse non-empty content: DOMDocument::loadHTML() rejects an empty
        // string and would leave no <body> to walk.
        $doc = new DOMDocument();
        $doc->loadHTML($page->Content);
        $body = $doc->getElementsByTagName('body')->item(0);
        $node = $body ? $body->firstChild : null;
        $counters = ['h1' => 1, 'h2' => 1];

        while ($node) {
            if ($node instanceof DOMElement && isset($counters[$node->tagName])) {
                $level = $node->tagName;
                $anchor = $level . '.' . $counters[$level];
                $label = trim(preg_replace('/\n|\r/', '', Convert::html2raw($node->textContent)));

                $content .= ($level === 'h2' ? '<li class="menu-h2">' : '<li>') .
                    '<a href="#' . $anchor . '">' . Convert::raw2xml($label) . '</a></li>';
                $node->setAttribute('id', $anchor);
                $counters[$level]++;
            }
            $node = $node->nextSibling;
        }

        // NOTE(review): saveHTML() serialises the whole document, wrapper tags
        // included - confirm that is what should end up in Content.
        $page->Content = $content . '</ul>' . $doc->saveHTML();
    }

    // Add in the link to the original document, if provided.
    if ($preservedDocument) {
        $page->Content = '<a href="' .
            Convert::raw2att($preservedDocument->Link()) .
            '" title="download original document">download original document (' .
            Convert::raw2xml((string)$preservedDocument->getSize()) .
            ')</a>' .
            $page->Content;
    }

    // Store the result
    $page->write();
    if ($publishPages) {
        $page->publishRecursive();
    }
}
|
| 202 | + |
|
/**
 * Serialises the contents of a body node as an HTML fragment.
 *
 * The node is imported into a scratch document under a bare <html> root, the
 * document is dumped as HTML, and everything up to the opening <body> tag and
 * from the closing </body> tag onwards is stripped by two regular expressions.
 *
 * @param DOMDocument $doc Not used by this method.
 * @param DOMNode $node Node to serialise (deep copy is taken; the original is untouched).
 * @return string Markup left after stripping the wrapper tags.
 */
protected function getBodyText($doc, $node)
{
    // Scratch document: <html> root with a deep copy of the node inside it.
    $scratch = new DOMDocument();
    $root = $scratch->appendChild($scratch->createElement('html'));
    $root->appendChild($scratch->importNode($node, true));

    // Patterns are applied in order: leading wrapper first, trailing wrapper second.
    return preg_replace(
        ['/^.*<body>/', '/<\/body>.*$/'],
        '',
        $scratch->saveHTML()
    );
}
|
| 220 | + |
|
/**
 * Used only when writing the document that has been split by headers.
 * Can write both to the chapter pages as well as the master page.
 *
 * A chapter is written to the child page of the form's record that carries the
 * same title; the page is created when it does not exist yet. Pages written
 * here are removed from $this->unusedChildren.
 *
 * @param string $subtitle Title of the chapter - if missing, it will write to the master page.
 * @param DOMDocument $subdoc Document owning $subnode (passed through to getBodyText()).
 * @param DOMNode $subnode Body node holding the chapter's content.
 * @param int $sort Order of the chapter page; ignored when empty or when writing the master page.
 * @param bool $publishPages Whether to publish the resulting child/master pages.
 */
protected function writeContent($subtitle, $subdoc, $subnode, $sort = null, $publishPages = false)
{
    $record = $this->form->getRecord();
    // Serialise once; both branches store the same markup.
    $html = $this->getBodyText($subdoc, $subnode);

    if (!$subtitle) {
        // Write to the master page.
        $record->Content = $html;
        $record->write();

        if ($publishPages) {
            $record->publishRecursive();
        }

        return;
    }

    // Write the chapter page to a subpage. The title comes from the uploaded
    // document, so look it up through the ORM filter (parameterised) rather
    // than splicing it into a SQL string.
    $page = Page::get()->filter([
        'Title' => $subtitle,
        'ParentID' => (int)$record->ID,
    ])->first();
    if (!$page) {
        $page = Page::create();
        $page->ParentID = $record->ID;
        $page->Title = $subtitle;
    }

    // This child is in use, so it must survive the post-import cleanup.
    unset($this->unusedChildren[$page->ID]);

    // The former dump of each chapter to ASSETS_PATH/index-<sort>.html was a
    // debugging leftover that exposed page content in the public assets
    // directory; it has been removed.

    if ($sort) {
        $page->Sort = $sort;
    }
    $page->Content = $html;
    $page->write();
    if ($publishPages) {
        $page->publishRecursive();
    }
}
|
| 268 | + |
|
| 269 | + /** |
|
| 270 | + * Imports a document at a certain path onto the current page and writes it. |
|
| 271 | + * CAUTION: Overwrites any existing content on the page! |
|
| 272 | + * |
|
| 273 | + * @param array $tmpFile Array as received from PHP's POST upload. |
|
| 274 | + * @param bool $splitHeader Heading level to split by. |
|
| 275 | + * @param bool $publishPages Whether the underlying pages should be published after import. |
|
| 276 | + * @param int $chosenFolderID ID of the working folder - here the converted file and images will be stored. |
|
| 277 | + */ |
|
| 278 | + public function importFromPOST($tmpFile, $splitHeader = false, $publishPages = false, $chosenFolderID = null) |
|
| 279 | + { |
|
| 280 | + |
|
| 281 | + $fileDescriptor = [ |
|
| 282 | + 'name' => $tmpFile['name'], |
|
| 283 | + 'path' => $tmpFile['tmp_name'], |
|
| 284 | + 'mimeType' => $tmpFile['type'] |
|
| 285 | + ]; |
|
| 286 | + |
|
| 287 | + $sourcePage = $this->form->getRecord(); |
|
| 288 | + $importerClass = $this->config()->get('importer_class'); |
|
| 289 | + $importer = Injector::inst()->create($importerClass, $fileDescriptor, $chosenFolderID); |
|
| 290 | + $content = $importer->import(); |
|
| 291 | + |
|
| 292 | + if (is_array($content) && isset($content['error'])) { |
|
| 293 | + return $content; |
|
| 294 | + } |
|
| 295 | + |
|
| 296 | + // Clean up with tidy (requires tidy module) |
|
| 297 | + $tidy = new Tidy(); |
|
| 298 | + $tidy->parseString($content, ['output-xhtml' => true], 'utf8'); |
|
| 299 | + $tidy->cleanRepair(); |
|
| 300 | + |
|
| 301 | + $fragment = []; |
|
| 302 | + foreach ($tidy->body()->child as $child) { |
|
| 303 | + $fragment[] = $child->value; |
|
| 304 | + } |
|
| 305 | + |
|
| 306 | + $htmlValue = Injector::inst()->create(HTMLValue::class, implode("\n", $fragment)); |
|
| 307 | + |
|
| 308 | + // Sanitise |
|
| 309 | + $santiser = Injector::inst()->create(HTMLEditorSanitiser::class, HTMLEditorConfig::get_active()); |
|
| 310 | + $santiser->sanitise($htmlValue); |
|
| 311 | + |
|
| 312 | + // Load in the HTML |
|
| 313 | + $doc = $htmlValue->getDocument(); |
|
| 314 | + $xpath = new DOMXPath($doc); |
|
| 315 | + |
|
| 316 | + // make sure any images are added as Image records with a relative link to assets |
|
| 317 | + $chosenFolder = ($this->chosenFolderID) ? DataObject::get_by_id(Folder::class, $this->chosenFolderID) : null; |
|
| 318 | + $folderName = ($chosenFolder) ? '/' . $chosenFolder->Name : ''; |
|
| 319 | + $imgs = $xpath->query('//img'); |
|
| 320 | + for ($i = 0; $i < $imgs->length; $i++) { |
|
| 321 | + $img = $imgs->item($i); |
|
| 322 | + $originalPath = 'assets/' . $folderName . '/' . $img->getAttribute('src'); |
|
| 323 | + $name = FileNameFilter::create()->filter(basename($originalPath)); |
|
| 324 | + |
|
| 325 | + $image = Image::get()->filter([ |
|
| 326 | + 'Name' => $name, |
|
| 327 | + 'ParentID' => (int)$chosenFolderID |
|
| 328 | + ])->first(); |
|
| 329 | + if (!($image && $image->exists())) { |
|
| 330 | + $image = Image::create(); |
|
| 331 | + $image->ParentID = (int)$chosenFolderID; |
|
| 332 | + $image->Name = $name; |
|
| 333 | + $image->write(); |
|
| 334 | + } |
|
| 335 | + |
|
| 336 | + // make sure it's put in place correctly so Image record knows where it is. |
|
| 337 | + // e.g. in the case of underscores being renamed to dashes. |
|
| 338 | + @rename(Director::getAbsFile($originalPath), Director::getAbsFile($image->getFilename())); |
|
| 339 | + |
|
| 340 | + $img->setAttribute('src', $image->getFilename()); |
|
| 341 | + } |
|
| 342 | + |
|
| 343 | + $remove_rules = [ |
|
| 344 | + // Change any headers that contain font tags (other than font face tags) into p elements |
|
| 345 | + '//h1[.//font[not(@face)]]' => 'p', |
|
| 346 | + // Remove any font tags |
|
| 347 | + '//font' |
|
| 348 | + ]; |
|
| 349 | + |
|
| 350 | + foreach ($remove_rules as $rule => $parenttag) { |
|
| 351 | + if (is_numeric($rule)) { |
|
| 352 | + $rule = $parenttag; |
|
| 353 | + $parenttag = null; |
|
| 354 | + } |
|
| 355 | + |
|
| 356 | + $nodes = []; |
|
| 357 | + foreach ($xpath->query($rule) as $node) { |
|
| 358 | + $nodes[] = $node; |
|
| 359 | + } |
|
| 360 | + |
|
| 361 | + foreach ($nodes as $node) { |
|
| 362 | + $parent = $node->parentNode; |
|
| 363 | + |
|
| 364 | + if ($parenttag) { |
|
| 365 | + $parent = $doc->createElement($parenttag); |
|
| 366 | + $node->nextSibling ? |
|
| 367 | + $node->parentNode->insertBefore($parent, $node->nextSibling) : |
|
| 368 | + $node->parentNode->appendChild($parent); |
|
| 369 | + } |
|
| 370 | + |
|
| 371 | + while ($node->firstChild) { |
|
| 372 | + $parent->appendChild($node->firstChild); |
|
| 373 | + } |
|
| 374 | + $node->parentNode->removeChild($node); |
|
| 375 | + } |
|
| 376 | + } |
|
| 377 | + |
|
| 378 | + // Strip style, class, lang attributes. |
|
| 379 | + $els = $doc->getElementsByTagName('*'); |
|
| 380 | + for ($i = 0; $i < $els->length; $i++) { |
|
| 381 | + $el = $els->item($i); |
|
| 382 | + $el->removeAttribute('class'); |
|
| 383 | + $el->removeAttribute('style'); |
|
| 384 | + $el->removeAttribute('lang'); |
|
| 385 | + } |
|
| 386 | + |
|
| 387 | + $els = $doc->getElementsByTagName('*'); |
|
| 388 | + |
|
| 389 | + $headingXPath = [ |
|
| 390 | + 'self::h1', |
|
| 391 | + 'self::h2', |
|
| 392 | + 'self::h3', |
|
| 393 | + 'self::h4', |
|
| 394 | + 'self::h5', |
|
| 395 | + 'self::h6', |
|
| 396 | + ]; |
|
| 397 | + // Remove a bunch of unwanted elements |
|
| 398 | + $clean = [ |
|
| 399 | + // Empty paragraphs |
|
| 400 | + '//p[not(descendant-or-self::text() | descendant-or-self::img)]', |
|
| 401 | + // Empty headers |
|
| 402 | + '//*[' . implode(' | ', $headingXPath) . '][not(descendant-or-self::text() | descendant-or-self::img)]', |
|
| 403 | + // Anchors |
|
| 404 | + '//a[not(@href)]', |
|
| 405 | + // BR tags |
|
| 406 | + '//br' |
|
| 407 | + ]; |
|
| 408 | + |
|
| 409 | + foreach ($clean as $query) { |
|
| 410 | + // First get all the nodes. Need to build array, as they'll disappear from the |
|
| 411 | + // nodelist while we're deleteing them, causing the indexing to screw up. |
|
| 412 | + $nodes = []; |
|
| 413 | + foreach ($xpath->query($query) as $node) { |
|
| 414 | + $nodes[] = $node; |
|
| 415 | + } |
|
| 416 | + |
|
| 417 | + // Then remove them all |
|
| 418 | + foreach ($nodes as $node) { |
|
| 419 | + if ($node->parentNode) { |
|
| 420 | + $node->parentNode->removeChild($node); |
|
| 421 | + } |
|
| 422 | + } |
|
| 423 | + } |
|
| 424 | + |
|
| 425 | + // Now split the document into portions by H1 |
|
| 426 | + $body = $doc->getElementsByTagName('body')->item(0); |
|
| 427 | + |
|
| 428 | + $this->unusedChildren = []; |
|
| 429 | + foreach ($sourcePage->Children() as $child) { |
|
| 430 | + $this->unusedChildren[$child->ID] = $child; |
|
| 431 | + } |
|
| 432 | + |
|
| 433 | + $documentImporterFieldError = false; |
|
| 434 | + |
|
| 435 | + $documentImporterFieldErrorHandler = function ( |
|
| 436 | + $errno, |
|
| 437 | + $errstr, |
|
| 438 | + $errfile, |
|
| 439 | + $errline |
|
| 440 | + ) use ($documentImporterFieldError) { |
|
| 441 | + $documentImporterFieldError = _t( |
|
| 442 | + 'SilverStripe\\DocumentConverter\\ServiceConnector.PROCESSFAILED', |
|
| 443 | + 'Could not process document, please double-check you uploaded a .doc or .docx format.', |
|
| 444 | + 'Document Converter processes Word documents into HTML.' |
|
| 445 | + ); |
|
| 446 | + |
|
| 447 | + // Do not cascade the error through other handlers |
|
| 448 | + return true; |
|
| 449 | + }; |
|
| 450 | + |
|
| 451 | + set_error_handler($documentImporterFieldErrorHandler); |
|
| 452 | + |
|
| 453 | + $subtitle = null; |
|
| 454 | + $subdoc = new DOMDocument(); |
|
| 455 | + $subnode = $subdoc->createElement('body'); |
|
| 456 | + $node = $body->firstChild; |
|
| 457 | + $sort = 1; |
|
| 458 | + if ($splitHeader == 1 || $splitHeader == 2) { |
|
| 459 | + while ($node && !$documentImporterFieldError) { |
|
| 460 | + if ($node instanceof DOMElement && $node->tagName == 'h' . $splitHeader) { |
|
| 461 | + if ($subnode->hasChildNodes()) { |
|
| 462 | + $this->writeContent($subtitle, $subdoc, $subnode, $sort, $publishPages); |
|
| 463 | + $sort++; |
|
| 464 | + } |
|
| 465 | + |
|
| 466 | + $subdoc = new DOMDocument(); |
|
| 467 | + $subnode = $subdoc->createElement('body'); |
|
| 468 | + $subtitle = trim(preg_replace('/\n|\r/', '', Convert::html2raw($node->textContent))); |
|
| 469 | + } else { |
|
| 470 | + $subnode->appendChild($subdoc->importNode($node, true)); |
|
| 471 | + } |
|
| 472 | + |
|
| 473 | + $node = $node->nextSibling; |
|
| 474 | + } |
|
| 475 | + } else { |
|
| 476 | + $this->writeContent($subtitle, $subdoc, $body, null, $publishPages); |
|
| 477 | + } |
|
| 478 | + |
|
| 479 | + if ($subnode->hasChildNodes() && !$documentImporterFieldError) { |
|
| 480 | + $this->writeContent($subtitle, $subdoc, $subnode, null, $publishPages); |
|
| 481 | + } |
|
| 482 | + |
|
| 483 | + restore_error_handler(); |
|
| 484 | + if ($documentImporterFieldError) { |
|
| 485 | + return ['error' => $documentImporterFieldError]; |
|
| 486 | + } |
|
| 487 | + |
|
| 488 | + foreach ($this->unusedChildren as $child) { |
|
| 489 | + $origStage = Versioned::current_stage(); |
|
| 490 | + |
|
| 491 | + Versioned::set_stage(Versioned::DRAFT); |
|
| 492 | + $draft = clone $child; |
|
| 493 | + $draft->delete(); |
|
| 494 | + |
|
| 495 | + Versioned::set_stage(Versioned::LIVE); |
|
| 496 | + $published = clone $child; |
|
| 497 | + $published->delete(); |
|
| 498 | + |
|
| 499 | + Versioned::set_stage($origStage); |
|
| 500 | + } |
|
| 501 | + |
|
| 502 | + $sourcePage->write(); |
|
| 503 | + } |
|
| 504 | 504 | } |
@@ -16,182 +16,182 @@ |
||
| 16 | 16 | class ServiceConnector |
| 17 | 17 | { |
| 18 | 18 | |
| 19 | - use Configurable; |
|
| 20 | - use Injectable; |
|
| 21 | - |
|
| 22 | - /** |
|
| 23 | - * @config |
|
| 24 | - * @var array Docvert connection username |
|
| 25 | - */ |
|
| 26 | - private static $username = null; |
|
| 27 | - |
|
| 28 | - /** |
|
| 29 | - * @config |
|
| 30 | - * @var array Docvert connection password |
|
| 31 | - */ |
|
| 32 | - private static $password = null; |
|
| 33 | - |
|
| 34 | - /** |
|
| 35 | - * @config |
|
| 36 | - * @var array Docvert service URL |
|
| 37 | - */ |
|
| 38 | - private static $url = null; |
|
| 39 | - |
|
| 40 | - /** |
|
| 41 | - * Associative array of: |
|
| 42 | - * - name: the full name of the file including the extension. |
|
| 43 | - * - path: the path to the file on the local filesystem. |
|
| 44 | - * - mimeType |
|
| 45 | - */ |
|
| 46 | - protected $fileDescriptor; |
|
| 47 | - |
|
| 48 | - /** |
|
| 49 | - * @var int |
|
| 50 | - * ID of a SilverStripe\Assets\Folder |
|
| 51 | - */ |
|
| 52 | - protected $chosenFolderID; |
|
| 53 | - |
|
| 54 | - /** |
|
| 55 | - * @var array instance specific connection details |
|
| 56 | - */ |
|
| 57 | - protected $docvertDetails = [ |
|
| 58 | - 'username' => null, |
|
| 59 | - 'password' => null, |
|
| 60 | - 'url' => null |
|
| 61 | - ]; |
|
| 62 | - |
|
| 63 | - public function __construct($fileDescriptor, $chosenFolderID = null) |
|
| 64 | - { |
|
| 65 | - $this->fileDescriptor = $fileDescriptor; |
|
| 66 | - $this->chosenFolderID = $chosenFolderID; |
|
| 67 | - } |
|
| 68 | - |
|
| 69 | - |
|
| 70 | - /** |
|
| 71 | - * Retrieves detail in priority order from |
|
| 72 | - * 1. local instance field |
|
| 73 | - * 2. Config |
|
| 74 | - * 3. Environment |
|
| 75 | - * |
|
| 76 | - * @param string $detail key name for detail |
|
| 77 | - * @return string the value for that key |
|
| 78 | - */ |
|
| 79 | - protected function getDetail($detail) |
|
| 80 | - { |
|
| 81 | - $fromDetails = $this->docvertDetails[$detail]; |
|
| 82 | - if ($fromDetails) { |
|
| 83 | - return $fromDetails; |
|
| 84 | - } |
|
| 19 | + use Configurable; |
|
| 20 | + use Injectable; |
|
| 21 | + |
|
| 22 | + /** |
|
| 23 | + * @config |
|
| 24 | + * @var array Docvert connection username |
|
| 25 | + */ |
|
| 26 | + private static $username = null; |
|
| 27 | + |
|
| 28 | + /** |
|
| 29 | + * @config |
|
| 30 | + * @var array Docvert connection password |
|
| 31 | + */ |
|
| 32 | + private static $password = null; |
|
| 33 | + |
|
| 34 | + /** |
|
| 35 | + * @config |
|
| 36 | + * @var array Docvert service URL |
|
| 37 | + */ |
|
| 38 | + private static $url = null; |
|
| 39 | + |
|
| 40 | + /** |
|
| 41 | + * Associative array of: |
|
| 42 | + * - name: the full name of the file including the extension. |
|
| 43 | + * - path: the path to the file on the local filesystem. |
|
| 44 | + * - mimeType |
|
| 45 | + */ |
|
| 46 | + protected $fileDescriptor; |
|
| 47 | + |
|
| 48 | + /** |
|
| 49 | + * @var int |
|
| 50 | + * ID of a SilverStripe\Assets\Folder |
|
| 51 | + */ |
|
| 52 | + protected $chosenFolderID; |
|
| 53 | + |
|
| 54 | + /** |
|
| 55 | + * @var array instance specific connection details |
|
| 56 | + */ |
|
| 57 | + protected $docvertDetails = [ |
|
| 58 | + 'username' => null, |
|
| 59 | + 'password' => null, |
|
| 60 | + 'url' => null |
|
| 61 | + ]; |
|
| 62 | + |
|
| 63 | + public function __construct($fileDescriptor, $chosenFolderID = null) |
|
| 64 | + { |
|
| 65 | + $this->fileDescriptor = $fileDescriptor; |
|
| 66 | + $this->chosenFolderID = $chosenFolderID; |
|
| 67 | + } |
|
| 68 | + |
|
| 69 | + |
|
| 70 | + /** |
|
| 71 | + * Retrieves detail in priority order from |
|
| 72 | + * 1. local instance field |
|
| 73 | + * 2. Config |
|
| 74 | + * 3. Environment |
|
| 75 | + * |
|
| 76 | + * @param string $detail key name for detail |
|
| 77 | + * @return string the value for that key |
|
| 78 | + */ |
|
| 79 | + protected function getDetail($detail) |
|
| 80 | + { |
|
| 81 | + $fromDetails = $this->docvertDetails[$detail]; |
|
| 82 | + if ($fromDetails) { |
|
| 83 | + return $fromDetails; |
|
| 84 | + } |
|
| 85 | 85 | |
| 86 | - $fromConfig = $this->config()->get($detail); |
|
| 87 | - if ($fromConfig) { |
|
| 88 | - return $fromConfig; |
|
| 89 | - } |
|
| 90 | - |
|
| 91 | - $fromEnv = Environment::getEnv('DOCVERT_' . strtoupper($detail)); |
|
| 92 | - if ($fromEnv) { |
|
| 93 | - return $fromEnv; |
|
| 94 | - } |
|
| 95 | - } |
|
| 96 | - |
|
| 97 | - public function setUsername($username = null) |
|
| 98 | - { |
|
| 99 | - $this->docvertDetails['username'] = $username; |
|
| 100 | - return $this; |
|
| 101 | - } |
|
| 102 | - |
|
| 103 | - public function getUsername() |
|
| 104 | - { |
|
| 105 | - return $this->getDetail('username'); |
|
| 106 | - } |
|
| 107 | - |
|
| 108 | - public function setPassword($password = null) |
|
| 109 | - { |
|
| 110 | - $this->docvertDetails['password'] = $password; |
|
| 111 | - return $this; |
|
| 112 | - } |
|
| 113 | - |
|
| 114 | - public function getPassword() |
|
| 115 | - { |
|
| 116 | - return $this->getDetail('password'); |
|
| 117 | - } |
|
| 118 | - |
|
| 119 | - public function setUrl($url = null) |
|
| 120 | - { |
|
| 121 | - $this->docvertDetails['url'] = $url; |
|
| 122 | - return $this; |
|
| 123 | - } |
|
| 124 | - |
|
| 125 | - public function getUrl() |
|
| 126 | - { |
|
| 127 | - return $this->getDetail('url'); |
|
| 128 | - } |
|
| 129 | - |
|
| 130 | - public function import() |
|
| 131 | - { |
|
| 132 | - $ch = curl_init(); |
|
| 133 | - |
|
| 134 | - $file = new CURLFile( |
|
| 135 | - $this->fileDescriptor['path'], |
|
| 136 | - $this->fileDescriptor['mimeType'], |
|
| 137 | - $this->fileDescriptor['name'] |
|
| 138 | - ); |
|
| 139 | - |
|
| 140 | - curl_setopt_array($ch, [ |
|
| 141 | - CURLOPT_URL => $this->getUrl(), |
|
| 142 | - CURLOPT_USERPWD => sprintf('%s:%s', $this->getUsername(), $this->getPassword()), |
|
| 143 | - CURLOPT_POST => 1, |
|
| 144 | - CURLOPT_POSTFIELDS => ['file' => $file], |
|
| 145 | - CURLOPT_CONNECTTIMEOUT => 25, |
|
| 146 | - CURLOPT_TIMEOUT => 100, |
|
| 147 | - ]); |
|
| 148 | - |
|
| 149 | - $chosenFolder = ($this->chosenFolderID) ? DataObject::get_by_id(Folder::class, $this->chosenFolderID) : null; |
|
| 150 | - $folderName = ($chosenFolder) ? '/' . $chosenFolder->Name : ''; |
|
| 151 | - $outname = tempnam(ASSETS_PATH, 'convert'); |
|
| 152 | - $outzip = $outname . '.zip'; |
|
| 153 | - $out = fopen($outzip, 'w'); |
|
| 154 | - curl_setopt($ch, CURLOPT_FILE, $out); |
|
| 155 | - $returnValue = curl_exec($ch); |
|
| 156 | - $status = curl_getinfo($ch, CURLINFO_HTTP_CODE); |
|
| 157 | - curl_close($ch); |
|
| 158 | - fclose($out); |
|
| 159 | - chmod($outzip, 0666); |
|
| 160 | - |
|
| 161 | - if (!$returnValue || ($status != 200)) { |
|
| 162 | - return ['error' => _t( |
|
| 163 | - __CLASS__ . '.SERVERUNREACHABLE', |
|
| 164 | - 'Could not contact document conversion server. Please try again later ' . |
|
| 165 | - 'or contact your system administrator.', |
|
| 166 | - 'Document Converter process Word documents into HTML.' |
|
| 167 | - )]; |
|
| 168 | - } |
|
| 169 | - |
|
| 170 | - // extract the converted document into assets |
|
| 171 | - // you need php zip, e.g. apt-get install php-zip |
|
| 172 | - $zip = new ZipArchive(); |
|
| 173 | - |
|
| 174 | - if ($zip->open($outzip)) { |
|
| 175 | - $zip->extractTo(ASSETS_PATH .$folderName); |
|
| 176 | - $zip->close(); |
|
| 177 | - } |
|
| 178 | - |
|
| 179 | - // remove temporary files |
|
| 180 | - unlink($outname); |
|
| 181 | - unlink($outzip); |
|
| 182 | - |
|
| 183 | - if (!file_exists(ASSETS_PATH . $folderName . '/index.html')) { |
|
| 184 | - return ['error' => _t( |
|
| 185 | - __CLASS__ . '.PROCESSFAILED', |
|
| 186 | - 'Could not process document, please double-check you uploaded a .doc or .docx format.', |
|
| 187 | - 'Document Converter processes Word documents into HTML.' |
|
| 188 | - )]; |
|
| 189 | - } |
|
| 190 | - |
|
| 191 | - $content = file_get_contents(ASSETS_PATH . $folderName . '/index.html'); |
|
| 192 | - |
|
| 193 | - unlink(ASSETS_PATH . $folderName . '/index.html'); |
|
| 194 | - |
|
| 195 | - return $content; |
|
| 196 | - } |
|
| 86 | + $fromConfig = $this->config()->get($detail); |
|
| 87 | + if ($fromConfig) { |
|
| 88 | + return $fromConfig; |
|
| 89 | + } |
|
| 90 | + |
|
| 91 | + $fromEnv = Environment::getEnv('DOCVERT_' . strtoupper($detail)); |
|
| 92 | + if ($fromEnv) { |
|
| 93 | + return $fromEnv; |
|
| 94 | + } |
|
| 95 | + } |
|
| 96 | + |
|
| 97 | + public function setUsername($username = null) |
|
| 98 | + { |
|
| 99 | + $this->docvertDetails['username'] = $username; |
|
| 100 | + return $this; |
|
| 101 | + } |
|
| 102 | + |
|
| 103 | + public function getUsername() |
|
| 104 | + { |
|
| 105 | + return $this->getDetail('username'); |
|
| 106 | + } |
|
| 107 | + |
|
| 108 | + public function setPassword($password = null) |
|
| 109 | + { |
|
| 110 | + $this->docvertDetails['password'] = $password; |
|
| 111 | + return $this; |
|
| 112 | + } |
|
| 113 | + |
|
| 114 | + public function getPassword() |
|
| 115 | + { |
|
| 116 | + return $this->getDetail('password'); |
|
| 117 | + } |
|
| 118 | + |
|
| 119 | + public function setUrl($url = null) |
|
| 120 | + { |
|
| 121 | + $this->docvertDetails['url'] = $url; |
|
| 122 | + return $this; |
|
| 123 | + } |
|
| 124 | + |
|
| 125 | + public function getUrl() |
|
| 126 | + { |
|
| 127 | + return $this->getDetail('url'); |
|
| 128 | + } |
|
| 129 | + |
|
| 130 | + public function import() |
|
| 131 | + { |
|
| 132 | + $ch = curl_init(); |
|
| 133 | + |
|
| 134 | + $file = new CURLFile( |
|
| 135 | + $this->fileDescriptor['path'], |
|
| 136 | + $this->fileDescriptor['mimeType'], |
|
| 137 | + $this->fileDescriptor['name'] |
|
| 138 | + ); |
|
| 139 | + |
|
| 140 | + curl_setopt_array($ch, [ |
|
| 141 | + CURLOPT_URL => $this->getUrl(), |
|
| 142 | + CURLOPT_USERPWD => sprintf('%s:%s', $this->getUsername(), $this->getPassword()), |
|
| 143 | + CURLOPT_POST => 1, |
|
| 144 | + CURLOPT_POSTFIELDS => ['file' => $file], |
|
| 145 | + CURLOPT_CONNECTTIMEOUT => 25, |
|
| 146 | + CURLOPT_TIMEOUT => 100, |
|
| 147 | + ]); |
|
| 148 | + |
|
| 149 | + $chosenFolder = ($this->chosenFolderID) ? DataObject::get_by_id(Folder::class, $this->chosenFolderID) : null; |
|
| 150 | + $folderName = ($chosenFolder) ? '/' . $chosenFolder->Name : ''; |
|
| 151 | + $outname = tempnam(ASSETS_PATH, 'convert'); |
|
| 152 | + $outzip = $outname . '.zip'; |
|
| 153 | + $out = fopen($outzip, 'w'); |
|
| 154 | + curl_setopt($ch, CURLOPT_FILE, $out); |
|
| 155 | + $returnValue = curl_exec($ch); |
|
| 156 | + $status = curl_getinfo($ch, CURLINFO_HTTP_CODE); |
|
| 157 | + curl_close($ch); |
|
| 158 | + fclose($out); |
|
| 159 | + chmod($outzip, 0666); |
|
| 160 | + |
|
| 161 | + if (!$returnValue || ($status != 200)) { |
|
| 162 | + return ['error' => _t( |
|
| 163 | + __CLASS__ . '.SERVERUNREACHABLE', |
|
| 164 | + 'Could not contact document conversion server. Please try again later ' . |
|
| 165 | + 'or contact your system administrator.', |
|
| 166 | + 'Document Converter process Word documents into HTML.' |
|
| 167 | + )]; |
|
| 168 | + } |
|
| 169 | + |
|
| 170 | + // extract the converted document into assets |
|
| 171 | + // you need php zip, e.g. apt-get install php-zip |
|
| 172 | + $zip = new ZipArchive(); |
|
| 173 | + |
|
| 174 | + if ($zip->open($outzip)) { |
|
| 175 | + $zip->extractTo(ASSETS_PATH .$folderName); |
|
| 176 | + $zip->close(); |
|
| 177 | + } |
|
| 178 | + |
|
| 179 | + // remove temporary files |
|
| 180 | + unlink($outname); |
|
| 181 | + unlink($outzip); |
|
| 182 | + |
|
| 183 | + if (!file_exists(ASSETS_PATH . $folderName . '/index.html')) { |
|
| 184 | + return ['error' => _t( |
|
| 185 | + __CLASS__ . '.PROCESSFAILED', |
|
| 186 | + 'Could not process document, please double-check you uploaded a .doc or .docx format.', |
|
| 187 | + 'Document Converter processes Word documents into HTML.' |
|
| 188 | + )]; |
|
| 189 | + } |
|
| 190 | + |
|
| 191 | + $content = file_get_contents(ASSETS_PATH . $folderName . '/index.html'); |
|
| 192 | + |
|
| 193 | + unlink(ASSETS_PATH . $folderName . '/index.html'); |
|
| 194 | + |
|
| 195 | + return $content; |
|
| 196 | + } |
|
| 197 | 197 | } |
@@ -88,7 +88,7 @@ discard block |
||
| 88 | 88 | return $fromConfig; |
| 89 | 89 | } |
| 90 | 90 | |
| 91 | - $fromEnv = Environment::getEnv('DOCVERT_' . strtoupper($detail)); |
|
| 91 | + $fromEnv = Environment::getEnv('DOCVERT_'.strtoupper($detail)); |
|
| 92 | 92 | if ($fromEnv) { |
| 93 | 93 | return $fromEnv; |
| 94 | 94 | } |
@@ -147,9 +147,9 @@ discard block |
||
| 147 | 147 | ]); |
| 148 | 148 | |
| 149 | 149 | $chosenFolder = ($this->chosenFolderID) ? DataObject::get_by_id(Folder::class, $this->chosenFolderID) : null; |
| 150 | - $folderName = ($chosenFolder) ? '/' . $chosenFolder->Name : ''; |
|
| 150 | + $folderName = ($chosenFolder) ? '/'.$chosenFolder->Name : ''; |
|
| 151 | 151 | $outname = tempnam(ASSETS_PATH, 'convert'); |
| 152 | - $outzip = $outname . '.zip'; |
|
| 152 | + $outzip = $outname.'.zip'; |
|
| 153 | 153 | $out = fopen($outzip, 'w'); |
| 154 | 154 | curl_setopt($ch, CURLOPT_FILE, $out); |
| 155 | 155 | $returnValue = curl_exec($ch); |
@@ -160,8 +160,8 @@ discard block |
||
| 160 | 160 | |
| 161 | 161 | if (!$returnValue || ($status != 200)) { |
| 162 | 162 | return ['error' => _t( |
| 163 | - __CLASS__ . '.SERVERUNREACHABLE', |
|
| 164 | - 'Could not contact document conversion server. Please try again later ' . |
|
| 163 | + __CLASS__.'.SERVERUNREACHABLE', |
|
| 164 | + 'Could not contact document conversion server. Please try again later '. |
|
| 165 | 165 | 'or contact your system administrator.', |
| 166 | 166 | 'Document Converter process Word documents into HTML.' |
| 167 | 167 | )]; |
@@ -172,7 +172,7 @@ discard block |
||
| 172 | 172 | $zip = new ZipArchive(); |
| 173 | 173 | |
| 174 | 174 | if ($zip->open($outzip)) { |
| 175 | - $zip->extractTo(ASSETS_PATH .$folderName); |
|
| 175 | + $zip->extractTo(ASSETS_PATH.$folderName); |
|
| 176 | 176 | $zip->close(); |
| 177 | 177 | } |
| 178 | 178 | |
@@ -180,17 +180,17 @@ discard block |
||
| 180 | 180 | unlink($outname); |
| 181 | 181 | unlink($outzip); |
| 182 | 182 | |
| 183 | - if (!file_exists(ASSETS_PATH . $folderName . '/index.html')) { |
|
| 183 | + if (!file_exists(ASSETS_PATH.$folderName.'/index.html')) { |
|
| 184 | 184 | return ['error' => _t( |
| 185 | - __CLASS__ . '.PROCESSFAILED', |
|
| 185 | + __CLASS__.'.PROCESSFAILED', |
|
| 186 | 186 | 'Could not process document, please double-check you uploaded a .doc or .docx format.', |
| 187 | 187 | 'Document Converter processes Word documents into HTML.' |
| 188 | 188 | )]; |
| 189 | 189 | } |
| 190 | 190 | |
| 191 | - $content = file_get_contents(ASSETS_PATH . $folderName . '/index.html'); |
|
| 191 | + $content = file_get_contents(ASSETS_PATH.$folderName.'/index.html'); |
|
| 192 | 192 | |
| 193 | - unlink(ASSETS_PATH . $folderName . '/index.html'); |
|
| 193 | + unlink(ASSETS_PATH.$folderName.'/index.html'); |
|
| 194 | 194 | |
| 195 | 195 | return $content; |
| 196 | 196 | } |
@@ -8,9 +8,9 @@ |
||
| 8 | 8 | |
| 9 | 9 | class TestPage extends Page implements TestOnly |
| 10 | 10 | { |
| 11 | - private static $table_name = 'DocvertTestPage'; |
|
| 12 | - private static $extensions = [PageExtension::class]; |
|
| 13 | - private static $defaults = [ |
|
| 14 | - 'Content' => '<h1>Default TestPage</h1><p>With pre-import content.</p>' |
|
| 15 | - ]; |
|
| 11 | + private static $table_name = 'DocvertTestPage'; |
|
| 12 | + private static $extensions = [PageExtension::class]; |
|
| 13 | + private static $defaults = [ |
|
| 14 | + 'Content' => '<h1>Default TestPage</h1><p>With pre-import content.</p>' |
|
| 15 | + ]; |
|
| 16 | 16 | } |