Inspection of "Merge pull request #7 from creative-commoners/pull..." - silverstripe/silverstripe-documentconverter - Measure and Improve Code Quality continuously with Scrutinizer

Completed

Push — master ( 54ac69...cc2d4e )

by Robbie

created 2017-12-22 00:47 UTC

Status

Indentation +447 added lines, -447 removed lines patch added patch discarded remove patch

@@ -54,451 +54,451 @@
 block discarded – undo
 class ImportField extends UploadField
 {
 
-    private static $allowed_actions = ['upload'];
-
-    private static $importer_class = ServiceConnector::class;
-
-    /**
-     * Process the document immediately upon upload.
-     */
-    public function upload(HTTPRequest $request)
-    {
-        if ($this->isDisabled() || $this->isReadonly()) {
-            return $this->httpError(403);
-        }
-
-        // Protect against CSRF on destructive action
-        $token = $this->getForm()->getSecurityToken();
-        if (!$token->checkRequest($request)) {
-            return $this->httpError(400);
-        }
-
-        $tmpfile = $request->postVar('Upload');
-
-        // Check if the file has been uploaded into the temporary storage.
-        if (!$tmpfile) {
-            $return = [
-                'error' => _t(
-                    'SilverStripe\\AssetAdmin\\Forms\\UploadField.FIELDNOTSET',
-                    'File information not found'
-                )
-            ];
-        } else {
-            $return = [
-                'name' => $tmpfile['name'],
-                'size' => $tmpfile['size'],
-                'type' => $tmpfile['type'],
-                'error' => $tmpfile['error']
-            ];
-        }
-
-        if (!$return['error']) {
-            // Get options for this import.
-            $splitHeader = (int)$request->postVar('SplitHeader');
-            $keepSource = (bool)$request->postVar('KeepSource');
-            $chosenFolderID = (int)$request->postVar('ChosenFolderID');
-            $publishPages = (bool)$request->postVar('PublishPages');
-            $includeTOC = (bool)$request->postVar('IncludeTOC');
-
-            // Process the document and write the page.
-            $preservedDocument = null;
-            if ($keepSource) {
-                $preservedDocument = $this->preserveSourceDocument($tmpfile, $chosenFolderID);
-            }
-
-            $importResult = $this->importFromPOST($tmpfile, $splitHeader, $publishPages, $chosenFolderID);
-            if (is_array($importResult) && isset($importResult['error'])) {
-                $return['error'] = $importResult['error'];
-            } elseif ($includeTOC) {
-                $this->writeTOC($publishPages, $keepSource ? $preservedDocument : null);
-            }
-        }
-
-        $response = HTTPResponse::create(Convert::raw2json([$return]));
-        $response->addHeader('Content-Type', 'application/json');
-        return $response;
-    }
-
-    /**
-     * Preserves the source file by copying it to a specified folder.
-     *
-     * @param $tmpfile Temporary file data structure.
-     * @param int $chosenFolderID Target folder.
-     * @return File Stored file.
-     */
-    protected function preserveSourceDocument($tmpfile, $chosenFolderID = null)
-    {
-        $upload = Upload::create();
-
-        $file = File::create();
-        $upload->loadIntoFile($tmpfile, $file, $chosenFolderID);
-
-        $page = $this->form->getRecord();
-        $page->ImportedFromFileID = $file->ID;
-        $page->write();
-
-        return $file;
-    }
-
-    /**
-     * Builds and writes the table of contents for the document.
-     *
-     * @param bool $publishPage Should the parent page be published.
-     * @param File $preservedDocument Set if the link to the original document should be added.
-     */
-    protected function writeTOC($publishPages = false, $preservedDocument = null)
-    {
-        $page = $this->form->getRecord();
-        $content = '<ul>';
-
-        if ($page) {
-            if ($page->Children()->Count() > 0) {
-                foreach ($page->Children() as $child) {
-                    $content .= '<li><a href="' . $child->Link() . '">' . $child->Title . '</a></li>';
-                }
-                $page->Content = $content . '</ul>';
-            } else {
-                $doc = new DOMDocument();
-                $doc->loadHTML($page->Content);
-                $body = $doc->getElementsByTagName('body')->item(0);
-                $node = $body->firstChild;
-                $h1 = $h2 = 1;
-                while ($node) {
-                    if ($node instanceof DOMElement && $node->tagName == 'h1') {
-                        $content .= '<li><a href="#h1.' . $h1 . '">' .
-                            trim(preg_replace('/\n|\r/', '', Convert::html2raw($node->textContent))) .
-                            '</a></li>';
-                        $node->setAttributeNode(new DOMAttr("id", "h1.".$h1));
-                        $h1++;
-                    } elseif ($node instanceof DOMElement && $node->tagName == 'h2') {
-                        $content .= '<li class="menu-h2"><a href="#h2.' . $h2 . '">' .
-                            trim(preg_replace('/\n|\r/', '', Convert::html2raw($node->textContent))) .
-                            '</a></li>';
-                        $node->setAttributeNode(new DOMAttr("id", "h2.".$h2));
-                        $h2++;
-                    }
-                    $node = $node->nextSibling;
-                }
-                $page->Content = $content . '</ul>' . $doc->saveHTML();
-            }
-
-            // Add in the link to the original document, if provided.
-            if ($preservedDocument) {
-                $page->Content = '<a href="' .
-                    $preservedDocument->Link() .
-                    '" title="download original document">download original document (' .
-                    $preservedDocument->getSize() .
-                    ')</a>' .
-                    $page->Content;
-            }
-
-            // Store the result
-            $page->write();
-            if ($publishPages) {
-                $page->publishRecursive();
-            }
-        }
-    }
-
-    protected function getBodyText($doc, $node)
-    {
-        // Build a new doc
-        $htmldoc = new DOMDocument();
-        // Create the html element
-        $html = $htmldoc->createElement('html');
-        $htmldoc->appendChild($html);
-        // Append the body node
-        $html->appendChild($htmldoc->importNode($node, true));
-
-        // Get the text as html, remove the entry and exit root tags and return
-        $text = $htmldoc->saveHTML();
-        $text = preg_replace('/^.*<body>/', '', $text);
-        $text = preg_replace('/<\/body>.*$/', '', $text);
-
-        return $text;
-    }
-
-    /**
-     * Used only when writing the document that has been split by headers.
-     * Can write both to the chapter pages as well as the master page.
-     *
-     * @param string $subtitle Title of the chapter - if missing, it will write to the master page.
-     * @param $subdoc
-     * @param $subnode
-     * @param int $sort Order of the chapter page.
-     * @param $publishPages Whether to publish the resulting child/master pages.
-     */
-    protected function writeContent($subtitle, $subdoc, $subnode, $sort = null, $publishPages = false)
-    {
-        $record = $this->form->getRecord();
-
-        if ($subtitle) {
-            // Write the chapter page to a subpage.
-            $page = DataObject::get_one(
-                'Page',
-                sprintf('"Title" = \'%s\' AND "ParentID" = %d', $subtitle, $record->ID)
-            );
-            if (!$page) {
-                $page = Page::create();
-                $page->ParentID = $record->ID;
-                $page->Title = $subtitle;
-            }
-
-            unset($this->unusedChildren[$page->ID]);
-            file_put_contents(ASSETS_PATH . '/index-' . $sort . '.html', $this->getBodyText($subdoc, $subnode));
-
-            if ($sort) {
-                $page->Sort = $sort;
-            }
-            $page->Content = $this->getBodyText($subdoc, $subnode);
-            $page->write();
-            if ($publishPages) {
-                $page->publishRecursive();
-            }
-        } else {
-            // Write to the master page.
-            $record->Content = $this->getBodyText($subdoc, $subnode);
-            $record->write();
-
-            if ($publishPages) {
-                $record->publishRecursive();
-            }
-        }
-    }
-
-    /**
-     * Imports a document at a certain path onto the current page and writes it.
-     * CAUTION: Overwrites any existing content on the page!
-     *
-     * @param array $tmpFile Array as received from PHP's POST upload.
-     * @param bool $splitHeader Heading level to split by.
-     * @param bool $publishPages Whether the underlying pages should be published after import.
-     * @param int $chosenFolderID ID of the working folder - here the converted file and images will be stored.
-     */
-    public function importFromPOST($tmpFile, $splitHeader = false, $publishPages = false, $chosenFolderID = null)
-    {
-
-        $fileDescriptor = [
-            'name' => $tmpFile['name'],
-            'path' => $tmpFile['tmp_name'],
-            'mimeType' => $tmpFile['type']
-        ];
-
-        $sourcePage = $this->form->getRecord();
-        $importerClass = $this->config()->get('importer_class');
-        $importer = Injector::inst()->create($importerClass, $fileDescriptor, $chosenFolderID);
-        $content = $importer->import();
-
-        if (is_array($content) && isset($content['error'])) {
-            return $content;
-        }
-
-        // Clean up with tidy (requires tidy module)
-        $tidy = new Tidy();
-        $tidy->parseString($content, ['output-xhtml' => true], 'utf8');
-        $tidy->cleanRepair();
-
-        $fragment = [];
-        foreach ($tidy->body()->child as $child) {
-            $fragment[] = $child->value;
-        }
-
-        $htmlValue = Injector::inst()->create(HTMLValue::class, implode("\n", $fragment));
-
-        // Sanitise
-        $santiser = Injector::inst()->create(HTMLEditorSanitiser::class, HTMLEditorConfig::get_active());
-        $santiser->sanitise($htmlValue);
-
-        // Load in the HTML
-        $doc = $htmlValue->getDocument();
-        $xpath = new DOMXPath($doc);
-
-        // make sure any images are added as Image records with a relative link to assets
-        $chosenFolder = ($this->chosenFolderID) ? DataObject::get_by_id(Folder::class, $this->chosenFolderID) : null;
-        $folderName = ($chosenFolder) ? '/' . $chosenFolder->Name : '';
-        $imgs = $xpath->query('//img');
-        for ($i = 0; $i < $imgs->length; $i++) {
-            $img = $imgs->item($i);
-            $originalPath = 'assets/' . $folderName . '/' . $img->getAttribute('src');
-            $name = FileNameFilter::create()->filter(basename($originalPath));
-
-            $image = Image::get()->filter([
-                'Name' => $name,
-                'ParentID' => (int)$chosenFolderID
-            ])->first();
-            if (!($image && $image->exists())) {
-                $image = Image::create();
-                $image->ParentID = (int)$chosenFolderID;
-                $image->Name = $name;
-                $image->write();
-            }
-
-            // make sure it's put in place correctly so Image record knows where it is.
-            // e.g. in the case of underscores being renamed to dashes.
-            @rename(Director::getAbsFile($originalPath), Director::getAbsFile($image->getFilename()));
-
-            $img->setAttribute('src', $image->getFilename());
-        }
-
-        $remove_rules = [
-            // Change any headers that contain font tags (other than font face tags) into p elements
-            '//h1[.//font[not(@face)]]' => 'p',
-            // Remove any font tags
-            '//font'
-        ];
-
-        foreach ($remove_rules as $rule => $parenttag) {
-            if (is_numeric($rule)) {
-                $rule = $parenttag;
-                $parenttag = null;
-            }
-
-            $nodes = [];
-            foreach ($xpath->query($rule) as $node) {
-                $nodes[] = $node;
-            }
-
-            foreach ($nodes as $node) {
-                $parent = $node->parentNode;
-
-                if ($parenttag) {
-                    $parent = $doc->createElement($parenttag);
-                    $node->nextSibling ?
-                        $node->parentNode->insertBefore($parent, $node->nextSibling) :
-                        $node->parentNode->appendChild($parent);
-                }
-
-                while ($node->firstChild) {
-                    $parent->appendChild($node->firstChild);
-                }
-                $node->parentNode->removeChild($node);
-            }
-        }
-
-        // Strip style, class, lang attributes.
-        $els = $doc->getElementsByTagName('*');
-        for ($i = 0; $i < $els->length; $i++) {
-            $el = $els->item($i);
-            $el->removeAttribute('class');
-            $el->removeAttribute('style');
-            $el->removeAttribute('lang');
-        }
-
-        $els = $doc->getElementsByTagName('*');
-
-        $headingXPath = [
-            'self::h1',
-            'self::h2',
-            'self::h3',
-            'self::h4',
-            'self::h5',
-            'self::h6',
-        ];
-        // Remove a bunch of unwanted elements
-        $clean = [
-            // Empty paragraphs
-            '//p[not(descendant-or-self::text() | descendant-or-self::img)]',
-            // Empty headers
-            '//*[' . implode(' | ', $headingXPath) . '][not(descendant-or-self::text() | descendant-or-self::img)]',
-            // Anchors
-            '//a[not(@href)]',
-            // BR tags
-            '//br'
-        ];
-
-        foreach ($clean as $query) {
-            // First get all the nodes. Need to build array, as they'll disappear from the
-            // nodelist while we're deleteing them, causing the indexing to screw up.
-            $nodes = [];
-            foreach ($xpath->query($query) as $node) {
-                $nodes[] = $node;
-            }
-
-            // Then remove them all
-            foreach ($nodes as $node) {
-                if ($node->parentNode) {
-                    $node->parentNode->removeChild($node);
-                }
-            }
-        }
-
-        // Now split the document into portions by H1
-        $body = $doc->getElementsByTagName('body')->item(0);
-
-        $this->unusedChildren = [];
-        foreach ($sourcePage->Children() as $child) {
-            $this->unusedChildren[$child->ID] = $child;
-        }
-
-        $documentImporterFieldError = false;
-
-        $documentImporterFieldErrorHandler = function (
-            $errno,
-            $errstr,
-            $errfile,
-            $errline
-        ) use ($documentImporterFieldError) {
-            $documentImporterFieldError = _t(
-                'SilverStripe\\DocumentConverter\\ServiceConnector.PROCESSFAILED',
-                'Could not process document, please double-check you uploaded a .doc or .docx format.',
-                'Document Converter processes Word documents into HTML.'
-            );
-
-            // Do not cascade the error through other handlers
-            return true;
-        };
-
-        set_error_handler($documentImporterFieldErrorHandler);
-
-        $subtitle = null;
-        $subdoc = new DOMDocument();
-        $subnode = $subdoc->createElement('body');
-        $node = $body->firstChild;
-        $sort = 1;
-        if ($splitHeader == 1 || $splitHeader == 2) {
-            while ($node && !$documentImporterFieldError) {
-                if ($node instanceof DOMElement && $node->tagName == 'h' . $splitHeader) {
-                    if ($subnode->hasChildNodes()) {
-                        $this->writeContent($subtitle, $subdoc, $subnode, $sort, $publishPages);
-                        $sort++;
-                    }
-
-                    $subdoc = new DOMDocument();
-                    $subnode = $subdoc->createElement('body');
-                    $subtitle = trim(preg_replace('/\n|\r/', '', Convert::html2raw($node->textContent)));
-                } else {
-                    $subnode->appendChild($subdoc->importNode($node, true));
-                }
-
-                $node = $node->nextSibling;
-            }
-        } else {
-            $this->writeContent($subtitle, $subdoc, $body, null, $publishPages);
-        }
-
-        if ($subnode->hasChildNodes() && !$documentImporterFieldError) {
-            $this->writeContent($subtitle, $subdoc, $subnode, null, $publishPages);
-        }
-
-        restore_error_handler();
-        if ($documentImporterFieldError) {
-            return ['error' => $documentImporterFieldError];
-        }
-
-        foreach ($this->unusedChildren as $child) {
-            $origStage = Versioned::current_stage();
-
-            Versioned::set_stage(Versioned::DRAFT);
-            $draft = clone $child;
-            $draft->delete();
-
-            Versioned::set_stage(Versioned::LIVE);
-            $published = clone $child;
-            $published->delete();
-
-            Versioned::set_stage($origStage);
-        }
-
-        $sourcePage->write();
-    }
+	private static $allowed_actions = ['upload'];
+
+	private static $importer_class = ServiceConnector::class;
+
+	/**
+	 * Process the document immediately upon upload.
+	 */
+	public function upload(HTTPRequest $request)
+	{
+		if ($this->isDisabled() || $this->isReadonly()) {
+			return $this->httpError(403);
+		}
+
+		// Protect against CSRF on destructive action
+		$token = $this->getForm()->getSecurityToken();
+		if (!$token->checkRequest($request)) {
+			return $this->httpError(400);
+		}
+
+		$tmpfile = $request->postVar('Upload');
+
+		// Check if the file has been uploaded into the temporary storage.
+		if (!$tmpfile) {
+			$return = [
+				'error' => _t(
+					'SilverStripe\\AssetAdmin\\Forms\\UploadField.FIELDNOTSET',
+					'File information not found'
+				)
+			];
+		} else {
+			$return = [
+				'name' => $tmpfile['name'],
+				'size' => $tmpfile['size'],
+				'type' => $tmpfile['type'],
+				'error' => $tmpfile['error']
+			];
+		}
+
+		if (!$return['error']) {
+			// Get options for this import.
+			$splitHeader = (int)$request->postVar('SplitHeader');
+			$keepSource = (bool)$request->postVar('KeepSource');
+			$chosenFolderID = (int)$request->postVar('ChosenFolderID');
+			$publishPages = (bool)$request->postVar('PublishPages');
+			$includeTOC = (bool)$request->postVar('IncludeTOC');
+
+			// Process the document and write the page.
+			$preservedDocument = null;
+			if ($keepSource) {
+				$preservedDocument = $this->preserveSourceDocument($tmpfile, $chosenFolderID);
+			}
+
+			$importResult = $this->importFromPOST($tmpfile, $splitHeader, $publishPages, $chosenFolderID);
+			if (is_array($importResult) && isset($importResult['error'])) {
+				$return['error'] = $importResult['error'];
+			} elseif ($includeTOC) {
+				$this->writeTOC($publishPages, $keepSource ? $preservedDocument : null);
+			}
+		}
+
+		$response = HTTPResponse::create(Convert::raw2json([$return]));
+		$response->addHeader('Content-Type', 'application/json');
+		return $response;
+	}
+
+	/**
+	 * Preserves the source file by copying it to a specified folder.
+	 *
+	 * @param $tmpfile Temporary file data structure.
+	 * @param int $chosenFolderID Target folder.
+	 * @return File Stored file.
+	 */
+	protected function preserveSourceDocument($tmpfile, $chosenFolderID = null)
+	{
+		$upload = Upload::create();
+
+		$file = File::create();
+		$upload->loadIntoFile($tmpfile, $file, $chosenFolderID);
+
+		$page = $this->form->getRecord();
+		$page->ImportedFromFileID = $file->ID;
+		$page->write();
+
+		return $file;
+	}
+
+	/**
+	 * Builds and writes the table of contents for the document.
+	 *
+	 * @param bool $publishPage Should the parent page be published.
+	 * @param File $preservedDocument Set if the link to the original document should be added.
+	 */
+	protected function writeTOC($publishPages = false, $preservedDocument = null)
+	{
+		$page = $this->form->getRecord();
+		$content = '<ul>';
+
+		if ($page) {
+			if ($page->Children()->Count() > 0) {
+				foreach ($page->Children() as $child) {
+					$content .= '<li><a href="' . $child->Link() . '">' . $child->Title . '</a></li>';
+				}
+				$page->Content = $content . '</ul>';
+			} else {
+				$doc = new DOMDocument();
+				$doc->loadHTML($page->Content);
+				$body = $doc->getElementsByTagName('body')->item(0);
+				$node = $body->firstChild;
+				$h1 = $h2 = 1;
+				while ($node) {
+					if ($node instanceof DOMElement && $node->tagName == 'h1') {
+						$content .= '<li><a href="#h1.' . $h1 . '">' .
+							trim(preg_replace('/\n|\r/', '', Convert::html2raw($node->textContent))) .
+							'</a></li>';
+						$node->setAttributeNode(new DOMAttr("id", "h1.".$h1));
+						$h1++;
+					} elseif ($node instanceof DOMElement && $node->tagName == 'h2') {
+						$content .= '<li class="menu-h2"><a href="#h2.' . $h2 . '">' .
+							trim(preg_replace('/\n|\r/', '', Convert::html2raw($node->textContent))) .
+							'</a></li>';
+						$node->setAttributeNode(new DOMAttr("id", "h2.".$h2));
+						$h2++;
+					}
+					$node = $node->nextSibling;
+				}
+				$page->Content = $content . '</ul>' . $doc->saveHTML();
+			}
+
+			// Add in the link to the original document, if provided.
+			if ($preservedDocument) {
+				$page->Content = '<a href="' .
+					$preservedDocument->Link() .
+					'" title="download original document">download original document (' .
+					$preservedDocument->getSize() .
+					')</a>' .
+					$page->Content;
+			}
+
+			// Store the result
+			$page->write();
+			if ($publishPages) {
+				$page->publishRecursive();
+			}
+		}
+	}
+
+	protected function getBodyText($doc, $node)
+	{
+		// Build a new doc
+		$htmldoc = new DOMDocument();
+		// Create the html element
+		$html = $htmldoc->createElement('html');
+		$htmldoc->appendChild($html);
+		// Append the body node
+		$html->appendChild($htmldoc->importNode($node, true));
+
+		// Get the text as html, remove the entry and exit root tags and return
+		$text = $htmldoc->saveHTML();
+		$text = preg_replace('/^.*<body>/', '', $text);
+		$text = preg_replace('/<\/body>.*$/', '', $text);
+
+		return $text;
+	}
+
+	/**
+	 * Used only when writing the document that has been split by headers.
+	 * Can write both to the chapter pages as well as the master page.
+	 *
+	 * @param string $subtitle Title of the chapter - if missing, it will write to the master page.
+	 * @param $subdoc
+	 * @param $subnode
+	 * @param int $sort Order of the chapter page.
+	 * @param $publishPages Whether to publish the resulting child/master pages.
+	 */
+	protected function writeContent($subtitle, $subdoc, $subnode, $sort = null, $publishPages = false)
+	{
+		$record = $this->form->getRecord();
+
+		if ($subtitle) {
+			// Write the chapter page to a subpage.
+			$page = DataObject::get_one(
+				'Page',
+				sprintf('"Title" = \'%s\' AND "ParentID" = %d', $subtitle, $record->ID)
+			);
+			if (!$page) {
+				$page = Page::create();
+				$page->ParentID = $record->ID;
+				$page->Title = $subtitle;
+			}
+
+			unset($this->unusedChildren[$page->ID]);
+			file_put_contents(ASSETS_PATH . '/index-' . $sort . '.html', $this->getBodyText($subdoc, $subnode));
+
+			if ($sort) {
+				$page->Sort = $sort;
+			}
+			$page->Content = $this->getBodyText($subdoc, $subnode);
+			$page->write();
+			if ($publishPages) {
+				$page->publishRecursive();
+			}
+		} else {
+			// Write to the master page.
+			$record->Content = $this->getBodyText($subdoc, $subnode);
+			$record->write();
+
+			if ($publishPages) {
+				$record->publishRecursive();
+			}
+		}
+	}
+
+	/**
+	 * Imports a document at a certain path onto the current page and writes it.
+	 * CAUTION: Overwrites any existing content on the page!
+	 *
+	 * @param array $tmpFile Array as received from PHP's POST upload.
+	 * @param bool $splitHeader Heading level to split by.
+	 * @param bool $publishPages Whether the underlying pages should be published after import.
+	 * @param int $chosenFolderID ID of the working folder - here the converted file and images will be stored.
+	 */
+	public function importFromPOST($tmpFile, $splitHeader = false, $publishPages = false, $chosenFolderID = null)
+	{
+
+		$fileDescriptor = [
+			'name' => $tmpFile['name'],
+			'path' => $tmpFile['tmp_name'],
+			'mimeType' => $tmpFile['type']
+		];
+
+		$sourcePage = $this->form->getRecord();
+		$importerClass = $this->config()->get('importer_class');
+		$importer = Injector::inst()->create($importerClass, $fileDescriptor, $chosenFolderID);
+		$content = $importer->import();
+
+		if (is_array($content) && isset($content['error'])) {
+			return $content;
+		}
+
+		// Clean up with tidy (requires tidy module)
+		$tidy = new Tidy();
+		$tidy->parseString($content, ['output-xhtml' => true], 'utf8');
+		$tidy->cleanRepair();
+
+		$fragment = [];
+		foreach ($tidy->body()->child as $child) {
+			$fragment[] = $child->value;
+		}
+
+		$htmlValue = Injector::inst()->create(HTMLValue::class, implode("\n", $fragment));
+
+		// Sanitise
+		$santiser = Injector::inst()->create(HTMLEditorSanitiser::class, HTMLEditorConfig::get_active());
+		$santiser->sanitise($htmlValue);
+
+		// Load in the HTML
+		$doc = $htmlValue->getDocument();
+		$xpath = new DOMXPath($doc);
+
+		// make sure any images are added as Image records with a relative link to assets
+		$chosenFolder = ($this->chosenFolderID) ? DataObject::get_by_id(Folder::class, $this->chosenFolderID) : null;
+		$folderName = ($chosenFolder) ? '/' . $chosenFolder->Name : '';
+		$imgs = $xpath->query('//img');
+		for ($i = 0; $i < $imgs->length; $i++) {
+			$img = $imgs->item($i);
+			$originalPath = 'assets/' . $folderName . '/' . $img->getAttribute('src');
+			$name = FileNameFilter::create()->filter(basename($originalPath));
+
+			$image = Image::get()->filter([
+				'Name' => $name,
+				'ParentID' => (int)$chosenFolderID
+			])->first();
+			if (!($image && $image->exists())) {
+				$image = Image::create();
+				$image->ParentID = (int)$chosenFolderID;
+				$image->Name = $name;
+				$image->write();
+			}
+
+			// make sure it's put in place correctly so Image record knows where it is.
+			// e.g. in the case of underscores being renamed to dashes.
+			@rename(Director::getAbsFile($originalPath), Director::getAbsFile($image->getFilename()));
+
+			$img->setAttribute('src', $image->getFilename());
+		}
+
+		$remove_rules = [
+			// Change any headers that contain font tags (other than font face tags) into p elements
+			'//h1[.//font[not(@face)]]' => 'p',
+			// Remove any font tags
+			'//font'
+		];
+
+		foreach ($remove_rules as $rule => $parenttag) {
+			if (is_numeric($rule)) {
+				$rule = $parenttag;
+				$parenttag = null;
+			}
+
+			$nodes = [];
+			foreach ($xpath->query($rule) as $node) {
+				$nodes[] = $node;
+			}
+
+			foreach ($nodes as $node) {
+				$parent = $node->parentNode;
+
+				if ($parenttag) {
+					$parent = $doc->createElement($parenttag);
+					$node->nextSibling ?
+						$node->parentNode->insertBefore($parent, $node->nextSibling) :
+						$node->parentNode->appendChild($parent);
+				}
+
+				while ($node->firstChild) {
+					$parent->appendChild($node->firstChild);
+				}
+				$node->parentNode->removeChild($node);
+			}
+		}
+
+		// Strip style, class, lang attributes.
+		$els = $doc->getElementsByTagName('*');
+		for ($i = 0; $i < $els->length; $i++) {
+			$el = $els->item($i);
+			$el->removeAttribute('class');
+			$el->removeAttribute('style');
+			$el->removeAttribute('lang');
+		}
+
+		$els = $doc->getElementsByTagName('*');
+
+		$headingXPath = [
+			'self::h1',
+			'self::h2',
+			'self::h3',
+			'self::h4',
+			'self::h5',
+			'self::h6',
+		];
+		// Remove a bunch of unwanted elements
+		$clean = [
+			// Empty paragraphs
+			'//p[not(descendant-or-self::text() | descendant-or-self::img)]',
+			// Empty headers
+			'//*[' . implode(' | ', $headingXPath) . '][not(descendant-or-self::text() | descendant-or-self::img)]',
+			// Anchors
+			'//a[not(@href)]',
+			// BR tags
+			'//br'
+		];
+
+		foreach ($clean as $query) {
+			// First get all the nodes. Need to build array, as they'll disappear from the
+			// nodelist while we're deleteing them, causing the indexing to screw up.
+			$nodes = [];
+			foreach ($xpath->query($query) as $node) {
+				$nodes[] = $node;
+			}
+
+			// Then remove them all
+			foreach ($nodes as $node) {
+				if ($node->parentNode) {
+					$node->parentNode->removeChild($node);
+				}
+			}
+		}
+
+		// Now split the document into portions by H1
+		$body = $doc->getElementsByTagName('body')->item(0);
+
+		$this->unusedChildren = [];
+		foreach ($sourcePage->Children() as $child) {
+			$this->unusedChildren[$child->ID] = $child;
+		}
+
+		$documentImporterFieldError = false;
+
+		$documentImporterFieldErrorHandler = function (
+			$errno,
+			$errstr,
+			$errfile,
+			$errline
+		) use ($documentImporterFieldError) {
+			$documentImporterFieldError = _t(
+				'SilverStripe\\DocumentConverter\\ServiceConnector.PROCESSFAILED',
+				'Could not process document, please double-check you uploaded a .doc or .docx format.',
+				'Document Converter processes Word documents into HTML.'
+			);
+
+			// Do not cascade the error through other handlers
+			return true;
+		};
+
+		set_error_handler($documentImporterFieldErrorHandler);
+
+		$subtitle = null;
+		$subdoc = new DOMDocument();
+		$subnode = $subdoc->createElement('body');
+		$node = $body->firstChild;
+		$sort = 1;
+		if ($splitHeader == 1 || $splitHeader == 2) {
+			while ($node && !$documentImporterFieldError) {
+				if ($node instanceof DOMElement && $node->tagName == 'h' . $splitHeader) {
+					if ($subnode->hasChildNodes()) {
+						$this->writeContent($subtitle, $subdoc, $subnode, $sort, $publishPages);
+						$sort++;
+					}
+
+					$subdoc = new DOMDocument();
+					$subnode = $subdoc->createElement('body');
+					$subtitle = trim(preg_replace('/\n|\r/', '', Convert::html2raw($node->textContent)));
+				} else {
+					$subnode->appendChild($subdoc->importNode($node, true));
+				}
+
+				$node = $node->nextSibling;
+			}
+		} else {
+			$this->writeContent($subtitle, $subdoc, $body, null, $publishPages);
+		}
+
+		if ($subnode->hasChildNodes() && !$documentImporterFieldError) {
+			$this->writeContent($subtitle, $subdoc, $subnode, null, $publishPages);
+		}
+
+		restore_error_handler();
+		if ($documentImporterFieldError) {
+			return ['error' => $documentImporterFieldError];
+		}
+
+		foreach ($this->unusedChildren as $child) {
+			$origStage = Versioned::current_stage();
+
+			Versioned::set_stage(Versioned::DRAFT);
+			$draft = clone $child;
+			$draft->delete();
+
+			Versioned::set_stage(Versioned::LIVE);
+			$published = clone $child;
+			$published->delete();
+
+			Versioned::set_stage($origStage);
+		}
+
+		$sourcePage->write();
+	}
 }

Please login to merge, or discard this patch.

src/ServiceConnector.php 2 patches

Indentation +177 added lines, -177 removed lines patch added patch discarded remove patch

@@ -16,182 +16,182 @@
 block discarded – undo
 class ServiceConnector
 {
 
-    use Configurable;
-    use Injectable;
-
-    /**
-     * @config
-     * @var array Docvert connection username
-     */
-    private static $username = null;
-
-    /**
-     * @config
-     * @var array Docvert connection password
-     */
-    private static $password = null;
-
-    /**
-     * @config
-     * @var array Docvert service URL
-     */
-    private static $url = null;
-
-    /**
-     * Associative array of:
-     * - name: the full name of the file including the extension.
-     * - path: the path to the file on the local filesystem.
-     * - mimeType
-     */
-    protected $fileDescriptor;
-
-    /**
-     * @var int
-     * ID of a SilverStripe\Assets\Folder
-     */
-    protected $chosenFolderID;
-
-    /**
-     * @var array instance specific connection details
-     */
-    protected $docvertDetails = [
-        'username' => null,
-        'password' => null,
-        'url' => null
-    ];
-
-    public function __construct($fileDescriptor, $chosenFolderID = null)
-    {
-        $this->fileDescriptor = $fileDescriptor;
-        $this->chosenFolderID = $chosenFolderID;
-    }
-
-
-    /**
-     * Retrieves detail in priority order from
-     * 1. local instance field
-     * 2. Config
-     * 3. Environment
-     *
-     * @param string $detail key name for detail
-     * @return string the value for that key
-     */
-    protected function getDetail($detail)
-    {
-        $fromDetails = $this->docvertDetails[$detail];
-        if ($fromDetails) {
-            return $fromDetails;
-        }
+	use Configurable;
+	use Injectable;
+
+	/**
+	 * @config
+	 * @var array Docvert connection username
+	 */
+	private static $username = null;
+
+	/**
+	 * @config
+	 * @var array Docvert connection password
+	 */
+	private static $password = null;
+
+	/**
+	 * @config
+	 * @var array Docvert service URL
+	 */
+	private static $url = null;
+
+	/**
+	 * Associative array of:
+	 * - name: the full name of the file including the extension.
+	 * - path: the path to the file on the local filesystem.
+	 * - mimeType
+	 */
+	protected $fileDescriptor;
+
+	/**
+	 * @var int
+	 * ID of a SilverStripe\Assets\Folder
+	 */
+	protected $chosenFolderID;
+
+	/**
+	 * @var array instance specific connection details
+	 */
+	protected $docvertDetails = [
+		'username' => null,
+		'password' => null,
+		'url' => null
+	];
+
+	public function __construct($fileDescriptor, $chosenFolderID = null)
+	{
+		$this->fileDescriptor = $fileDescriptor;
+		$this->chosenFolderID = $chosenFolderID;
+	}
+
+
+	/**
+	 * Retrieves detail in priority order from
+	 * 1. local instance field
+	 * 2. Config
+	 * 3. Environment
+	 *
+	 * @param string $detail key name for detail
+	 * @return string the value for that key
+	 */
+	protected function getDetail($detail)
+	{
+		$fromDetails = $this->docvertDetails[$detail];
+		if ($fromDetails) {
+			return $fromDetails;
+		}
         
-        $fromConfig = $this->config()->get($detail);
-        if ($fromConfig) {
-            return $fromConfig;
-        }
-
-        $fromEnv = Environment::getEnv('DOCVERT_' . strtoupper($detail));
-        if ($fromEnv) {
-            return $fromEnv;
-        }
-    }
-
-    public function setUsername($username = null)
-    {
-        $this->docvertDetails['username'] = $username;
-        return $this;
-    }
-
-    public function getUsername()
-    {
-        return $this->getDetail('username');
-    }
-
-    public function setPassword($password = null)
-    {
-        $this->docvertDetails['password'] = $password;
-        return $this;
-    }
-
-    public function getPassword()
-    {
-        return $this->getDetail('password');
-    }
-
-    public function setUrl($url = null)
-    {
-        $this->docvertDetails['url'] = $url;
-        return $this;
-    }
-
-    public function getUrl()
-    {
-        return $this->getDetail('url');
-    }
-
-    public function import()
-    {
-        $ch = curl_init();
-
-        $file = new CURLFile(
-            $this->fileDescriptor['path'],
-            $this->fileDescriptor['mimeType'],
-            $this->fileDescriptor['name']
-        );
-
-        curl_setopt_array($ch, [
-            CURLOPT_URL => $this->getUrl(),
-            CURLOPT_USERPWD => sprintf('%s:%s', $this->getUsername(), $this->getPassword()),
-            CURLOPT_POST => 1,
-            CURLOPT_POSTFIELDS => ['file' => $file],
-            CURLOPT_CONNECTTIMEOUT => 25,
-            CURLOPT_TIMEOUT => 100,
-        ]);
-
-        $chosenFolder = ($this->chosenFolderID) ? DataObject::get_by_id(Folder::class, $this->chosenFolderID) : null;
-        $folderName = ($chosenFolder) ? '/' . $chosenFolder->Name : '';
-        $outname = tempnam(ASSETS_PATH, 'convert');
-        $outzip = $outname . '.zip';
-        $out = fopen($outzip, 'w');
-        curl_setopt($ch, CURLOPT_FILE, $out);
-        $returnValue = curl_exec($ch);
-        $status = curl_getinfo($ch, CURLINFO_HTTP_CODE);
-        curl_close($ch);
-        fclose($out);
-        chmod($outzip, 0666);
-
-        if (!$returnValue || ($status != 200)) {
-            return ['error' => _t(
-                __CLASS__ . '.SERVERUNREACHABLE',
-                'Could not contact document conversion server. Please try again later ' .
-                    'or contact your system administrator.',
-                'Document Converter process Word documents into HTML.'
-            )];
-        }
-
-        // extract the converted document into assets
-        // you need php zip, e.g. apt-get install php-zip
-        $zip = new ZipArchive();
-
-        if ($zip->open($outzip)) {
-            $zip->extractTo(ASSETS_PATH .$folderName);
-            $zip->close();
-        }
-
-        // remove temporary files
-        unlink($outname);
-        unlink($outzip);
-
-        if (!file_exists(ASSETS_PATH . $folderName . '/index.html')) {
-            return ['error' =>  _t(
-                __CLASS__ . '.PROCESSFAILED',
-                'Could not process document, please double-check you uploaded a .doc or .docx format.',
-                'Document Converter processes Word documents into HTML.'
-            )];
-        }
-
-        $content = file_get_contents(ASSETS_PATH . $folderName . '/index.html');
-
-        unlink(ASSETS_PATH . $folderName . '/index.html');
-
-        return $content;
-    }
+		$fromConfig = $this->config()->get($detail);
+		if ($fromConfig) {
+			return $fromConfig;
+		}
+
+		$fromEnv = Environment::getEnv('DOCVERT_' . strtoupper($detail));
+		if ($fromEnv) {
+			return $fromEnv;
+		}
+	}
+
+	public function setUsername($username = null)
+	{
+		$this->docvertDetails['username'] = $username;
+		return $this;
+	}
+
+	public function getUsername()
+	{
+		return $this->getDetail('username');
+	}
+
+	public function setPassword($password = null)
+	{
+		$this->docvertDetails['password'] = $password;
+		return $this;
+	}
+
+	public function getPassword()
+	{
+		return $this->getDetail('password');
+	}
+
+	public function setUrl($url = null)
+	{
+		$this->docvertDetails['url'] = $url;
+		return $this;
+	}
+
+	public function getUrl()
+	{
+		return $this->getDetail('url');
+	}
+
+	public function import()
+	{
+		$ch = curl_init();
+
+		$file = new CURLFile(
+			$this->fileDescriptor['path'],
+			$this->fileDescriptor['mimeType'],
+			$this->fileDescriptor['name']
+		);
+
+		curl_setopt_array($ch, [
+			CURLOPT_URL => $this->getUrl(),
+			CURLOPT_USERPWD => sprintf('%s:%s', $this->getUsername(), $this->getPassword()),
+			CURLOPT_POST => 1,
+			CURLOPT_POSTFIELDS => ['file' => $file],
+			CURLOPT_CONNECTTIMEOUT => 25,
+			CURLOPT_TIMEOUT => 100,
+		]);
+
+		$chosenFolder = ($this->chosenFolderID) ? DataObject::get_by_id(Folder::class, $this->chosenFolderID) : null;
+		$folderName = ($chosenFolder) ? '/' . $chosenFolder->Name : '';
+		$outname = tempnam(ASSETS_PATH, 'convert');
+		$outzip = $outname . '.zip';
+		$out = fopen($outzip, 'w');
+		curl_setopt($ch, CURLOPT_FILE, $out);
+		$returnValue = curl_exec($ch);
+		$status = curl_getinfo($ch, CURLINFO_HTTP_CODE);
+		curl_close($ch);
+		fclose($out);
+		chmod($outzip, 0666);
+
+		if (!$returnValue || ($status != 200)) {
+			return ['error' => _t(
+				__CLASS__ . '.SERVERUNREACHABLE',
+				'Could not contact document conversion server. Please try again later ' .
+					'or contact your system administrator.',
+				'Document Converter process Word documents into HTML.'
+			)];
+		}
+
+		// extract the converted document into assets
+		// you need php zip, e.g. apt-get install php-zip
+		$zip = new ZipArchive();
+
+		if ($zip->open($outzip)) {
+			$zip->extractTo(ASSETS_PATH .$folderName);
+			$zip->close();
+		}
+
+		// remove temporary files
+		unlink($outname);
+		unlink($outzip);
+
+		if (!file_exists(ASSETS_PATH . $folderName . '/index.html')) {
+			return ['error' =>  _t(
+				__CLASS__ . '.PROCESSFAILED',
+				'Could not process document, please double-check you uploaded a .doc or .docx format.',
+				'Document Converter processes Word documents into HTML.'
+			)];
+		}
+
+		$content = file_get_contents(ASSETS_PATH . $folderName . '/index.html');
+
+		unlink(ASSETS_PATH . $folderName . '/index.html');
+
+		return $content;
+	}
 }

Please log in to merge, or discard this patch.

Spacing +10 added lines, -10 removed lines patch added patch discarded remove patch

@@ -88,7 +88,7 @@  discard block
 block discarded – undo
             return $fromConfig;
         }
 
-        $fromEnv = Environment::getEnv('DOCVERT_' . strtoupper($detail));
+        $fromEnv = Environment::getEnv('DOCVERT_'.strtoupper($detail));
         if ($fromEnv) {
             return $fromEnv;
         }
@@ -147,9 +147,9 @@  discard block
 block discarded – undo
         ]);
 
         $chosenFolder = ($this->chosenFolderID) ? DataObject::get_by_id(Folder::class, $this->chosenFolderID) : null;
-        $folderName = ($chosenFolder) ? '/' . $chosenFolder->Name : '';
+        $folderName = ($chosenFolder) ? '/'.$chosenFolder->Name : '';
         $outname = tempnam(ASSETS_PATH, 'convert');
-        $outzip = $outname . '.zip';
+        $outzip = $outname.'.zip';
         $out = fopen($outzip, 'w');
         curl_setopt($ch, CURLOPT_FILE, $out);
         $returnValue = curl_exec($ch);
@@ -160,8 +160,8 @@  discard block
 block discarded – undo
 
         if (!$returnValue || ($status != 200)) {
             return ['error' => _t(
-                __CLASS__ . '.SERVERUNREACHABLE',
-                'Could not contact document conversion server. Please try again later ' .
+                __CLASS__.'.SERVERUNREACHABLE',
+                'Could not contact document conversion server. Please try again later '.
                     'or contact your system administrator.',
                 'Document Converter process Word documents into HTML.'
             )];
@@ -172,7 +172,7 @@  discard block
 block discarded – undo
         $zip = new ZipArchive();
 
         if ($zip->open($outzip)) {
-            $zip->extractTo(ASSETS_PATH .$folderName);
+            $zip->extractTo(ASSETS_PATH.$folderName);
             $zip->close();
         }
 
@@ -180,17 +180,17 @@  discard block
 block discarded – undo
         unlink($outname);
         unlink($outzip);
 
-        if (!file_exists(ASSETS_PATH . $folderName . '/index.html')) {
+        if (!file_exists(ASSETS_PATH.$folderName.'/index.html')) {
             return ['error' =>  _t(
-                __CLASS__ . '.PROCESSFAILED',
+                __CLASS__.'.PROCESSFAILED',
                 'Could not process document, please double-check you uploaded a .doc or .docx format.',
                 'Document Converter processes Word documents into HTML.'
             )];
         }
 
-        $content = file_get_contents(ASSETS_PATH . $folderName . '/index.html');
+        $content = file_get_contents(ASSETS_PATH.$folderName.'/index.html');
 
-        unlink(ASSETS_PATH . $folderName . '/index.html');
+        unlink(ASSETS_PATH.$folderName.'/index.html');
 
         return $content;
     }

Please log in to merge, or discard this patch.

tests/Stubs/TestPage.php 1 patch

Indentation +5 added lines, -5 removed lines patch added patch discarded remove patch

@@ -8,9 +8,9 @@
 block discarded – undo
 
 class TestPage extends Page implements TestOnly
 {
-    private static $table_name = 'DocvertTestPage';
-    private static $extensions = [PageExtension::class];
-    private static $defaults = [
-        'Content' => '<h1>Default TestPage</h1><p>With pre-import content.</p>'
-    ];
+	private static $table_name = 'DocvertTestPage';
+	private static $extensions = [PageExtension::class];
+	private static $defaults = [
+		'Content' => '<h1>Default TestPage</h1><p>With pre-import content.</p>'
+	];
 }

Please log in to merge, or discard this patch.

		@@ -88,7 +88,7 @@ discard block
		block discarded – undo
88	88	return $fromConfig;
89	89	}
90	90
91		- $fromEnv = Environment::getEnv('DOCVERT_' . strtoupper($detail));
	91	+ $fromEnv = Environment::getEnv('DOCVERT_'.strtoupper($detail));
92	92	if ($fromEnv) {
93	93	return $fromEnv;
94	94	}
		@@ -147,9 +147,9 @@ discard block
		block discarded – undo
147	147	]);
148	148
149	149	$chosenFolder = ($this->chosenFolderID) ? DataObject::get_by_id(Folder::class, $this->chosenFolderID) : null;
150		- $folderName = ($chosenFolder) ? '/' . $chosenFolder->Name : '';
	150	+ $folderName = ($chosenFolder) ? '/'.$chosenFolder->Name : '';
151	151	$outname = tempnam(ASSETS_PATH, 'convert');
152		- $outzip = $outname . '.zip';
	152	+ $outzip = $outname.'.zip';
153	153	$out = fopen($outzip, 'w');
154	154	curl_setopt($ch, CURLOPT_FILE, $out);
155	155	$returnValue = curl_exec($ch);
		@@ -160,8 +160,8 @@ discard block
		block discarded – undo
160	160
161	161	if (!$returnValue || ($status != 200)) {
162	162	return ['error' => _t(
163		- __CLASS__ . '.SERVERUNREACHABLE',
164		- 'Could not contact document conversion server. Please try again later ' .
	163	+ __CLASS__.'.SERVERUNREACHABLE',
	164	+ 'Could not contact document conversion server. Please try again later '.
165	165	'or contact your system administrator.',
166	166	'Document Converter process Word documents into HTML.'
167	167	)];
		@@ -172,7 +172,7 @@ discard block
		block discarded – undo
172	172	$zip = new ZipArchive();
173	173
174	174	if ($zip->open($outzip)) {
175		- $zip->extractTo(ASSETS_PATH .$folderName);
	175	+ $zip->extractTo(ASSETS_PATH.$folderName);
176	176	$zip->close();
177	177	}
178	178
		@@ -180,17 +180,17 @@ discard block
		block discarded – undo
180	180	unlink($outname);
181	181	unlink($outzip);
182	182
183		- if (!file_exists(ASSETS_PATH . $folderName . '/index.html')) {
	183	+ if (!file_exists(ASSETS_PATH.$folderName.'/index.html')) {
184	184	return ['error' => _t(
185		- __CLASS__ . '.PROCESSFAILED',
	185	+ __CLASS__.'.PROCESSFAILED',
186	186	'Could not process document, please double-check you uploaded a .doc or .docx format.',
187	187	'Document Converter processes Word documents into HTML.'
188	188	)];
189	189	}
190	190
191		- $content = file_get_contents(ASSETS_PATH . $folderName . '/index.html');
	191	+ $content = file_get_contents(ASSETS_PATH.$folderName.'/index.html');
192	192
193		- unlink(ASSETS_PATH . $folderName . '/index.html');
	193	+ unlink(ASSETS_PATH.$folderName.'/index.html');
194	194
195	195	return $content;
196	196	}

		@@ -16,182 +16,182 @@
		block discarded – undo
16	16	class ServiceConnector
17	17	{
18	18
19		- use Configurable;
20		- use Injectable;
21		-
22		- /**
23		- * @config
24		- * @var array Docvert connection username
25		- */
26		- private static $username = null;
27		-
28		- /**
29		- * @config
30		- * @var array Docvert connection password
31		- */
32		- private static $password = null;
33		-
34		- /**
35		- * @config
36		- * @var array Docvert service URL
37		- */
38		- private static $url = null;
39		-
40		- /**
41		- * Associative array of:
42		- * - name: the full name of the file including the extension.
43		- * - path: the path to the file on the local filesystem.
44		- * - mimeType
45		- */
46		- protected $fileDescriptor;
47		-
48		- /**
49		- * @var int
50		- * ID of a SilverStripe\Assets\Folder
51		- */
52		- protected $chosenFolderID;
53		-
54		- /**
55		- * @var array instance specific connection details
56		- */
57		- protected $docvertDetails = [
58		- 'username' => null,
59		- 'password' => null,
60		- 'url' => null
61		- ];
62		-
63		- public function __construct($fileDescriptor, $chosenFolderID = null)
64		- {
65		- $this->fileDescriptor = $fileDescriptor;
66		- $this->chosenFolderID = $chosenFolderID;
67		- }
68		-
69		-
70		- /**
71		- * Retrieves detail in priority order from
72		- * 1. local instance field
73		- * 2. Config
74		- * 3. Environment
75		- *
76		- * @param string $detail key name for detail
77		- * @return string the value for that key
78		- */
79		- protected function getDetail($detail)
80		- {
81		- $fromDetails = $this->docvertDetails[$detail];
82		- if ($fromDetails) {
83		- return $fromDetails;
84		- }
	19	+ use Configurable;
	20	+ use Injectable;
	21	+
	22	+ /**
	23	+ * @config
	24	+ * @var array Docvert connection username
	25	+ */
	26	+ private static $username = null;
	27	+
	28	+ /**
	29	+ * @config
	30	+ * @var array Docvert connection password
	31	+ */
	32	+ private static $password = null;
	33	+
	34	+ /**
	35	+ * @config
	36	+ * @var array Docvert service URL
	37	+ */
	38	+ private static $url = null;
	39	+
	40	+ /**
	41	+ * Associative array of:
	42	+ * - name: the full name of the file including the extension.
	43	+ * - path: the path to the file on the local filesystem.
	44	+ * - mimeType
	45	+ */
	46	+ protected $fileDescriptor;
	47	+
	48	+ /**
	49	+ * @var int
	50	+ * ID of a SilverStripe\Assets\Folder
	51	+ */
	52	+ protected $chosenFolderID;
	53	+
	54	+ /**
	55	+ * @var array instance specific connection details
	56	+ */
	57	+ protected $docvertDetails = [
	58	+ 'username' => null,
	59	+ 'password' => null,
	60	+ 'url' => null
	61	+ ];
	62	+
	63	+ public function __construct($fileDescriptor, $chosenFolderID = null)
	64	+ {
	65	+ $this->fileDescriptor = $fileDescriptor;
	66	+ $this->chosenFolderID = $chosenFolderID;
	67	+ }
	68	+
	69	+
	70	+ /**
	71	+ * Retrieves detail in priority order from
	72	+ * 1. local instance field
	73	+ * 2. Config
	74	+ * 3. Environment
	75	+ *
	76	+ * @param string $detail key name for detail
	77	+ * @return string the value for that key
	78	+ */
	79	+ protected function getDetail($detail)
	80	+ {
	81	+ $fromDetails = $this->docvertDetails[$detail];
	82	+ if ($fromDetails) {
	83	+ return $fromDetails;
	84	+ }
85	85
86		- $fromConfig = $this->config()->get($detail);
87		- if ($fromConfig) {
88		- return $fromConfig;
89		- }
90		-
91		- $fromEnv = Environment::getEnv('DOCVERT_' . strtoupper($detail));
92		- if ($fromEnv) {
93		- return $fromEnv;
94		- }
95		- }
96		-
97		- public function setUsername($username = null)
98		- {
99		- $this->docvertDetails['username'] = $username;
100		- return $this;
101		- }
102		-
103		- public function getUsername()
104		- {
105		- return $this->getDetail('username');
106		- }
107		-
108		- public function setPassword($password = null)
109		- {
110		- $this->docvertDetails['password'] = $password;
111		- return $this;
112		- }
113		-
114		- public function getPassword()
115		- {
116		- return $this->getDetail('password');
117		- }
118		-
119		- public function setUrl($url = null)
120		- {
121		- $this->docvertDetails['url'] = $url;
122		- return $this;
123		- }
124		-
125		- public function getUrl()
126		- {
127		- return $this->getDetail('url');
128		- }
129		-
130		- public function import()
131		- {
132		- $ch = curl_init();
133		-
134		- $file = new CURLFile(
135		- $this->fileDescriptor['path'],
136		- $this->fileDescriptor['mimeType'],
137		- $this->fileDescriptor['name']
138		- );
139		-
140		- curl_setopt_array($ch, [
141		- CURLOPT_URL => $this->getUrl(),
142		- CURLOPT_USERPWD => sprintf('%s:%s', $this->getUsername(), $this->getPassword()),
143		- CURLOPT_POST => 1,
144		- CURLOPT_POSTFIELDS => ['file' => $file],
145		- CURLOPT_CONNECTTIMEOUT => 25,
146		- CURLOPT_TIMEOUT => 100,
147		- ]);
148		-
149		- $chosenFolder = ($this->chosenFolderID) ? DataObject::get_by_id(Folder::class, $this->chosenFolderID) : null;
150		- $folderName = ($chosenFolder) ? '/' . $chosenFolder->Name : '';
151		- $outname = tempnam(ASSETS_PATH, 'convert');
152		- $outzip = $outname . '.zip';
153		- $out = fopen($outzip, 'w');
154		- curl_setopt($ch, CURLOPT_FILE, $out);
155		- $returnValue = curl_exec($ch);
156		- $status = curl_getinfo($ch, CURLINFO_HTTP_CODE);
157		- curl_close($ch);
158		- fclose($out);
159		- chmod($outzip, 0666);
160		-
161		- if (!$returnValue || ($status != 200)) {
162		- return ['error' => _t(
163		- __CLASS__ . '.SERVERUNREACHABLE',
164		- 'Could not contact document conversion server. Please try again later ' .
165		- 'or contact your system administrator.',
166		- 'Document Converter process Word documents into HTML.'
167		- )];
168		- }
169		-
170		- // extract the converted document into assets
171		- // you need php zip, e.g. apt-get install php-zip
172		- $zip = new ZipArchive();
173		-
174		- if ($zip->open($outzip)) {
175		- $zip->extractTo(ASSETS_PATH .$folderName);
176		- $zip->close();
177		- }
178		-
179		- // remove temporary files
180		- unlink($outname);
181		- unlink($outzip);
182		-
183		- if (!file_exists(ASSETS_PATH . $folderName . '/index.html')) {
184		- return ['error' => _t(
185		- __CLASS__ . '.PROCESSFAILED',
186		- 'Could not process document, please double-check you uploaded a .doc or .docx format.',
187		- 'Document Converter processes Word documents into HTML.'
188		- )];
189		- }
190		-
191		- $content = file_get_contents(ASSETS_PATH . $folderName . '/index.html');
192		-
193		- unlink(ASSETS_PATH . $folderName . '/index.html');
194		-
195		- return $content;
196		- }
	86	+ $fromConfig = $this->config()->get($detail);
	87	+ if ($fromConfig) {
	88	+ return $fromConfig;
	89	+ }
	90	+
	91	+ $fromEnv = Environment::getEnv('DOCVERT_' . strtoupper($detail));
	92	+ if ($fromEnv) {
	93	+ return $fromEnv;
	94	+ }
	95	+ }
	96	+
	97	+ public function setUsername($username = null)
	98	+ {
	99	+ $this->docvertDetails['username'] = $username;
	100	+ return $this;
	101	+ }
	102	+
	103	+ public function getUsername()
	104	+ {
	105	+ return $this->getDetail('username');
	106	+ }
	107	+
	108	+ public function setPassword($password = null)
	109	+ {
	110	+ $this->docvertDetails['password'] = $password;
	111	+ return $this;
	112	+ }
	113	+
	114	+ public function getPassword()
	115	+ {
	116	+ return $this->getDetail('password');
	117	+ }
	118	+
	119	+ public function setUrl($url = null)
	120	+ {
	121	+ $this->docvertDetails['url'] = $url;
	122	+ return $this;
	123	+ }
	124	+
	125	+ public function getUrl()
	126	+ {
	127	+ return $this->getDetail('url');
	128	+ }
	129	+
	130	+ public function import()
	131	+ {
	132	+ $ch = curl_init();
	133	+
	134	+ $file = new CURLFile(
	135	+ $this->fileDescriptor['path'],
	136	+ $this->fileDescriptor['mimeType'],
	137	+ $this->fileDescriptor['name']
	138	+ );
	139	+
	140	+ curl_setopt_array($ch, [
	141	+ CURLOPT_URL => $this->getUrl(),
	142	+ CURLOPT_USERPWD => sprintf('%s:%s', $this->getUsername(), $this->getPassword()),
	143	+ CURLOPT_POST => 1,
	144	+ CURLOPT_POSTFIELDS => ['file' => $file],
	145	+ CURLOPT_CONNECTTIMEOUT => 25,
	146	+ CURLOPT_TIMEOUT => 100,
	147	+ ]);
	148	+
	149	+ $chosenFolder = ($this->chosenFolderID) ? DataObject::get_by_id(Folder::class, $this->chosenFolderID) : null;
	150	+ $folderName = ($chosenFolder) ? '/' . $chosenFolder->Name : '';
	151	+ $outname = tempnam(ASSETS_PATH, 'convert');
	152	+ $outzip = $outname . '.zip';
	153	+ $out = fopen($outzip, 'w');
	154	+ curl_setopt($ch, CURLOPT_FILE, $out);
	155	+ $returnValue = curl_exec($ch);
	156	+ $status = curl_getinfo($ch, CURLINFO_HTTP_CODE);
	157	+ curl_close($ch);
	158	+ fclose($out);
	159	+ chmod($outzip, 0666);
	160	+
	161	+ if (!$returnValue || ($status != 200)) {
	162	+ return ['error' => _t(
	163	+ __CLASS__ . '.SERVERUNREACHABLE',
	164	+ 'Could not contact document conversion server. Please try again later ' .
	165	+ 'or contact your system administrator.',
	166	+ 'Document Converter process Word documents into HTML.'
	167	+ )];
	168	+ }
	169	+
	170	+ // extract the converted document into assets
	171	+ // you need php zip, e.g. apt-get install php-zip
	172	+ $zip = new ZipArchive();
	173	+
	174	+ if ($zip->open($outzip)) {
	175	+ $zip->extractTo(ASSETS_PATH .$folderName);
	176	+ $zip->close();
	177	+ }
	178	+
	179	+ // remove temporary files
	180	+ unlink($outname);
	181	+ unlink($outzip);
	182	+
	183	+ if (!file_exists(ASSETS_PATH . $folderName . '/index.html')) {
	184	+ return ['error' => _t(
	185	+ __CLASS__ . '.PROCESSFAILED',
	186	+ 'Could not process document, please double-check you uploaded a .doc or .docx format.',
	187	+ 'Document Converter processes Word documents into HTML.'
	188	+ )];
	189	+ }
	190	+
	191	+ $content = file_get_contents(ASSETS_PATH . $folderName . '/index.html');
	192	+
	193	+ unlink(ASSETS_PATH . $folderName . '/index.html');
	194	+
	195	+ return $content;
	196	+ }
197	197	}

		@@ -8,9 +8,9 @@
		block discarded – undo
8	8
9	9	class TestPage extends Page implements TestOnly
10	10	{
11		- private static $table_name = 'DocvertTestPage';
12		- private static $extensions = [PageExtension::class];
13		- private static $defaults = [
14		- 'Content' => '<h1>Default TestPage</h1><p>With pre-import content.</p>'
15		- ];
	11	+ private static $table_name = 'DocvertTestPage';
	12	+ private static $extensions = [PageExtension::class];
	13	+ private static $defaults = [
	14	+ 'Content' => '<h1>Default TestPage</h1><p>With pre-import content.</p>'
	15	+ ];
16	16	}

silverstripe / silverstripe-documentconverter

Push — master ( 54ac69...cc2d4e )

Status

Category

Indentation +447 added lines, -447 removed lines patch added patch discarded remove patch

Indentation +177 added lines, -177 removed lines patch added patch discarded remove patch

Spacing +10 added lines, -10 removed lines patch added patch discarded remove patch

Indentation +5 added lines, -5 removed lines patch added patch discarded remove patch