arthurkushman /
querypath
| 1 | <?php |
||
| 2 | /** |
||
| 3 | * DocX Parser |
||
| 4 | * |
||
| 5 | * For namespaces use | instead of : |
||
| 6 | * |
||
| 7 | * |
||
| 8 | * @author Emily Brand |
||
| 9 | * @license LGPL The GNU Lesser GPL (LGPL) or an MIT-like license. |
||
| 10 | * @see http://www.urbandictionary.com/ |
||
| 11 | */ |
||
| 12 | require_once '../src/QueryPath/QueryPath.php'; |
||
// Pull the main document part out of the local .docx archive. Note that
// docx2text() hands back a message string (not XML) when extraction fails.
$data = docx2text('test.docx');

// The extracted XML string is what gets handed to qp() as the document source.
$path = $data;

// One iteration per paragraph (w:p); each paragraph is printed as a line of
// formatted runs followed by a line break.
foreach (qp($path, 'w|p') as $qp) {
  // First run (w:r) of the paragraph, formatted from a separate branch.
  $qr = $qp->branch();
  print format($qr->find('w|r:first'), 'w|r:first').' ';

  // The return value is deliberately unused: the loop below appears to rely on
  // find()/next() repositioning $qp itself.
  // NOTE(review): confirm this against the QueryPath version in use.
  $qp->find('w|r:first');
  while ($qp->next('w|r')->html() != null) {
    $qr = $qp->branch();
    print format($qr->find('w|r'), 'w|r').' ';
  }

  // End of paragraph.
  print '<br />';
}
||
| 33 | |||
/**
 * Render the text of a run as an HTML fragment.
 *
 * The text of every w|t element found below $findSelector is collected,
 * HTML-escaped, and then wrapped in <u> and/or <b> when checkUnderline() /
 * checkBold() report that formatting for $qp.
 *
 * @param QueryPath $qp
 *   Query object positioned on the run to render.
 * @param String $findSelector
 *   Selector applied to a branch of $qp before the w|t elements are looked up.
 * @return String
 *   The escaped text, optionally wrapped in underline and bold tags.
 */
function format($qp, $findSelector = null) {
  // Query through a branch rather than through $qp directly.
  $qr = $qp->branch();

  // The text comes from the document and is printed as HTML by the caller, so
  // characters such as <, > and & must be escaped before tags are added.
  $text = htmlspecialchars(
    (string) $qr->find($findSelector)->find('w|t')->text(),
    ENT_QUOTES,
    'UTF-8'
  );

  // Each check receives its own branch of $qp.
  if (checkUnderline($qp->branch())) {
    $text = '<u>'.$text.'</u>';
  }
  if (checkBold($qp->branch())) {
    $text = '<b>'.$text.'</b>';
  }

  return $text;
}
||
| 54 | |||
/**
 * Tell whether the run has a w|b element inside its w|rPr child.
 *
 * @param QueryPath $qp
 *   Query object positioned on the run to inspect.
 * @return bool
 *   TRUE when html() yields markup for a w|rPr > w|b child, FALSE otherwise.
 */
function checkBold($qp) {
  // Chain the two lookups instead of discarding the first result, so the
  // answer does not depend on whether children() repositions $qp in place or
  // returns a new query object.
  $bold = $qp->children('w|rPr')->children('w|b');

  return $bold->html() ? true : false;
}
||
| 64 | |||
/**
 * Tell whether the run has a w|u element inside its w|rPr child.
 *
 * @param QueryPath $qp
 *   Query object positioned on the run to inspect.
 * @return bool
 *   TRUE when html() yields markup for a w|rPr > w|u child, FALSE otherwise.
 */
function checkUnderline($qp) {
  // Chain the two lookups instead of discarding the first result, so the
  // answer does not depend on whether children() repositions $qp in place or
  // returns a new query object.
  $underline = $qp->children('w|rPr')->children('w|u');

  return $underline->html() ? true : false;
}
||
| 74 | |||
| 75 | |||
/**
 * Fetch the "word/document.xml" entry of a .docx archive.
 *
 * @param string $filename
 *   Path of the .docx file, passed to readZippedXML() as the archive.
 * @return string
 *   Whatever readZippedXML() returns for that entry.
 */
function docx2text($filename) {
  $documentPart = "word/document.xml";

  return readZippedXML($filename, $documentPart);
}
||
| 79 | |||
/**
 * Read one entry out of a ZIP archive.
 *
 * @param string $archiveFile
 *   Path of the ZIP archive to open.
 * @param string $dataFile
 *   Name of the entry to read from the archive.
 * @return string
 *   The entry's contents on success. On failure a message is returned instead
 *   (not an empty string): a fixed text when the ZipArchive class is missing,
 *   otherwise the archive's status string.
 */
function readZippedXML($archiveFile, $dataFile) {
  if (!class_exists('ZipArchive', false)) {
    return "ZipArchive Class Doesn't Exist.";
  }

  $zip = new ZipArchive();

  // open() returns TRUE on success and an error code otherwise; on failure
  // there is nothing to close, so report the status right away.
  if (true !== $zip->open($archiveFile)) {
    return $zip->getStatusString();
  }

  // Look the entry up and read it; $data stays FALSE if either step fails.
  $data = false;
  $index = $zip->locateName($dataFile);
  if ($index !== false) {
    $data = $zip->getFromIndex($index);
  }

  // Capture the status while the archive is still open, so the reason for a
  // failed lookup or read is the one reported rather than the post-close state.
  $status = $zip->getStatusString();
  $zip->close();

  return ($data !== false) ? $data : $status;
}
||
| 107 |