| 1 |  |  | <?php | 
            
                                                                                                            
                            
            
                                    
            
            
                | 2 |  |  | /** | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3 |  |  |  * DocX Parser | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4 |  |  |  * | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5 |  |  |  * In selectors, namespaced elements are written with | instead of : (for example, w|p is used for w:p). | 
            
                                                                                                            
                            
            
                                    
            
            
                | 6 |  |  |  * | 
            
                                                                                                            
                            
            
                                    
            
            
                | 7 |  |  |  *  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 8 |  |  |  * @author Emily Brand | 
            
                                                                                                            
                            
            
                                    
            
            
                | 9 |  |  |  * @license LGPL The GNU Lesser GPL (LGPL) or an MIT-like license. | 
            
                                                                                                            
                            
            
                                    
            
            
                | 10 |  |  |  * @see http://www.urbandictionary.com/ | 
            
                                                                                                            
                            
            
                                    
            
            
                | 11 |  |  |  */ | 
            
                                                                                                            
                            
            
                                    
            
            
                | 12 |  |  | require_once '../src/QueryPath/QueryPath.php'; | 
            
                                                                                                            
                            
            
                                    
            
            
                // Pull the "word/document.xml" part out of the .docx archive.
                // NOTE(review): on failure docx2text() returns a status message instead
                // of XML; that case is not detected before the value is handed to qp().
                $data = docx2text('test.docx');
                $path = $data;

                // Print every paragraph (w|p) as one line of lightly formatted HTML.
                foreach (qp($path, 'w|p') as $qp) {
                  // The first run is formatted from a branch, presumably so that the
                  // lookup does not disturb the paragraph object itself -- confirm
                  // against the QueryPath version in use.
                  $qr = $qp->branch();
                  print format($qr->find('w|r:first'), 'w|r:first').' ';

                  // Move $qp to the first run, then keep stepping with next('w|r')
                  // until html() comes back empty.
                  $qp->find('w|r:first');
                  while ($qp->next('w|r')->html() != null) {
                    $qr = $qp->branch();
                    print format($qr->find('w|r'), 'w|r').' ';
                  }

                  // <br> is a void element: "</br>" is an end tag for it and a parse
                  // error, so emit a proper line break instead.
                  print '<br />';
                }
            
                                                                                                            
                            
            
                                    
            
            
                | 33 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                /**
                 * Render the text found under a selector as an HTML fragment.
                 *
                 * The text of the w|t elements located via $findSelector is escaped for
                 * HTML and then wrapped in <u> and/or <b> when checkUnderline() and
                 * checkBold() report that formatting.
                 *
                 * @param QueryPath $qp
                 *   Object to read from. Only branches of it are queried here.
                 * @param String $findSelector
                 *   Selector handed to find() before the w|t lookup.
                 * @return String
                 *   The escaped text, optionally wrapped in <u> and <b> tags.
                 */
                function format($qp, $findSelector = null) {
                  // Work on a branch so the lookups below are not performed on $qp itself.
                  $qr = $qp->branch();

                  $text = (string) $qr->find($findSelector)->find('w|t')->text();

                  // The result is printed as HTML by the caller, so characters such as
                  // "<" and "&" coming from the document must not be emitted raw.
                  // NOTE(review): UTF-8 is assumed for the extracted text -- confirm.
                  $text = htmlspecialchars($text, ENT_QUOTES, 'UTF-8');

                  // Each check receives its own branch because the check functions call
                  // children() directly on the object they are given.
                  if (checkUnderline($qp->branch())) {
                    $text = '<u>'.$text.'</u>';
                  }
                  if (checkBold($qp->branch())) {
                    $text = '<b>'.$text.'</b>';
                  }

                  return $text;
                }
            
                                                                                                            
                            
            
                                    
            
            
                | 54 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                /**
                 * Report whether a w|b element is found beneath the w|rPr children.
                 *
                 * Only the presence of markup is tested; attributes of the element are
                 * not examined.
                 *
                 * @param QueryPath $qp
                 *   Object to inspect. children() is called on it directly.
                 * @return bool
                 *   TRUE when html() of the w|b children is truthy, FALSE otherwise.
                 */
                function checkBold($qp) {
                  // The return value is discarded, so this call only matters if
                  // children() narrows $qp in place -- presumably QueryPath's behaviour;
                  // confirm against the library version in use.
                  $qp->children('w|rPr');

                  $boldMarkup = $qp->children('w|b')->html();

                  return (bool) $boldMarkup;
                }
            
                                                                                                            
                            
            
                                    
            
            
                | 64 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                /**
                 * Report whether a w|u element is found beneath the w|rPr children.
                 *
                 * Only the presence of markup is tested; attributes of the element are
                 * not examined.
                 *
                 * @param QueryPath $qp
                 *   Object to inspect. children() is called on it directly.
                 * @return bool
                 *   TRUE when html() of the w|u children is truthy, FALSE otherwise.
                 */
                function checkUnderline($qp) {
                  // The return value is discarded, so this call only matters if
                  // children() narrows $qp in place -- presumably QueryPath's behaviour;
                  // confirm against the library version in use.
                  $qp->children('w|rPr');

                  $underlineMarkup = $qp->children('w|u')->html();

                  return (bool) $underlineMarkup;
                }
            
                                                                                                            
                            
            
                                    
            
            
                | 74 |  |  |  | 
            
                                                                                                            
                                                                
            
                                    
            
            
                | 75 |  |  |  | 
            
                                                                        
                            
            
                                    
            
            
                /**
                 * Fetch the "word/document.xml" entry of a .docx file.
                 *
                 * Despite the name, nothing is converted to plain text here: the result
                 * is whatever readZippedXML() returns for that entry.
                 *
                 * @param String $filename
                 *   Path of the .docx archive.
                 * @return String
                 *   The value returned by readZippedXML().
                 */
                function docx2text($filename) {
                  $documentPart = 'word/document.xml';

                  return readZippedXML($filename, $documentPart);
                }
            
                                                                                                            
                            
            
                                    
            
            
                | 79 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                /**
                 * Read one entry out of a ZIP archive and return its raw contents.
                 *
                 * Failures are reported in-band: when the ZipArchive class is missing,
                 * the archive cannot be opened, or the entry cannot be located or read,
                 * a status message is returned in place of the data.
                 *
                 * @param String $archiveFile
                 *   Path of the ZIP archive to open.
                 * @param String $dataFile
                 *   Name of the entry inside the archive.
                 * @return String
                 *   The entry's contents on success, otherwise a status message.
                 */
                function readZippedXML($archiveFile, $dataFile) {
                  if (!class_exists('ZipArchive', false)) {
                    return "ZipArchive Class Doesn't Exist.";
                  }

                  $zip = new ZipArchive();

                  // open() yields TRUE on success and an error code otherwise.
                  $opened = $zip->open($archiveFile);
                  if ($opened !== true) {
                    $status = $zip->getStatusString();
                    if (is_string($status) && $status !== '') {
                      return $status;
                    }
                    // No usable status text is available; fall back to the raw code so
                    // the caller still receives a message.
                    return 'Unable to open archive (ZipArchive error code '.var_export($opened, true).').';
                  }

                  $contents = false;
                  $index = $zip->locateName($dataFile);
                  if ($index !== false) {
                    $contents = $zip->getFromIndex($index);
                  }

                  // The status must be captured while the archive is still open: once
                  // close() has run it no longer describes the failed lookup or read.
                  $status = ($contents === false) ? $zip->getStatusString() : null;
                  $zip->close();

                  return ($contents !== false) ? $contents : $status;
                }
            
                                                        
            
                                    
            
            
                | 107 |  |  |  |