Passed
Push — master ( 28bf1e...61fea9 )
by Arthur
02:09
created

readZippedXML()   A

Complexity

Conditions 4
Paths 4

Size

Total Lines 26
Code Lines 10

Duplication

Lines 0
Ratio 0 %

Importance

Changes 1
Bugs 0 Features 0
Metric Value
cc 4
eloc 10
nc 4
nop 2
dl 0
loc 26
rs 9.9332
c 1
b 0
f 0
1
<?php
2
/**
3
 * DocX Parser
4
 *
5
 * For namespaces use | instead of :
6
 *
7
 * 
8
 * @author Emily Brand
9
 * @license LGPL The GNU Lesser GPL (LGPL) or an MIT-like license.
10
 * @see http://www.urbandictionary.com/
11
 */
12
require_once '../src/QueryPath/QueryPath.php';
13
// Pull the raw XML of the sample document out of the local test.docx archive.
// NOTE(review): docx2text() returns a status/error message string on failure,
// which would then be handed to qp() as if it were XML -- confirm whether a
// failure check is wanted here.
$data = docx2text('test.docx');
$path = $data;

// For every element matched by 'w|p', print the formatted text of its
// 'w|r' elements, followed by a line break.
foreach(qp($path, 'w|p') as $qp) {
  // Work on a branch so the formatting lookup does not disturb $qp itself.
  $qr = $qp->branch();
  print format($qr->find('w|r:first'), 'w|r:first').' ';

  // The return value is intentionally unused: this presumably narrows $qp
  // itself to the first run so that next() below advances from there.
  // TODO confirm against the QueryPath version in use.
  $qp->find('w|r:first');

  // Keep advancing until next() no longer yields any markup.
  while($qp->next('w|r')->html() != null) {
    $qr = $qp->branch();
    print format($qr->find('w|r'), 'w|r').' ';
  }

  // '<br />' replaces the previous '</br>', which is not a valid HTML tag.
  print '<br />';
}
33
34
/**
 * Build the HTML snippet for a piece of document text.
 *
 * Collects the text of the 'w|t' elements found beneath $findSelector and
 * wraps it in <u>...</u> when checkUnderline() reports true, then in
 * <b>...</b> when checkBold() reports true (so bold is the outer tag when
 * both apply).
 *
 * NOTE(review): the text is inserted into the markup without HTML escaping;
 * confirm whether the document content can be trusted.
 *
 * @param QueryPath $qp           Query object to read the text and formatting from.
 * @param String    $findSelector Selector passed to find() before looking up 'w|t'.
 * @return String The text, optionally wrapped in <u> and/or <b> tags.
 */
function format($qp, $findSelector = null) {
  // Query a branch so the lookups below do not disturb the caller's object.
  $qr = $qp->branch();
  $text = $qr->find($findSelector)->find('w|t')->text();

  // Each check receives its own branch of $qp.
  if (checkUnderline($qp->branch())) {
    $text = '<u>'.$text.'</u>';
  }
  if (checkBold($qp->branch())) {
    $text = '<b>'.$text.'</b>';
  }

  return $text;
}
54
55
/**
 * Report whether a 'w|b' element exists beneath the 'w|rPr' children of $qp.
 *
 * The two children() calls are chained so the lookup does not depend on the
 * first call narrowing $qp in place.
 *
 * @param QueryPath $qp Query object to inspect; it may be modified by the
 *                      traversal, so callers should pass a branch.
 * @return bool True when html() yields non-empty markup for the 'w|b' match.
 */
function checkBold($qp) {
  return (bool) $qp->children("w|rPr")->children('w|b')->html();
}
64
65
/**
 * Report whether a 'w|u' element exists beneath the 'w|rPr' children of $qp.
 *
 * The two children() calls are chained so the lookup does not depend on the
 * first call narrowing $qp in place.
 *
 * @param QueryPath $qp Query object to inspect; it may be modified by the
 *                      traversal, so callers should pass a branch.
 * @return bool True when html() yields non-empty markup for the 'w|u' match.
 */
function checkUnderline($qp) {
  return (bool) $qp->children("w|rPr")->children('w|u')->html();
}
74
75
76
/**
 * Read the "word/document.xml" entry out of the given archive.
 *
 * @param string $filename Path of the archive, passed through to readZippedXML().
 * @return mixed Whatever readZippedXML() returns for that entry.
 */
function docx2text($filename) {
    $documentEntry = "word/document.xml";

    return readZippedXML($filename, $documentEntry);
}
79
80
/**
 * Read one entry out of a ZIP archive.
 *
 * @param string $archiveFile Path of the ZIP archive to open.
 * @param string $dataFile    Name of the entry to read from the archive.
 * @return mixed The value of ZipArchive::getFromIndex() when the entry is
 *               found. Otherwise a message string: a fixed notice when the
 *               ZipArchive class is unavailable, or the archive's status
 *               string when opening fails or the entry cannot be located.
 */
function readZippedXML($archiveFile, $dataFile) {
    // Do not trigger autoloading; only check for an already-defined class.
    if (!class_exists('ZipArchive', false)) {
        return "ZipArchive Class Doesn't Exist.";
    }

    $zip = new ZipArchive();

    // open() returns true only on success, hence the strict comparison.
    if (true !== $zip->open($archiveFile)) {
        return $zip->getStatusString();
    }

    $index = $zip->locateName($dataFile);
    if ($index === false) {
        // Capture the status before close(): the previous code queried it
        // only after the archive had already been closed.
        $status = $zip->getStatusString();
        $zip->close();

        return $status;
    }

    $data = $zip->getFromIndex($index);
    $zip->close();

    return $data;
}
107