1 | <?php |
||
2 | /** |
||
3 | * @file |
||
4 | * |
||
5 | * QueryPath functions. |
||
6 | * |
||
7 | * This file holds the QueryPath functions, qp() and htmlqp(). |
||
8 | * |
||
9 | * Usage: |
||
10 | * |
||
11 | * @code |
||
12 | * <?php |
||
13 | * require 'qp.php'; |
||
14 | * |
||
15 | * qp($xml)->find('foo')->count(); |
||
16 | * ?> |
||
17 | * @endcode |
||
18 | */ |
||
19 | |||
20 | use QueryPath\QueryPath; |
||
21 | |||
22 | /** @addtogroup querypath_core Core API |
||
23 | * Core classes and functions for QueryPath. |
||
24 | * |
||
25 | * These are the classes, objects, and functions that developers who use QueryPath |
||
26 | * are likely to use. The qp() and htmlqp() functions are the best place to start, |
||
27 | * while most of the frequently used methods are part of the QueryPath object. |
||
28 | */ |
||
29 | |||
30 | /** @addtogroup querypath_util Utilities |
||
31 | * Utility classes for QueryPath. |
||
32 | * |
||
33 | * These classes add important, but less-often used features to QueryPath. Some of |
||
34 | * these are used transparently (QueryPathIterator). Others you can use directly in your |
||
35 | * code (QueryPathEntities). |
||
36 | */ |
||
37 | |||
38 | /** @namespace QueryPath |
||
39 | * The core classes that compose QueryPath. |
||
40 | * |
||
41 | * The QueryPath classes contain the brunt of the QueryPath code. If you are |
||
42 | * interested in working with just the CSS engine, you may want to look at CssEventHandler, |
||
43 | * which can be used without the rest of QueryPath. If you are interested in looking |
||
44 | * carefully at QueryPath's implementation details, then the QueryPath class is where you |
||
45 | * should begin. If you are interested in writing extensions, then you may want to look at |
||
46 | * QueryPathExtension, and also at some of the simple extensions, such as QPXML. |
||
47 | */ |
||
48 | |||
49 | /** |
||
50 | * Build a new Query Path. |
||
51 | * This builds a new Query Path object. The new object can be used for |
||
52 | * reading, searching, and modifying a document. |
||
53 | * |
||
54 | * While it is permissible to directly create new instances of a QueryPath |
||
55 | * implementation, it is not advised. Instead, you should use this function |
||
56 | * as a factory. |
||
57 | * |
||
58 | * Example: |
||
59 | * |
||
60 | * @code |
||
61 | * <?php |
||
62 | * qp(); // New empty QueryPath |
||
63 | * qp('path/to/file.xml'); // From a file |
||
64 | * qp('<html><head></head><body></body></html>'); // From HTML or XML |
||
65 | * qp(QueryPath::XHTML_STUB); // From a basic HTML document. |
||
66 | * qp(QueryPath::XHTML_STUB, 'title'); // Create one from a basic HTML doc and position it at the title element. |
||
67 | * |
||
68 | * // Most of the time, methods are chained directly off of this call. |
||
69 | * qp(QueryPath::XHTML_STUB, 'body')->append('<h1>Title</h1>')->addClass('body-class'); |
||
70 | * ?> |
||
71 | * @endcode |
||
72 | * |
||
73 | * This function is used internally by QueryPath. Anything that modifies the |
||
74 | * behavior of this function may also modify the behavior of common QueryPath |
||
75 | * methods. |
||
76 | * |
||
77 | * <b>Types of documents that QueryPath can support</b> |
||
78 | * |
||
79 | * qp() can take any of these as its first argument: |
||
80 | * |
||
81 | * - A string of XML or HTML (See {@link XHTML_STUB}) |
||
82 | * - A path on the file system or a URL |
||
83 | * - A {@link DOMDocument} object |
||
84 | * - A {@link SimpleXMLElement} object. |
||
85 | * - A {@link DOMNode} object. |
||
86 | * - An array of {@link DOMNode} objects (generally {@link DOMElement} nodes). |
||
87 | * - Another {@link QueryPath} object. |
||
88 | * |
||
89 | * Keep in mind that most features of QueryPath operate on elements. Other |
||
90 | * sorts of DOMNodes might not work with all features. |
||
91 | * |
||
92 | * <b>Supported Options</b> |
||
93 | * - context: A stream context object. This is used to pass context info |
||
94 | * to the underlying file IO subsystem. |
||
95 | * - encoding: A valid character encoding, such as 'utf-8' or 'ISO-8859-1'. |
||
96 | * The default is system-dependent, typically UTF-8. Note that this is |
||
97 | * only used when creating new documents, not when reading existing content. |
||
98 | * (See convert_to_encoding below.) |
||
99 | * - parser_flags: An OR-combined set of parser flags. The flags supported |
||
100 | * by the DOMDocument PHP class are all supported here. |
||
101 | * - omit_xml_declaration: Boolean. If this is TRUE, then certain output |
||
102 | * methods (like {@link QueryPath::xml()}) will omit the XML declaration |
||
103 | * from the beginning of a document. |
||
104 | * - format_output: Boolean. If this is set to TRUE, QueryPath will format |
||
105 | * the HTML or XML output to make it more readable. If this is set to |
||
106 | * FALSE, QueryPath will minimize whitespace to keep the document smaller |
||
107 | * but harder to read. |
||
108 | * - replace_entities: Boolean. If this is TRUE, then any of the insertion |
||
109 | * functions (before(), append(), etc.) will replace named entities with |
||
110 | * their decimal equivalent, and will replace un-escaped ampersands with |
||
111 | * a numeric entity equivalent. |
||
112 | * - ignore_parser_warnings: Boolean. If this is TRUE, then E_WARNING messages |
||
113 | * generated by the XML parser will not cause QueryPath to throw an exception. |
||
114 | * This is useful when parsing |
||
115 | * badly mangled HTML, or when failure to find files should not result in |
||
116 | * an exception. By default, this is FALSE -- that is, parsing warnings and |
||
117 | * IO warnings throw exceptions. |
||
118 | * - convert_to_encoding: Use the MB library to convert the document to the |
||
119 | * named encoding before parsing. This is useful for old HTML (set it to |
||
120 | * iso-8859-1 for best results). If this is not supplied, no character set |
||
121 | * conversion will be performed. See {@link mb_convert_encoding()}. |
||
122 | * (QueryPath 1.3 and later) |
||
123 | * - convert_from_encoding: If 'convert_to_encoding' is set, this option can be |
||
124 | * used to explicitly define what character set the source document is using. |
||
125 | * By default, QueryPath will allow the MB library to guess the encoding. |
||
126 | * (QueryPath 1.3 and later) |
||
127 | * - strip_low_ascii: If this is set to TRUE then markup will have all low ASCII |
||
128 | * characters (<32) stripped out before parsing. This is good in cases where |
||
129 | * icky HTML has (illegal) low characters in the document. |
||
130 | * - use_parser: If 'xml', Parse the document as XML. If 'html', parse the |
||
131 | * document as HTML. Note that the XML parser is very strict, while the |
||
132 | * HTML parser is more lenient, but does enforce some of the DTD/Schema. |
||
133 | * <i>By default, QueryPath autodetects the type.</i> |
||
134 | * - escape_xhtml_js_css_sections: XHTML needs script and css sections to be |
||
135 | * escaped. Yet older readers do not handle CDATA sections, and comments do not |
||
136 | * work properly (for numerous reasons). By default, QueryPath's *XHTML methods |
||
137 | * will wrap a script body with a CDATA declaration inside of C-style comments. |
||
138 | * If you want to change this, you can set this option with one of the |
||
139 | * JS_CSS_ESCAPE_* constants, or you can write your own. |
||
140 | * - QueryPath_class: (ADVANCED) Use this to set the actual classname that |
||
141 | * {@link qp()} loads as a QueryPath instance. It is assumed that the |
||
142 | * class is either {@link QueryPath} or a subclass thereof. See the test |
||
143 | * cases for an example. |
||
144 | * |
||
145 | * @ingroup querypath_core |
||
146 | * @param mixed $document |
||
147 | * A document in one of the forms listed above. |
||
148 | * @param string $string |
||
149 | * A CSS 3 selector. |
||
150 | * @param array $options |
||
151 | * An associative array of options. Currently supported options are listed above. |
||
152 | * @return \QueryPath\DOMQuery |
||
153 | * Or possibly another QueryPath-like object if you overrode QueryPath_class. |
||
154 | */ |
||
function qp($document = NULL, $string = NULL, array $options = [])
{
    // Forward all three arguments unchanged to QueryPath::with();
    // $string carries the CSS selector described in the docblock above.
    $query = QueryPath::with($document, $string, $options);

    return $query;
}
||
159 | |||
160 | /** |
||
161 | * A special-purpose version of {@link qp()} designed specifically for HTML. |
||
162 | * |
||
163 | * XHTML (if valid) can be easily parsed by {@link qp()} with no problems. However, |
||
164 | * because of the way that libxml handles HTML, there are several common steps that |
||
165 | * need to be taken to reliably parse non-XML HTML documents. This function is |
||
166 | * a convenience tool for configuring QueryPath to parse HTML. |
||
167 | * |
||
168 | * The following options are automatically set unless overridden: |
||
169 | * - ignore_parser_warnings: TRUE |
||
170 | * - convert_to_encoding: ISO-8859-1 (the best for the HTML parser). |
||
171 | * - convert_from_encoding: auto (autodetect encoding) |
||
172 | * - use_parser: html |
||
173 | * |
||
174 | * Parser warning messages are also suppressed, so if the parser emits a warning, |
||
175 | * the application will not be notified. This is equivalent to |
||
176 | * calling @code@qp()@endcode. |
||
177 | * |
||
178 | * Warning: Character set conversions will only work if the Multi-Byte (mb) library |
||
179 | * is installed and enabled. This is usually enabled, but not always. |
||
180 | * |
||
181 | * @ingroup querypath_core |
||
182 | * @see qp() |
||
183 | * @param mixed $document |
||
184 | * @param string|null $selector |
||
185 | * @param array $options |
||
186 | * @return mixed|\QueryPath\DOMQuery |
||
187 | */ |
||
function htmlqp($document = NULL, $selector = NULL, $options = [])
{
    // Forward the document, selector and options unchanged to
    // QueryPath::withHTML() and return whatever it produces.
    $query = QueryPath::withHTML($document, $selector, $options);

    return $query;
}
||
193 | |||
194 | /** |
||
195 | * Parse HTML5 documents. |
||
196 | * |
||
197 | * This uses HTML5-PHP to parse the document. In actuality, this parser does |
||
198 | * a fine job with pre-HTML5 documents in most cases, though really old HTML |
||
199 | * (like 2.0) may have some substantial quirks. |
||
200 | * |
||
201 | * <b>Supported Options</b> |
||
202 | * Any options supported by HTML5-PHP are allowed here. Additionally, the |
||
203 | * following options have meaning to QueryPath. |
||
204 | * - QueryPath_class |
||
205 | * |
||
206 | * |
||
207 | * @param mixed $document |
||
208 | * @param string $selector |
||
209 | * A CSS3 selector. |
||
210 | * |
||
211 | * @param array $options |
||
212 | * An associative array of options, which is passed on into HTML5-PHP. Note |
||
213 | * that the standard QueryPath options may be ignored for this function, |
||
214 | * since it uses a different parser. |
||
215 | * |
||
216 | * @return QueryPath |
||
217 | */ |
||
function html5qp($document = NULL, $selector = NULL, array $options = [])
{
    // Forward the document, selector and options unchanged to
    // QueryPath::withHTML5() and return whatever it produces.
    $query = QueryPath::withHTML5($document, $selector, $options);

    return $query;
}
||
222 |