arthurkushman /
querypath
| 1 | <?php |
||
| 2 | /** |
||
| 3 | * @file |
||
| 4 | * |
||
| 5 | * QueryPath functions. |
||
| 6 | * |
||
| 7 | * This file holds the QueryPath functions, qp() and htmlqp(). |
||
| 8 | * |
||
| 9 | * Usage: |
||
| 10 | * |
||
| 11 | * @code |
||
| 12 | * <?php |
||
| 13 | * require 'qp.php'; |
||
| 14 | * |
||
| 15 | * qp($xml)->find('foo')->count(); |
||
| 16 | * ?> |
||
| 17 | * @endcode |
||
| 18 | */ |
||
| 19 | |||
| 20 | use QueryPath\QueryPath; |
||
| 21 | |||
| 22 | /** @addtogroup querypath_core Core API |
||
| 23 | * Core classes and functions for QueryPath. |
||
| 24 | * |
||
| 25 | * These are the classes, objects, and functions that developers who use QueryPath |
||
| 26 | * are likely to use. The qp() and htmlqp() functions are the best place to start, |
||
| 27 | * while most of the frequently used methods are part of the QueryPath object. |
||
| 28 | */ |
||
| 29 | |||
| 30 | /** @addtogroup querypath_util Utilities |
||
| 31 | * Utility classes for QueryPath. |
||
| 32 | * |
||
| 33 | * These classes add important, but less-often used features to QueryPath. Some of |
||
| 34 | * these are used transparently (QueryPathIterator). Others you can use directly in your |
||
| 35 | * code (QueryPathEntities). |
||
| 36 | */ |
||
| 37 | |||
| 38 | /** @namespace QueryPath |
||
| 39 | * The core classes that compose QueryPath. |
||
| 40 | * |
||
| 41 | * The QueryPath classes contain the brunt of the QueryPath code. If you are |
||
| 42 | * interested in working with just the CSS engine, you may want to look at CssEventHandler, |
||
| 43 | * which can be used without the rest of QueryPath. If you are interested in looking |
||
| 44 | * carefully at QueryPath's implementation details, then the QueryPath class is where you |
||
| 45 | * should begin. If you are interested in writing extensions, then you may want to look at |
||
| 46 | * QueryPathExtension, and also at some of the simple extensions, such as QPXML. |
||
| 47 | */ |
||
| 48 | |||
| 49 | /** |
||
| 50 | * Build a new Query Path. |
||
| 51 | * This builds a new Query Path object. The new object can be used for |
||
| 52 | * reading, searching, and modifying a document. |
||
| 53 | * |
||
| 54 | * While it is permissible to directly create new instances of a QueryPath |
||
| 55 | * implementation, it is not advised. Instead, you should use this function |
||
| 56 | * as a factory. |
||
| 57 | * |
||
| 58 | * Example: |
||
| 59 | * |
||
| 60 | * @code |
||
| 61 | * <?php |
||
| 62 | * qp(); // New empty QueryPath |
||
| 63 | * qp('path/to/file.xml'); // From a file |
||
| 64 | * qp('<html><head></head><body></body></html>'); // From HTML or XML |
||
| 65 | * qp(QueryPath::XHTML_STUB); // From a basic HTML document. |
||
| 66 | * qp(QueryPath::XHTML_STUB, 'title'); // Create one from a basic HTML doc and position it at the title element. |
||
| 67 | * |
||
| 68 | * // Most of the time, methods are chained directly off of this call. |
||
| 69 | * qp(QueryPath::XHTML_STUB, 'body')->append('<h1>Title</h1>')->addClass('body-class'); |
||
| 70 | * ?> |
||
| 71 | * @endcode |
||
| 72 | * |
||
| 73 | * This function is used internally by QueryPath. Anything that modifies the |
||
| 74 | * behavior of this function may also modify the behavior of common QueryPath |
||
| 75 | * methods. |
||
| 76 | * |
||
| 77 | * <b>Types of documents that QueryPath can support</b> |
||
| 78 | * |
||
| 79 | * qp() can take any of these as its first argument: |
||
| 80 | * |
||
| 81 | * - A string of XML or HTML (See {@link XHTML_STUB}) |
||
| 82 | * - A path on the file system or a URL |
||
| 83 | * - A {@link DOMDocument} object |
||
| 84 | * - A {@link SimpleXMLElement} object. |
||
| 85 | * - A {@link DOMNode} object. |
||
| 86 | * - An array of {@link DOMNode} objects (generally {@link DOMElement} nodes). |
||
| 87 | * - Another {@link QueryPath} object. |
||
| 88 | * |
||
| 89 | * Keep in mind that most features of QueryPath operate on elements. Other |
||
| 90 | * sorts of DOMNodes might not work with all features. |
||
| 91 | * |
||
| 92 | * <b>Supported Options</b> |
||
| 93 | * - context: A stream context object. This is used to pass context info |
||
| 94 | * to the underlying file IO subsystem. |
||
| 95 | * - encoding: A valid character encoding, such as 'utf-8' or 'ISO-8859-1'. |
||
| 96 | * The default is system-dependent, typically UTF-8. Note that this is |
||
| 97 | * only used when creating new documents, not when reading existing content. |
||
| 98 | * (See convert_to_encoding below.) |
||
| 99 | * - parser_flags: An OR-combined set of parser flags. The flags supported |
||
| 100 | * by the DOMDocument PHP class are all supported here. |
||
| 101 | * - omit_xml_declaration: Boolean. If this is TRUE, then certain output |
||
| 102 | * methods (like {@link QueryPath::xml()}) will omit the XML declaration |
||
| 103 | * from the beginning of a document. |
||
| 104 | * - format_output: Boolean. If this is set to TRUE, QueryPath will format |
||
| 105 | * the HTML or XML output to make it more readable. If this is set to |
||
| 106 | * FALSE, QueryPath will minimize whitespace to keep the document smaller |
||
| 107 | * but harder to read. |
||
| 108 | * - replace_entities: Boolean. If this is TRUE, then any of the insertion |
||
| 109 | * functions (before(), append(), etc.) will replace named entities with |
||
| 110 | * their decimal equivalent, and will replace un-escaped ampersands with |
||
| 111 | * a numeric entity equivalent. |
||
| 112 | * - ignore_parser_warnings: Boolean. If this is TRUE, then E_WARNING messages |
||
| 113 | * generated by the XML parser will not cause QueryPath to throw an exception. |
||
| 114 | * This is useful when parsing |
||
| 115 | * badly mangled HTML, or when failure to find files should not result in |
||
| 116 | * an exception. By default, this is FALSE -- that is, parsing warnings and |
||
| 117 | * IO warnings throw exceptions. |
||
| 118 | * - convert_to_encoding: Use the MB library to convert the document to the |
||
| 119 | * named encoding before parsing. This is useful for old HTML (set it to |
||
| 120 | * iso-8859-1 for best results). If this is not supplied, no character set |
||
| 121 | * conversion will be performed. See {@link mb_convert_encoding()}. |
||
| 122 | * (QueryPath 1.3 and later) |
||
| 123 | * - convert_from_encoding: If 'convert_to_encoding' is set, this option can be |
||
| 124 | * used to explicitly define what character set the source document is using. |
||
| 125 | * By default, QueryPath will allow the MB library to guess the encoding. |
||
| 126 | * (QueryPath 1.3 and later) |
||
| 127 | * - strip_low_ascii: If this is set to TRUE then markup will have all low ASCII |
||
| 128 | * characters (<32) stripped out before parsing. This is good in cases where |
||
| 129 | * icky HTML has (illegal) low characters in the document. |
||
| 130 | * - use_parser: If 'xml', Parse the document as XML. If 'html', parse the |
||
| 131 | * document as HTML. Note that the XML parser is very strict, while the |
||
| 132 | * HTML parser is more lenient, but does enforce some of the DTD/Schema. |
||
| 133 | * <i>By default, QueryPath autodetects the type.</i> |
||
| 134 | * - escape_xhtml_js_css_sections: XHTML needs script and css sections to be |
||
| 135 | * escaped. Yet older readers do not handle CDATA sections, and comments do not |
||
| 136 | * work properly (for numerous reasons). By default, QueryPath's *XHTML methods |
||
| 137 | * will wrap a script body with a CDATA declaration inside of C-style comments. |
||
| 138 | * If you want to change this, you can set this option with one of the |
||
| 139 | * JS_CSS_ESCAPE_* constants, or you can write your own. |
||
| 140 | * - QueryPath_class: (ADVANCED) Use this to set the actual classname that |
||
| 141 | * {@link qp()} loads as a QueryPath instance. It is assumed that the |
||
| 142 | * class is either {@link QueryPath} or a subclass thereof. See the test |
||
| 143 | * cases for an example. |
||
| 144 | * |
||
| 145 | * @ingroup querypath_core |
||
| 146 | * @param mixed $document |
||
| 147 | * A document in one of the forms listed above. |
||
| 148 | * @param string $string |
||
| 149 | * A CSS 3 selector. |
||
| 150 | * @param array $options |
||
| 151 | * An associative array of options. Currently supported options are listed above. |
||
| 152 | * @return \QueryPath\DOMQuery |
||
| 153 | * Or possibly another QueryPath-like object if you overrode QueryPath_class. |
||
| 154 | */ |
||
function qp($document = NULL, $string = NULL, array $options = [])
{
    // Delegate to the QueryPath factory method; $string carries the
    // CSS 3 selector and $options the associative option array.
    $query = QueryPath::with($document, $string, $options);

    return $query;
}
||
| 159 | |||
| 160 | /** |
||
| 161 | * A special-purpose version of {@link qp()} designed specifically for HTML. |
||
| 162 | * |
||
| 163 | * XHTML (if valid) can be easily parsed by {@link qp()} with no problems. However, |
||
| 164 | * because of the way that libxml handles HTML, there are several common steps that |
||
| 165 | * need to be taken to reliably parse non-XML HTML documents. This function is |
||
| 166 | * a convenience tool for configuring QueryPath to parse HTML. |
||
| 167 | * |
||
| 168 | * The following options are automatically set unless overridden: |
||
| 169 | * - ignore_parser_warnings: TRUE |
||
| 170 | * - convert_to_encoding: ISO-8859-1 (the best for the HTML parser). |
||
| 171 | * - convert_from_encoding: auto (autodetect encoding) |
||
| 172 | * - use_parser: html |
||
| 173 | * |
||
| 174 | * Parser warning messages are also suppressed, so if the parser emits a warning, |
||
| 175 | * the application will not be notified. This is equivalent to |
||
| 176 | * calling qp() with PHP's error-suppression operator: @code @qp() @endcode. |
||
| 177 | * |
||
| 178 | * Warning: Character set conversions will only work if the Multi-Byte (mb) library |
||
| 179 | * is installed and enabled. This is usually enabled, but not always. |
||
| 180 | * |
||
| 181 | * @ingroup querypath_core |
||
| 182 | * @see qp() |
||
| 183 | * @param mixed $document |
||
| 184 | * @param string|null $selector |
||
| 185 | * @param array $options |
||
| 186 | * @return mixed|\QueryPath\DOMQuery |
||
| 187 | */ |
||
function htmlqp($document = NULL, $selector = NULL, $options = [])
{
    // Forward all arguments unchanged to QueryPath's HTML-specific
    // factory method and hand back whatever it produces.
    $query = QueryPath::withHTML($document, $selector, $options);

    return $query;
}
||
| 193 | |||
| 194 | /** |
||
| 195 | * Parse HTML5 documents. |
||
| 196 | * |
||
| 197 | * This uses HTML5-PHP to parse the document. In actuality, this parser does |
||
| 198 | * a fine job with pre-HTML5 documents in most cases, though really old HTML |
||
| 199 | * (like 2.0) may have some substantial quirks. |
||
| 200 | * |
||
| 201 | * <b>Supported Options</b> |
||
| 202 | * Any options supported by HTML5-PHP are allowed here. Additionally, the |
||
| 203 | * following options have meaning to QueryPath. |
||
| 204 | * - QueryPath_class |
||
| 205 | * |
||
| 206 | * |
||
| 207 | * @param mixed $document The HTML5 document to parse. |
||
| 208 | * @param string $selector |
||
| 209 | * A CSS3 selector. |
||
| 210 | * |
||
| 211 | * @param array $options |
||
| 212 | * An associative array of options, which is passed on into HTML5-PHP. Note |
||
| 213 | * that the standard QueryPath options may be ignored for this function, |
||
| 214 | * since it uses a different parser. |
||
| 215 | * |
||
| 216 | * @return QueryPath |
||
| 217 | */ |
||
function html5qp($document = NULL, $selector = NULL, array $options = [])
{
    // Forward all arguments unchanged to QueryPath's HTML5 factory
    // method and hand back whatever it produces.
    $query = QueryPath::withHTML5($document, $selector, $options);

    return $query;
}
||
| 222 |