arthurkushman /
querypath
<?php
/**
 * Example of grabbing and parsing Linked Data from DBPedia.
 *
 * This example illustrates how QueryPath can be used to do the following:
 *
 * - Make a robust HTTP connection to a remote server to fetch data.
 * - Use a stream context to control the underlying stream.
 * - Work with Linked Data.
 * - Work with XML namespaces in documents:
 *   * Using namespaces to access elements in selectors.
 *   * Using namespaces to access attributes in selectors.
 *   * Using namespaces to access attributes in XML methods.
 *
 * The code here connects to the DBPedia server and looks up the Linked
 * Data stored there for a particular Wikipedia entry (any Wikipedia
 * wiki name should work here).
 *
 * Every lookup below starts from `$qp->top()` and uses the object that the
 * traversal call returns, so the script does not depend on whether
 * traversal methods modify `$qp` in place or hand back a new object.
 *
 * @author M Butcher <[email protected]>
 * @license LGPL The GNU Lesser GPL (LGPL) or an MIT-like license.
 * @see http://www.w3.org/DesignIssues/LinkedData.html
 * @see http://dbpedia.org
 * @see sparql.php
 * @see musicbrainz.php
 */

// Resolve the library relative to this file (not the current working
// directory) so the example can be launched from anywhere.
require_once __DIR__ . '/../src/QueryPath/QueryPath.php';

// The URL to look up (any of these works):
$url = 'http://dbpedia.org/data/The_Beatles.rdf';
//$url = 'http://dbpedia.org/data/Swansea.rdf';
//$url = 'http://dbpedia.org/data/The_Lord_of_the_Rings.rdf';

// HTTP headers:
$headers = array(
    'Accept: application/rdf,application/rdf+xml;q=0.9,*/*;q=0.8',
    'Accept-Language: en-us,en',
    'Accept-Charset: ISO-8859-1,utf-8',
    'User-Agent: QueryPath/1.2',
);

// The context options:
$options = array(
    'http' => array(
        'method' => 'GET',
        'protocol_version' => 1.1,
        // The HTTP wrapper expects all headers as one CRLF-separated string.
        'header' => implode("\r\n", $headers),
    ),
);

// Create a stream context that will tell QueryPath how to
// load the file.
$cxt = stream_context_create($options);

// Fetch the URL and select all rdf:Description elements.
// (Note that | is the CSS 3 equiv of colons for namespacing.)
// To add the context, we pass it in as an option to QueryPath.
$qp = qp($url, 'rdf|Description', array('context' => $cxt));
// Alternative: load a previously downloaded copy from disk.
//$qp = qp('The_Beatles.rdf');

printf("There are %d descriptions in this record.\n", $qp->size());

// Here, we use rdf|* to select all elements in the RDF namespace.
// Count the object returned by find() instead of $qp itself, so the number
// is right even if find() does not modify $qp in place.
$rdfItems = $qp->top()->find('rdf|*');
printf("There are %d RDF items in this record.\n", $rdfItems->size());

// Standard pseudo-classes that are not HTML specific can be used on
// namespaced elements, too.
print "About: " . $qp->top()->find('rdfs|label:first')->text() . PHP_EOL;
print "About (FOAF): " . $qp->top()->find('foaf|name:first')->text() . PHP_EOL;

// Namespaced attributes can be retrieved using the same sort of delimiting.
print "\nComment:\n";
print $qp->top()->find('rdfs|comment[xml|lang="en"]')->text();
print PHP_EOL;

// Each section prints the rdf:resource attribute of every element matched
// by a namespaced selector. Keys are the headings shown to the user.
$resourceSections = array(
    'Images' => 'foaf|img',
    'Images Galleries' => 'dbpprop|hasPhotoCollection',
    'Other Sites' => 'foaf|page',
);

foreach ($resourceSections as $heading => $selector) {
    print "\n" . $heading . ":\n";

    // Start from the document root and search on a branch so that $qp
    // itself is left alone between sections.
    foreach ($qp->top()->branch()->find($selector) as $resource) {
        // Note that when we use attr() we are using the XML name, NOT
        // the CSS 3 name. So it is rdf:resource, not rdf|resource.
        // The same goes for the tag() function -- it will return
        // the full element name (e.g. rdf:Description).
        print $resource->attr('rdf:resource') . PHP_EOL;
    }
}

// Uncomment to dump the XML of the current selection.
//$qp->writeXML();