|
1
|
|
|
<?php |
|
2
|
|
|
|
|
3
|
|
|
namespace Onoi\Remi\Ncbi; |
|
4
|
|
|
|
|
5
|
|
|
use Onoi\Remi\FilteredRecord; |
|
6
|
|
|
use DOMDocument; |
|
7
|
|
|
|
|
8
|
|
|
/**
 * Reads an XML document and copies selected bibliographic values (publication
 * year, abstract, subject headings, ISO journal abbreviation, article type)
 * into a FilteredRecord.
 *
 * Two element vocabularies are searched: the capitalised names matched by
 * findElementsForPubMed() and the lower-case names matched by
 * findElementsForPubMedCentral().
 *
 * @license GNU GPL v2+
 * @since 0.1
 *
 * @author mwjames
 */
class NcbiEntrezAbstractXMLProcessor {

	/**
	 * Record that receives every value found in the XML.
	 *
	 * @var FilteredRecord
	 */
	private $filteredRecord;

	/**
	 * @since 0.1
	 *
	 * @param FilteredRecord $filteredRecord
	 */
	public function __construct( FilteredRecord $filteredRecord ) {
		$this->filteredRecord = $filteredRecord;
	}

	/**
	 * Parses the XML and fills the record with the values it contains.
	 *
	 * Empty, non-string or unparsable input is ignored: the record is left
	 * untouched and neither a warning nor an exception is raised.
	 *
	 * @since 0.1
	 *
	 * @param string $xml
	 */
	public function doProcess( $xml ) {

		// DOMDocument::loadXML() rejects an empty source (ValueError on PHP 8,
		// a warning before), so bail out before handing it over.
		if ( !is_string( $xml ) || trim( $xml ) === '' ) {
			return;
		}

		$domDocument = new DOMDocument();

		// Collect parser diagnostics internally instead of emitting PHP
		// warnings for malformed input, then restore the caller's setting.
		$previousSetting = libxml_use_internal_errors( true );

		// LIBXML_NONET keeps the parser from fetching network resources
		$isLoaded = $domDocument->loadXML( $xml, LIBXML_NONET );

		// Only discard the error buffer when the caller was not collecting
		// libxml errors itself
		if ( !$previousSetting ) {
			libxml_clear_errors();
		}

		libxml_use_internal_errors( $previousSetting );

		if ( !$isLoaded ) {
			return;
		}

		$this->findElementsForPubMed( $domDocument );
		$this->findElementsForPubMedCentral( $domDocument );
	}

	/**
	 * Looks for the capitalised element names (PubDate/Year, Abstract,
	 * MeshHeading, ISOAbbreviation).
	 *
	 * `year` and `abstract` are written with set(), so when several matching
	 * elements exist the last one in document order is the value passed last;
	 * `subject` and `iso-abbreviation` are passed to append() once per element.
	 *
	 * @param DOMDocument $domDocument
	 */
	private function findElementsForPubMed( DOMDocument $domDocument ) {

		foreach ( $domDocument->getElementsByTagName( 'PubDate' ) as $pubDate ) {
			foreach ( $pubDate->getElementsByTagName( 'Year' ) as $year ) {
				$this->filteredRecord->set( 'year', $year->nodeValue );
			}
		}

		foreach ( $domDocument->getElementsByTagName( 'Abstract' ) as $abstract ) {
			$this->filteredRecord->set( 'abstract', $this->normalizeWhitespace( $abstract->nodeValue ) );
		}

		foreach ( $domDocument->getElementsByTagName( 'MeshHeading' ) as $meshHeading ) {
			$this->filteredRecord->append( 'subject', $this->normalizeWhitespace( $meshHeading->nodeValue ) );
		}

		// http://www.library.illinois.edu/biotech/j-abbrev.html notes:
		// "... database uses the "standard abbreviation", as defined by ISSN
		// and used also by BIOSIS and CASSI ..."
		foreach ( $domDocument->getElementsByTagName( 'ISOAbbreviation' ) as $isoAbbreviation ) {
			$this->filteredRecord->append( 'iso-abbreviation', trim( $isoAbbreviation->nodeValue ) );
		}
	}

	/**
	 * Looks for the lower-case element names (pub-date/year, abstract) and the
	 * `article-type` attribute of `article` elements.
	 *
	 * All three values are written with set(); an `article` element without
	 * the attribute yields an empty string for `type`.
	 *
	 * @param DOMDocument $domDocument
	 */
	private function findElementsForPubMedCentral( DOMDocument $domDocument ) {

		foreach ( $domDocument->getElementsByTagName( 'pub-date' ) as $pubDate ) {
			foreach ( $pubDate->getElementsByTagName( 'year' ) as $year ) {
				$this->filteredRecord->set( 'year', $year->nodeValue );
			}
		}

		foreach ( $domDocument->getElementsByTagName( 'abstract' ) as $abstract ) {
			$this->filteredRecord->set( 'abstract', $this->normalizeWhitespace( $abstract->nodeValue ) );
		}

		foreach ( $domDocument->getElementsByTagName( 'article' ) as $article ) {
			$this->filteredRecord->set( 'type', $article->getAttribute( 'article-type' ) );
		}
	}

	/**
	 * Trims the text and collapses every run of two or more whitespace
	 * characters into a single space.
	 *
	 * @param string $text
	 *
	 * @return string
	 */
	private function normalizeWhitespace( $text ) {

		$text = trim( (string)$text );

		// DOM hands out UTF-8; the `u` modifier makes PCRE match on characters
		// rather than bytes so a multi-byte sequence is never split.
		$normalized = preg_replace( '#\s{2,}#u', ' ', $text );

		// preg_replace() returns null on a PCRE error; keep the trimmed text
		return $normalized === null ? $text : $normalized;
	}

}
|
86
|
|
|
|