1
|
|
|
<?php |
2
|
|
|
|
3
|
|
|
namespace Onoi\Remi\Ncbi; |
4
|
|
|
|
5
|
|
|
use Onoi\Remi\FilteredRecord; |
6
|
|
|
use DOMDocument; |
7
|
|
|
|
8
|
|
|
/**
 * Reads an XML document and copies selected element values (publication
 * year, abstract, MeSH headings, ISO journal abbreviation, article type)
 * into the injected FilteredRecord.
 *
 * Two sets of tag names are searched in the same document: the capitalized
 * ones handled by findElementsForPubMed() and the lower-case ones handled by
 * findElementsForPubMedCentral(). Tag lookups are case-sensitive, hence the
 * two sets do not overlap.
 *
 * @license GNU GPL v2+
 * @since 0.1
 *
 * @author mwjames
 */
class NcbiEntrezAbstractXMLProcessor {

	/**
	 * Record that receives every value extracted from the XML.
	 *
	 * @var FilteredRecord
	 */
	private $filteredRecord;

	/**
	 * @since 0.1
	 *
	 * @param FilteredRecord $filteredRecord record to be populated by doProcess()
	 */
	public function __construct( FilteredRecord $filteredRecord ) {
		$this->filteredRecord = $filteredRecord;
	}

	/**
	 * Parses the XML and stores the values found in the FilteredRecord.
	 *
	 * An empty payload or a document that cannot be parsed leaves the record
	 * untouched.
	 *
	 * @since 0.1
	 *
	 * @param string $xml
	 */
	public function doProcess( $xml ) {

		// DOMDocument::loadXML() rejects an empty source (ValueError on PHP 8,
		// warning on earlier versions); there is nothing to extract anyway.
		if ( $xml === null || $xml === false || $xml === '' ) {
			return;
		}

		$domDocument = new DOMDocument();

		// LIBXML_NONET keeps the parser from touching the network while
		// loading a document that originates from a remote response.
		if ( !$domDocument->loadXML( $xml, LIBXML_NONET ) ) {
			return;
		}

		$this->findElementsForPubMed( $domDocument );
		$this->findElementsForPubMedCentral( $domDocument );
	}

	/**
	 * Extracts values from the `PubDate/Year`, `Abstract`, `MeshHeading` and
	 * `ISOAbbreviation` elements.
	 *
	 * @param DOMDocument $domDocument
	 */
	private function findElementsForPubMed( DOMDocument $domDocument ) {

		// `year` and `abstract` are written with set(), therefore the last
		// matching element in document order determines the stored value.
		foreach ( $domDocument->getElementsByTagName( 'PubDate' ) as $pubDate ) {
			foreach ( $pubDate->getElementsByTagName( 'Year' ) as $year ) {
				$this->filteredRecord->set( 'year', $year->nodeValue );
			}
		}

		foreach ( $domDocument->getElementsByTagName( 'Abstract' ) as $abstract ) {
			$this->filteredRecord->set( 'abstract', $this->normalizeWhitespace( $abstract->nodeValue ) );
		}

		foreach ( $domDocument->getElementsByTagName( 'MeshHeading' ) as $meshHeading ) {
			$this->filteredRecord->append( 'subject', $this->normalizeWhitespace( $meshHeading->nodeValue ) );
		}

		// http://www.library.illinois.edu/biotech/j-abbrev.html notes:
		// "... database uses the "standard abbreviation", as defined by ISSN
		// and used also by BIOSIS and CASSI ..."
		foreach ( $domDocument->getElementsByTagName( 'ISOAbbreviation' ) as $isoAbbreviation ) {
			$this->filteredRecord->append( 'iso-abbreviation', trim( $isoAbbreviation->nodeValue ) );
		}
	}

	/**
	 * Extracts values from the `pub-date/year` and `abstract` elements and
	 * from the `article-type` attribute of `article` elements.
	 *
	 * Runs after findElementsForPubMed() in doProcess(), so a `year` or
	 * `abstract` found here replaces a value set by that method.
	 *
	 * @param DOMDocument $domDocument
	 */
	private function findElementsForPubMedCentral( DOMDocument $domDocument ) {

		foreach ( $domDocument->getElementsByTagName( 'pub-date' ) as $pubDate ) {
			foreach ( $pubDate->getElementsByTagName( 'year' ) as $year ) {
				$this->filteredRecord->set( 'year', $year->nodeValue );
			}
		}

		foreach ( $domDocument->getElementsByTagName( 'abstract' ) as $abstract ) {
			$this->filteredRecord->set( 'abstract', $this->normalizeWhitespace( $abstract->nodeValue ) );
		}

		foreach ( $domDocument->getElementsByTagName( 'article' ) as $article ) {
			$this->filteredRecord->set( 'type', $article->getAttribute( 'article-type' ) );
		}
	}

	/**
	 * Trims the text and collapses each run of two or more whitespace
	 * characters into a single space.
	 *
	 * @param string $text
	 *
	 * @return string
	 */
	private function normalizeWhitespace( $text ) {

		$text = trim( $text );

		// The `u` modifier makes PCRE match whole UTF-8 characters instead of
		// single bytes, so a byte that belongs to a multi-byte character can
		// never be mistaken for whitespace.
		$normalized = preg_replace( '#\s{2,}#u', ' ', $text );

		// preg_replace() yields null on a PCRE error; keep the trimmed text
		// rather than storing null in the record.
		return $normalized === null ? $text : $normalized;
	}

}
86
|
|
|
|