1 | <?php |
||
2 | |||
3 | namespace SCI\DataValues; |
||
4 | |||
5 | /** |
||
6 | * @license GNU GPL v2+ |
||
7 | * @since 1.0 |
||
8 | * |
||
9 | * @author mwjames |
||
10 | */ |
||
class ResourceIdentifierStringValueParser {

	/**
	 * Type id of the resource identifier this parser handles, for example
	 * '_sci_doi' or '_sci_viaf'.
	 *
	 * @var string
	 */
	private $typeid;

	/**
	 * @since 1.0
	 *
	 * @param string $typeid
	 */
	public function __construct( $typeid ) {
		$this->typeid = $typeid;
	}

	/**
	 * Normalizes a user supplied identifier in place and checks it against
	 * the pattern registered for the type id.
	 *
	 * Normalization steps:
	 * - DOI: everything in front of the "10.<registrant>" prefix is removed
	 *   and the result is lower-cased
	 * - any other type: when the value contains "://", only the part after
	 *   the last "/" is kept; the value is then upper-cased and the
	 *   substrings "VIAF", "OCLC" and "PMID" are removed
	 *
	 * @since 1.0
	 *
	 * @param string &$value replaced by its normalized form
	 *
	 * @return int|false result of preg_match for the type specific pattern
	 *  (1 on match, 0 on mismatch), or false for an unknown type id
	 */
	public function parse( &$value ) {

		// Remove a possible url-prefix
		if ( $this->typeid === '_sci_doi' ) {
			// Remove anything before 10. as in http://dx.doi.org/10.1000/123
			$value = $this->removeDoiPrefix( $value );
		} elseif ( strrpos( $value, '://' ) !== false ) {
			// Remove anything before the last / as in http://foo/bar123
			$value = substr( $value, strrpos( $value, '/' ) + 1 );
		}

		// http://www.doi.org/doi_handbook/2_Numbering.html#2.4
		// All DOI names are converted to upper case upon registration
		// but since the system is case insensitive, we store it as lower
		// case as it "looks" better
		if ( $this->typeid === '_sci_doi' ) {
			$value = strtolower( $value );
		} else {
			// Remove other possible prefixes
			$value = str_replace( [ 'VIAF', 'OCLC', 'PMID' ], '', strtoupper( $value ) );
		}

		return $this->canMatchValueToPattern( $value );
	}

	/**
	 * @since 1.0
	 *
	 * @return string|null null when the type id is unknown
	 */
	public function getCanonicalName() {

		switch ( $this->typeid ) {
			case '_sci_viaf':
				return 'VIAF';
			case '_sci_oclc':
				return 'OCLC';
			case '_sci_pmid':
				return 'PMID';
			case '_sci_pmcid':
				return 'PMCID';
			case '_sci_olid':
				return 'OLID';
			case '_sci_doi':
				return 'DOI';
		}

		return null;
	}

	/**
	 * Returns the base URI an identifier of the current type is appended to.
	 *
	 * @since 1.0
	 *
	 * @return string|null null when the type id is unknown
	 */
	public function getResourceTargetUri() {

		switch ( $this->typeid ) {
			case '_sci_viaf':
				// http://www.oclc.org/research/activities/viaf.html
				// http://id.loc.gov/vocabulary/identifiers/viaf.html
				return "https://viaf.org/viaf/";
			case '_sci_oclc':
				// http://www.oclc.org/support/documentation/glossary/oclc.en.html#OCLCControlNumber
				return "https://www.worldcat.org/oclc/";
			case '_sci_pmcid':
				// https://www.nlm.nih.gov/pubs/techbull/nd09/nd09_pmc_urls.html
				return "https://www.ncbi.nlm.nih.gov/pmc/";
			case '_sci_pmid':
				return "https://www.ncbi.nlm.nih.gov/pubmed/";
			case '_sci_olid':
				return "https://openlibrary.org/books/";
			case '_sci_doi':
				// https://en.wikipedia.org/wiki/Digital_object_identifier
				return "https://doi.org/";
		}

		return null;
	}

	/**
	 * Cuts off everything in front of the DOI prefix ("10." followed by a
	 * registrant code of at least four digits).
	 *
	 * The prefix is located as a whole because a per-character search for
	 * "1" or "0" would already stop at the first such digit in a preceding
	 * resolver URL or label.
	 *
	 * @param string $value
	 *
	 * @return string the value starting at the DOI prefix, or the unmodified
	 *  value when no prefix is found (it will fail the pattern check later)
	 */
	private function removeDoiPrefix( $value ) {

		if ( preg_match( '/10\.[0-9]{4,}/', $value, $matches, PREG_OFFSET_CAPTURE ) === 1 ) {
			return substr( $value, $matches[0][1] );
		}

		return $value;
	}

	/**
	 * Matches the (already normalized) value against the pattern of the
	 * current type id.
	 *
	 * @param string &$value
	 *
	 * @return int|false preg_match result, or false for an unknown type id
	 */
	private function canMatchValueToPattern( &$value ) {

		switch ( $this->typeid ) {
			case '_sci_viaf':
			case '_sci_oclc':
			case '_sci_pmid':
				return preg_match( "/^[0-9]*$/", $value );
			case '_sci_pmcid':
				return preg_match( "/PMC[\d]+/", $value );
			case '_sci_olid':
				return preg_match( "/OL[A-Z0-9]+/", $value );
			case '_sci_doi':
				// http://stackoverflow.com/questions/27910/finding-a-doi-in-a-document-or-page#
				return preg_match( "/\b(10[.][0-9]{4,}(?:[.][0-9]+)*\/(?:(?![\"&\'])\S)+)\b/", $value );
		}

		return false;
	}

}
||
126 |