SemanticMediaWiki /
SemanticCite
| 1 | <?php |
||
| 2 | |||
| 3 | namespace SCI\DataValues; |
||
| 4 | |||
/**
 * Normalizes and validates resource identifier strings (VIAF, OCLC, PMID,
 * PMCID, OLID, DOI) according to the type id given at construction, and
 * provides the canonical name and resolver base URI for that type.
 *
 * @license GNU GPL v2+
 * @since 1.0
 *
 * @author mwjames
 */
class ResourceIdentifierStringValueParser {

	/**
	 * Type id selecting the identifier scheme, e.g. '_sci_doi'.
	 *
	 * @var string
	 */
	private $typeid;

	/**
	 * @param string $typeid one of '_sci_viaf', '_sci_oclc', '_sci_pmid',
	 *  '_sci_pmcid', '_sci_olid', '_sci_doi'; any other id never validates
	 */
	public function __construct( $typeid ) {
		$this->typeid = $typeid;
	}

	/**
	 * Strips known prefixes from the value, normalizes its letter case and
	 * checks the result against the pattern of the configured type.
	 *
	 * @since 1.0
	 *
	 * @param string &$value raw input; replaced in place by the normalized form
	 *
	 * @return bool true when the normalized value matches the type's pattern
	 */
	public function parse( &$value ) {

		$isDoi = $this->typeid === '_sci_doi';

		// Remove a possible url-prefix
		if ( $isDoi ) {
			// Remove anything before the DOI prefix as in
			// http://dx.doi.org/10.1000/123. The start of the DOI is located
			// by its "10.<registrant>" prefix; strcspn( $value, '10' ) would
			// stop at the first '1' OR '0' and truncate at the wrong place
			// for input such as https://doi1.example/10.1000/123.
			if ( preg_match( '/10\.[0-9]{4,}/', $value, $match, PREG_OFFSET_CAPTURE ) === 1 ) {
				$value = substr( $value, $match[0][1] );
			}
		} elseif ( strrpos( $value, '://' ) !== false ) {
			// Remove anything before the last / as in http://foo/bar123
			$value = substr( $value, strrpos( $value, '/' ) + 1 );
		}

		// Remove other possible prefixes and normalize the letter case
		if ( $isDoi ) {
			// http://www.doi.org/doi_handbook/2_Numbering.html#2.4
			// All DOI names are converted to upper case upon registration
			// but since the system is case insensitive, we store it as lower
			// case as it "looks" better
			$value = strtolower( $value );
		} else {
			$value = str_replace( [ 'VIAF', 'OCLC', 'PMID' ], '', strtoupper( $value ) );
		}

		return $this->canMatchValueToPattern( $value );
	}

	/**
	 * @return string|null canonical name of the type, null for an unknown type id
	 */
	public function getCanonicalName() {

		switch ( $this->typeid ) {
			case '_sci_viaf':
				return 'VIAF';
			case '_sci_oclc':
				return 'OCLC';
			case '_sci_pmid':
				return 'PMID';
			case '_sci_pmcid':
				return 'PMCID';
			case '_sci_olid':
				return 'OLID';
			case '_sci_doi':
				return 'DOI';
		}

		return null;
	}

	/**
	 * @return string|null base URI to which an identifier of the type can be
	 *  appended, null for an unknown type id
	 */
	public function getResourceTargetUri() {

		switch ( $this->typeid ) {
			case '_sci_viaf':
				// http://www.oclc.org/research/activities/viaf.html
				// http://id.loc.gov/vocabulary/identifiers/viaf.html
				return "https://viaf.org/viaf/";
			case '_sci_oclc':
				// http://www.oclc.org/support/documentation/glossary/oclc.en.html#OCLCControlNumber
				return "https://www.worldcat.org/oclc/";
			case '_sci_pmcid':
				// https://www.nlm.nih.gov/pubs/techbull/nd09/nd09_pmc_urls.html
				return "https://www.ncbi.nlm.nih.gov/pmc/";
			case '_sci_pmid':
				return "https://www.ncbi.nlm.nih.gov/pubmed/";
			case '_sci_olid':
				return "https://openlibrary.org/books/";
			case '_sci_doi':
				// https://en.wikipedia.org/wiki/Digital_object_identifier
				return "https://doi.org/";
		}

		// Explicit to mirror getCanonicalName() for unknown type ids
		return null;
	}

	/**
	 * Checks an already normalized value against the pattern of the type.
	 *
	 * @param string $value
	 *
	 * @return bool false for a non-matching value, an unknown type id or a
	 *  regex failure
	 */
	private function canMatchValueToPattern( $value ) {

		switch ( $this->typeid ) {
			case '_sci_viaf':
			case '_sci_oclc':
			case '_sci_pmid':
				// NOTE(review): "*" also accepts an empty string — confirm
				// that empty values are rejected elsewhere
				return preg_match( "/^[0-9]*$/", $value ) === 1;
			case '_sci_pmcid':
				// NOTE(review): not anchored, the token may occur anywhere in
				// the value — confirm this leniency is intended
				return preg_match( "/PMC[\d]+/", $value ) === 1;
			case '_sci_olid':
				// NOTE(review): not anchored, see the PMCID note above
				return preg_match( "/OL[A-Z0-9]+/", $value ) === 1;
			case '_sci_doi':
				// http://stackoverflow.com/questions/27910/finding-a-doi-in-a-document-or-page#
				return preg_match( "/\b(10[.][0-9]{4,}(?:[.][0-9]+)*\/(?:(?![\"&\'])\S)+)\b/", $value ) === 1;
		}

		return false;
	}

}
||
| 126 |