Passed
Push — master ( bd2115...5437ab )
by Xavier
01:30
created

EutilsEfetch::getEmailsFromAffiliations()   A

Complexity

Conditions 1
Paths 1

Size

Total Lines 3

Duplication

Lines 0
Ratio 0 %

Code Coverage

Tests 0
CRAP Score 2

Importance

Changes 0
Metric Value
dl 0
loc 3
rs 10
c 0
b 0
f 0
cc 1
nc 1
nop 1
ccs 0
cts 0
cp 0
crap 2
1
<?php
2
3
namespace PubPeerFoundation\PublicationDataExtractor\Resources\Extractors;
4
5
use Tightenco\Collect\Support\Arr;
6
7
class EutilsEfetch extends Extractor implements ProvidesPublicationData, ProvidesIdentifiersData, ProvidesAuthorsData, ProvidesJournalData, ProvidesUpdatesData
8
{
9
    protected $updateTypes = [
10
        'ErratumIn' => 'Erratum',
11
        'RetractionIn' => 'Retraction',
12
        'ExpressionOfConcernIn' => 'Expression of concern',
13
    ];
14
15
    /**
     * Point the search tree at the document's `PubmedArticle` node.
     *
     * The extract*() methods of this class read their values from
     * $this->searchTree, so this sets the root of those lookups.
     */
    protected function fillSearchTree(): void
    {
        $articleNode = $this->document->PubmedArticle;

        $this->searchTree = $articleNode;
    }
22 4
23
    /**
     * Populate the `publication` entry of the resource output.
     *
     * Title, PMID, publication date and abstract are all read from the
     * `MedlineCitation` branch of the search tree. The URL is the PubMed
     * base address with the PMID appended.
     */
    public function extractPublicationData(): void
    {
        $tree = $this->searchTree;

        // Values are resolved in the same order the output keys are listed.
        $title = get_string($tree, 'MedlineCitation.Article.ArticleTitle');
        $pubmedUrl = 'http://www.ncbi.nlm.nih.gov/pubmed/'.get_string($tree, 'MedlineCitation.PMID');
        $publishedAt = date_from_pub_date(
            data_get($tree, 'MedlineCitation.Article.Journal.JournalIssue.PubDate')
        );
        $abstract = get_string($tree, 'MedlineCitation.Article.Abstract.AbstractText');

        $this->resourceOutput['publication'] = [
            'title' => $title,
            'url' => $pubmedUrl,
            'published_at' => $publishedAt,
            'abstract' => $abstract,
        ];
    }
35
36 4
    /**
     * Populate the `identifiers` entries of the resource output.
     *
     * One entry is appended per `ArticleId` found under
     * `PubmedData.ArticleIdList`: the node cast to string is the value and
     * its `IdType` entry is the type. When the journal ISSN resolves to a
     * truthy string, it is appended as one more identifier of type `issn`.
     */
    public function extractIdentifiersData(): void
    {
        $articleIds = $this->searchTree->PubmedData->ArticleIdList->ArticleId;

        foreach ($articleIds as $articleId) {
            $entry = [
                'value' => (string) $articleId,
                'type' => (string) $articleId['IdType'],
            ];

            $this->resourceOutput['identifiers'][] = $entry;
        }

        $issn = get_string($this->searchTree, 'MedlineCitation.Article.Journal.ISSN');

        if ($issn) {
            $this->resourceOutput['identifiers'][] = [
                'value' => $issn,
                'type' => 'issn',
            ];
        }
    }
55
56 4
    /**
     * Populate the `journal` entry of the resource output.
     *
     * The entry holds the journal title read from the search tree and the
     * list of ISSNs returned by getIssns().
     */
    public function extractJournalData(): void
    {
        $journalTitle = get_string($this->searchTree, 'MedlineCitation.Article.Journal.Title');
        $issns = $this->getIssns();

        $this->resourceOutput['journal'] = [
            'title' => $journalTitle,
            'issn' => $issns,
        ];
    }
67 4
68 4
    /**
     * Populate the `authors` entries of the resource output.
     *
     * Author extraction is best-effort: any \Exception raised while walking
     * the author list is swallowed. Authors appended before the failure stay
     * in the output.
     */
    public function extractAuthorsData(): void
    {
        try {
            $this->loopOverAuthors();
        } catch (\Exception $ignored) {
            // Intentionally ignored: a failure here must not interrupt the extraction.
            return;
        }
    }
80
81 4
    /**
     * Extract and format data needed for the Updates Relationship
     * on the Publication Model.
     *
     * Walks the `CommentsCorrections` entries of the search tree and records
     * an update for each one whose `RefType` is a key of $this->updateTypes.
     * Entries with any other `RefType` are skipped.
     */
    public function extractUpdatesData(): void
    {
        $corrections = get_array(
            $this->searchTree,
            'MedlineCitation.CommentsCorrectionsList.CommentsCorrections'
        );

        foreach ($corrections as $correction) {
            $refType = stringify($correction['RefType']);

            // Direct key lookup instead of a loosely-compared in_array() over
            // array_keys(): no type juggling and no key-list copy per iteration.
            if (! isset($this->updateTypes[$refType])) {
                continue;
            }

            $this->getUpdateFromCorrection($correction);
        }
    }
89 4
90 4
    /**
     * Append one entry to the `updates` output for a single correction.
     *
     * The entry holds the timestamp parsed from the correction's `RefSource`,
     * its `PMID` as the `pubmed` identifier, and the readable label for its
     * `RefType`. If any of these lookups throws an \Exception, the correction
     * is skipped and nothing is appended.
     *
     * @param  mixed  $correction  one `CommentsCorrections` entry
     */
    protected function getUpdateFromCorrection($correction): void
    {
        try {
            $timestamp = $this->getUpdateTimestamp(stringify($correction->RefSource));
            $pubmedId = get_string($correction, 'PMID');
            $readableType = $this->getReadableUpdateType(stringify($correction['RefType']));

            $this->resourceOutput['updates'][] = [
                'timestamp' => $timestamp,
                'identifier' => ['pubmed' => $pubmedId],
                'type' => $readableType,
            ];
        } catch (\Exception $ignored) {
            // An unreadable date format must not stop the extraction.
            return;
        }
    }
107
108
    /**
     * Translate a raw `RefType` into its human-readable update label.
     *
     * @param  string  $refType  a key of $this->updateTypes, e.g. 'ErratumIn'
     * @return string  the matching label, e.g. 'Erratum'
     */
    protected function getReadableUpdateType($refType)
    {
        $labels = $this->updateTypes;

        return $labels[$refType];
    }
112
113
    /**
     * Collect the ISSN values available in the search tree.
     *
     * Two locations are checked, in this order: the journal `ISSN` and the
     * `ISSNLinking` of the Medline journal info. Only truthy values are kept,
     * and no de-duplication is performed.
     *
     * @return array
     */
    protected function getIssns()
    {
        $paths = [
            'MedlineCitation.Article.Journal.ISSN',
            'MedlineCitation.MedlineJournalInfo.ISSNLinking',
        ];

        $found = [];

        foreach ($paths as $path) {
            $candidate = get_string($this->searchTree, $path);

            if ($candidate) {
                $found[] = $candidate;
            }
        }

        return $found;
    }
132
133
    /**
     * Create an output entry for every `Author` node found under
     * `MedlineCitation.Article.AuthorList`.
     */
    protected function loopOverAuthors(): void
    {
        $authorNodes = $this->searchTree->MedlineCitation->Article->AuthorList->Author;

        foreach ($authorNodes as $authorNode) {
            $this->createAuthorEntry($authorNode);
        }
    }
142
143
    /**
     * Append one author to the `authors` output.
     *
     * Authors whose `LastName` is empty are skipped entirely. For the others,
     * the entry holds the fore name, last name, the affiliations and the
     * email derived from those affiliations.
     *
     * @param  mixed  $author  one `Author` node
     */
    protected function createAuthorEntry($author): void
    {
        $lastName = get_string($author, 'LastName');

        if (empty($lastName)) {
            return;
        }

        $affiliations = $this->loopOverAffiliations($author);
        $firstName = get_string($author, 'ForeName');
        $email = $this->getEmailsFromAffiliations($affiliations);

        $this->resourceOutput['authors'][] = [
            'first_name' => $firstName,
            'last_name' => $lastName,
            'email' => $email,
            'affiliation' => $affiliations,
        ];
    }
161
162
    /**
     * Build the list of affiliations attached to an author.
     *
     * Each `AffiliationInfo` child of the author yields one
     * `['name' => ...]` item holding its `Affiliation` string.
     *
     * @param  mixed  $author  one `Author` node, read via property access
     * @return array  list of `['name' => string]` items, possibly empty
     */
    protected function loopOverAffiliations($author): array
    {
        $collected = [];

        foreach ($author->AffiliationInfo as $info) {
            $collected[] = ['name' => get_string($info, 'Affiliation')];
        }

        return $collected;
    }
177
178
    /**
     * Get the first email found in the affiliation names.
     *
     * The `name` of every affiliation is plucked, the resulting list is
     * searched for emails, and the entry at index 0 of that search result
     * is returned.
     *
     * @param  array  $affiliations  list of `['name' => string]` items
     * @return string
     */
    protected function getEmailsFromAffiliations($affiliations): string
    {
        $names = Arr::pluck($affiliations, 'name');
        $emails = find_emails_in_array($names);

        return get_string($emails, 0);
    }
188
189
    /**
     * Derive a timestamp from the date embedded in a correction's RefSource.
     *
     * The date must appear as "YYYY Mon" or "YYYY Mon D", preceded by
     * whitespace and immediately followed by a semicolon
     * (e.g. "... 2015 Jan 5;...").
     *
     * @param  string  $refSource
     * @return mixed  the `timestamp` property of the date_from_human_readable() result
     *
     * @throws \UnexpectedValueException when no date in that format is found
     */
    protected function getUpdateTimestamp($refSource)
    {
        $found = preg_match('/\s(\d{4}\s\w{3}(\s\d{1,2})?);/', (string) $refSource, $matches);

        // Without this guard a non-matching RefSource would read the undefined
        // $matches[1] and hand null to the date helper. Failing explicitly with
        // an \Exception subclass lets the caller's catch skip the update.
        if ($found !== 1) {
            throw new \UnexpectedValueException(
                sprintf('Unable to find a date in RefSource "%s".', (string) $refSource)
            );
        }

        return date_from_human_readable($matches[1])->timestamp;
    }
195
}
196