Passed
Pull Request — master (#195)
by
unknown
17:55
created

RisReader::parseFile()   C

Complexity

Conditions 15
Paths 6

Size

Total Lines 63
Code Lines 34

Duplication

Lines 0
Ratio 0 %

Importance

Changes 3
Bugs 0 Features 0
Metric Value
cc 15
eloc 34
nc 6
nop 2
dl 0
loc 63
rs 5.9166
c 3
b 0
f 0

How to fix   Long Method    Complexity   

Long Method

Small methods make your code easier to understand, in particular if combined with a good name. Besides, if your method is small, finding a good name is usually much easier.

For example, if you find yourself adding comments to a method's body, this is usually a good sign to extract the commented part to a new method, and use the comment as a starting point when coming up with a good name for this new method.

Commonly applied refactorings include Extract Method: move a cohesive part of the method body into a new, well-named method.

1
<?php
2
namespace EWW\Dpf\Services\ImportExternalMetadata;
3
4
/*
5
 * This file is part of the TYPO3 CMS project.
6
 *
7
 * It is free software; you can redistribute it and/or modify it under
8
 * the terms of the GNU General Public License, either version 2
9
 * of the License, or any later version.
10
 *
11
 * For the full copyright and license information, please read the
12
 * LICENSE.txt file that was distributed with this source code.
13
 *
14
 * The TYPO3 project - inspiring people to share!
15
 */
16
17
use Symfony\Component\Serializer\Encoder\XmlEncoder;
18
19
20
/**
21
 * Class RisReader
22
 *
23
 * A reader for the Web of Science RIS-Format
24
 *
25
 * @package EWW\Dpf\Services\ImportExternalMetadata
26
 */
27
class RisReader
28
{
29
    public static $tagMap = [
30
        'FN' => 'File Name',
31
        'VR' => 'Version Number',
32
        'PT' => 'Publication Type',
33
        'AU' => 'Author',
34
        'AF' => 'Author Full Name',
35
        'BA' => 'Book Author',
36
        'BF' => 'Book Author Full Name',
37
        'CA' => 'Group Author',
38
        'GP' => 'Book Group Author',
39
        'BE' => 'Editor',
40
        'TI' => 'Document Title',
41
        'SO' => 'Publication Name',
42
        'SE' => 'Book Series Title',
43
        'BS' => 'Book Series Subtitle',
44
        'LA' => 'Language',
45
        'DT' => 'Document Type',
46
        'CT' => 'Conference Title',
47
        'CY' => 'Conference Date',
48
        'CL' => 'Conference Location',
49
        'SP' => 'Conference Sponsors',
50
        'HO' => 'Conference Host',
51
        'DE' => 'Author Keywords',
52
        'ID' => 'Keywords Plus',
53
        'AB' => 'Abstract',
54
        'C1' => 'Author Address',
55
        'RP' => 'Reprint Address',
56
        'EM' => 'E-mail Address',
57
        'RI' => 'ResearcherID Number',
58
        'OI' => 'ORCID Identifier',
59
        'FU' => 'Funding Agency and Grant Number',
60
        'FX' => 'Funding Text',
61
        'CR' => 'Cited References',
62
        'NR' => 'Cited Reference Count',
63
        'TC' => 'WoS Times Cited Count',
64
        'Z9' => 'Total Times Cited Count',
65
        'U1' => 'Usage Count las 180 days',
66
        'U2' => 'Usage Count since 2013',
67
        'PU' => 'Publisher',
68
        'PI' => 'Publisher City',
69
        'PA' => 'Publisher Address',
70
        'SN' => 'ISSN',
71
        'EI' => 'eISSN',
72
        'BN' => 'ISBN',
73
        'J9' => 'Character-29 Source Abbreviation',
74
        'JI' => 'ISO Source Abbreviation',
75
        'PD' => 'Publication Date',
76
        'PY' => 'Year Published',
77
        'VL' => 'Volume',
78
        'IS' => 'Issue',
79
        'SI' => 'Special Issue',
80
        'PN' => 'Part Number',
81
        'SU' => 'Supplement',
82
        'MA' => 'Meeting Abstract',
83
        'BP' => 'Beginning Page',
84
        'EP' => 'Ending Page',
85
        'AR' => 'Article Number',
86
        'DI' => 'DOI',
87
        'D2' => 'Book DOI',
88
        'EA' => 'Early access date',
89
        'EY' => 'Early access year',
90
        'PG' => 'Page Count',
91
        'P2' => 'Chapter Count',
92
        'WC' => 'WoS Categories',
93
        'SC' => 'Research Areas',
94
        'GA' => 'Document Delivery Number',
95
        'PM' => 'PubMed ID',
96
        'UT' => 'Accession Number',
97
        'OA' => 'Open Access Indicator',
98
        'HP' => 'ESI Hot Paper',
99
        'HC' => 'ESI Highly Cited Paper',
100
        'DA' => 'Date generated',
101
        'ER' => 'End of Record',
102
        'EF' => 'End of File'
103
    ];
104
105
106
    public static $publicationTypes = [
107
        'J' => 'Journal',
108
        'B' => 'Book',
109
        'S' => 'Series',
110
        'P' => 'Patent'
111
    ];
112
113
    /**
     * Converts a two-letter tag into the lower-case, hyphen-separated form
     * of its descriptive name in self::$tagMap, e.g. 'TI' => 'document-title'.
     *
     * @param string $tag Two-letter tag, a key of self::$tagMap
     * @return string The derived tag name, or an empty string if the tag is not in self::$tagMap
     */
    public static function tagToTagName($tag)
    {
        // Guard the lookup: an unknown tag would otherwise raise an
        // "undefined index" warning and hand null to strtolower().
        if (!isset(self::$tagMap[$tag])) {
            return '';
        }

        return str_replace(' ', '-', strtolower(self::$tagMap[$tag]));
    }
122
123
    /**
     * Reads RIS data and groups its tagged lines into raw records.
     *
     * The input is split into lines on CR/LF; empty lines are skipped. The
     * first two characters of a line are its tag:
     *  - 'EF' stops reading,
     *  - 'ER' closes the current record and starts a new one,
     *  - a line whose first two characters are blank continues the most recent tag,
     *  - values are only stored for tags that are keys of self::$tagMap.
     * A record that is not closed by an 'ER' line is not returned.
     *
     * @param string $filePath Path of the file to read, or the RIS content itself if $contentOnly is true
     * @param bool $contentOnly Whether $filePath holds the content instead of a path
     * @return array List of records, each mapping a tag to the list of its trimmed values
     */
    protected function readFile($filePath, $contentOnly = false)
    {
        $content = $contentOnly ? (string) $filePath : file_get_contents($filePath);
        if ($content === false) {
            // The file could not be read: there are no records to return.
            return [];
        }

        // Both input modes use the same splitting, so blank lines are skipped
        // consistently. (file() with FILE_SKIP_EMPTY_LINES but without
        // FILE_IGNORE_NEW_LINES keeps blank lines, which ended up as empty
        // values of the previous tag.)
        $lines = preg_split('/[\r\n]+/', $content, -1, PREG_SPLIT_NO_EMPTY);
        if ($lines === false) {
            return [];
        }

        $risRecords = [];
        $risRecord = [];
        $currentTag = '';

        foreach ($lines as $line) {
            $line = $this->decodeLine($line);
            $tempTag = trim((string) substr($line, 0, 2));

            if ($tempTag === 'EF') {
                // End of file
                break;
            }

            if ($tempTag === 'ER') {
                // End of record
                $risRecords[] = $risRecord;
                $risRecord = [];
                continue;
            }

            if ($tempTag) {
                // A blank tag means a continuation line: keep the previous tag.
                $currentTag = $tempTag;
            }

            if ($currentTag && array_key_exists($currentTag, self::$tagMap)) {
                $risRecord[$currentTag][] = trim((string) substr($line, 2));
            }
        }

        return $risRecords;
    }

    /**
     * Decodes a line that is detected as UTF-8 with utf8_decode() and removes
     * a leading '?' from the decoded result; other lines are returned unchanged.
     *
     * NOTE(review): the leading '?' is presumably what a byte order mark turns
     * into when decoded - confirm. utf8_decode() is deprecated as of PHP 8.2.
     *
     * @param string $line
     * @return string
     */
    private function decodeLine($line)
    {
        if (mb_detect_encoding($line) !== 'UTF-8') {
            return $line;
        }

        $line = utf8_decode($line);
        if (strpos($line, '?') === 0) {
            return (string) substr($line, 1);
        }

        return $line;
    }
213
214
    /**
     * Placeholder without behaviour: takes no arguments, has an empty body
     * and returns nothing.
     */
    public function createRisRecords()
    {
    }
217
218
    /**
     * Parses RIS data into a list of entries keyed by tag.
     *
     * For the person tags (AF, AU, BA, BF, CA, GP, BE) an entry holds a list
     * of arrays with the keys 'family', 'given', 'suffix' and 'affiliation'.
     * For every other tag the values are joined with a single space; the 'PT'
     * value is additionally replaced by the lower-cased name from
     * self::$publicationTypes, or by 'unknown' if it is not listed there.
     *
     * @param string $filePath Path of the file to read, or the RIS content itself if $contentOnly is true
     * @param bool $contentOnly Whether $filePath holds the content instead of a path
     * @return array List of entries, one per record returned by readFile()
     */
    public function parseFile($filePath, $contentOnly = false)
    {
        $personTags = ['AF', 'AU', 'BA', 'BF', 'CA', 'GP', 'BE'];
        $risEntries = [];

        foreach ($this->readFile($filePath, $contentOnly) as $risRecord) {
            $risEntry = [];

            foreach ($risRecord as $tag => $risFieldValues) {
                if (in_array($tag, $personTags, true)) {
                    $persons = $this->parsePersons($tag, $risFieldValues, $risRecord);
                    if ($persons) {
                        // The key is only created if at least one person was found.
                        $risEntry[$tag] = $persons;
                    }
                    continue;
                }

                $value = implode(' ', $risFieldValues);
                if ($tag === 'PT') {
                    $value = array_key_exists($value, self::$publicationTypes)
                        ? strtolower(self::$publicationTypes[$value])
                        : 'unknown';
                }
                $risEntry[$tag] = $value;
            }

            $risEntries[] = $risEntry;
        }

        return $risEntries;
    }

    /**
     * Splits the "family, given, suffix" values of a person tag into their parts.
     *
     * @param string $tag The person tag the values belong to
     * @param array $fieldValues The raw values of that tag
     * @param array $risRecord The complete record, used to look up affiliations of 'AF' names
     * @return array List of persons; values without any name part and affiliation are left out
     */
    private function parsePersons($tag, array $fieldValues, array $risRecord)
    {
        $persons = [];

        foreach ($fieldValues as $fieldValue) {
            // Pad to three parts: most names have no suffix, some no given
            // name, and list() must not read undefined offsets.
            list($family, $given, $suffix) = array_pad(
                array_map('trim', explode(',', $fieldValue)),
                3,
                null
            );

            // Affiliations are only looked up for full author names.
            $affiliations = ($tag === 'AF') ? $this->findAffiliations($fieldValue, $risRecord) : [];

            if ($family || $given || $suffix || $affiliations) {
                $persons[] = [
                    'family' => $family,
                    'given' => $given,
                    'suffix' => $suffix,
                    'affiliation' => $affiliations
                ];
            }
        }

        return $persons;
    }

    /**
     * Collects the 'C1' values whose leading "[...]" part contains the given name.
     *
     * @param string $authorName The name exactly as given in the 'AF' value
     * @param array $risRecord The record that may contain 'C1' values
     * @return array For each matching 'C1' value, the text following the closing bracket
     */
    private function findAffiliations($authorName, array $risRecord)
    {
        // An empty name would match every bracketed address.
        if ($authorName === '' || !array_key_exists('C1', $risRecord)) {
            return [];
        }

        // The name is data, not a pattern: quote it so that characters such
        // as '.', '(' or '/' can neither break nor loosen the expression.
        $pattern = '/^\[.*?(' . preg_quote($authorName, '/') . ').*?\](.*)/u';

        $affiliations = [];
        foreach ($risRecord['C1'] as $affiliation) {
            if (preg_match($pattern, trim($affiliation), $matches)) {
                $affiliations[] = $matches[2];
            }
        }

        return $affiliations;
    }
282
283
    /**
     * Encodes a record as XML, using the names from tagToTagName() as keys.
     *
     * The values of 'AF' are stored under the name of 'AU' and those of 'BF'
     * under the name of 'BA'. The values of 'AU' and 'BA' themselves are only
     * used if the record has no non-empty 'AF' or 'BF' respectively.
     *
     * @param array $risRecord Record keyed by tag
     * @return mixed The result of XmlEncoder::encode() for the 'xml' format
     */
    public function risRecordToXML($risRecord)
    {
        // Full-name tag => tag whose name it is stored under.
        $storedAs = ['AF' => 'AU', 'BF' => 'BA'];
        // Tag => full-name tag that takes precedence over it.
        $overriddenBy = ['AU' => 'AF', 'BA' => 'BF'];

        $xmlData = [];
        foreach ($risRecord as $tag => $fieldValues) {
            if (isset($storedAs[$tag])) {
                $xmlData[self::tagToTagName($storedAs[$tag])] = $fieldValues;
                continue;
            }

            if (isset($overriddenBy[$tag]) && !empty($risRecord[$overriddenBy[$tag]])) {
                // The full-name variant is present, so this tag is skipped.
                continue;
            }

            $xmlData[self::tagToTagName($tag)] = $fieldValues;
        }

        return (new XmlEncoder())->encode($xmlData, 'xml');
    }
316
317
}