BibtexAuthorListParser::grabFirstnameInitials()   A
last analyzed

Complexity

Conditions 6
Paths 8

Size

Total Lines 29
Code Lines 17

Duplication

Lines 0
Ratio 0 %

Code Coverage

Tests 17
CRAP Score 6

Importance

Changes 1
Bugs 0 Features 0
Metric Value
eloc 17
c 1
b 0
f 0
dl 0
loc 29
ccs 17
cts 17
cp 1
rs 9.0777
cc 6
nc 8
nop 1
crap 6
1
<?php
2
3
namespace SCI\Bibtex;
4
5
/**
6
 * @note most of the parsing code has been copied from PARSECREATORS therefore
7
 * thanks go to the authors of http://bibliophile.sourceforge.net
8
 *
9
 * Comments to the source code can be found at
10
 * http://sourceforge.net/projects/bibliophile/files/bibtexParse/ released under
11
 * the GPL license.
12
 *
13
 * @license GNU GPL v2+
14
 * @since 1.0
15
 */
16
class BibtexAuthorListParser {

	/**
	 * Tokens with a lowercase first letter (e.g. 'von', 'de', 'la') found among
	 * the first-name tokens of the author currently being parsed. Filled by
	 * grabFirstnameInitials() and reset for every author by parse().
	 *
	 * @var array
	 */
	private $prefix = [];

	/**
	 * Splits a BibTeX author field into its individual authors and returns
	 * each of them as one string in the order
	 * "firstname initials prefix surname appellation".
	 *
	 * The author field can be (delimiters between authors are 'and' or '&'):
	 * 1. <first-tokens> <von-tokens> <last-tokens>
	 * 2. <von-tokens> <last-tokens>, <first-tokens>
	 * 3. <von-tokens> <last-tokens>, <jr-tokens>, <first-tokens>
	 *
	 * @since 1.0
	 *
	 * @param string $input
	 *
	 * @return array list of author strings, one entry per author
	 */
	public function parse( $input ) {

		$authorList = [];

		// Authors are separated by a whitespace-delimited 'and' or '&'
		$authorArray = preg_split( "/\s(and|&)\s/i", trim( $input ) );

		foreach ( $authorArray as $value ) {
			$appellation = '';
			$prefix = '';

			$this->prefix = [];

			// Normalise once and use the same string in every branch below.
			// Splitting the raw value on " " would yield empty tokens for stray
			// whitespace left over around a delimiter, and array_pop() would
			// then return an empty surname.
			$value = preg_replace( "/\s{2,}/", ' ', trim( $value ) );

			$author = explode( ",", $value );
			$size = count( $author );

			if ( $size === 1 ) {
				// No commas therefore something like Mark Grimshaw, Mark Nicholas Grimshaw,
				// M N Grimshaw, Mark N. Grimshaw
				//
				// Is the complete surname enclosed in {...}? Not if the brace content
				// starts with a backslash (\) because then it is probably a special
				// latex-sign. The second pattern also rules out NESTED curly braces
				// such as author="a{\"{o}}"
				if ( preg_match( "/(.*){([^\\\].*)}/", $value, $matches ) &&
					!preg_match( "/(.*){\\\.{.*}.*}/", $value ) ) {
					$author = explode( " ", $matches[1] );
					$surname = $matches[2];
				} else {
					$author = explode( " ", $value );
					// last of array is surname (no prefix if entered correctly)
					$surname = array_pop( $author );
				}
			} elseif ( $size === 2 ) {
				// Something like Grimshaw, Mark or Grimshaw, Mark Nicholas or
				// Grimshaw, M N or Grimshaw, Mark N.
				// first of array is surname (perhaps with prefix)
				list( $surname, $prefix ) = $this->grabSurname( array_shift( $author ) );
			} else {
				// Three or more parts, something like Bush, Jr. III, George W
				// middle of array is 'Jr.', 'IV' etc.
				$appellation = implode( ' ', array_splice( $author, 1, 1 ) );
				// first of array is surname (perhaps with prefix)
				list( $surname, $prefix ) = $this->grabSurname( array_shift( $author ) );
			}

			// Whatever is left are first names, initials and possibly prefix tokens
			$remainder = implode( " ", $author );

			list( $firstname, $initials ) = $this->grabFirstnameInitials( $remainder );

			// Prefix tokens found among the first names take precedence over
			// a prefix found in front of the surname
			if ( $this->prefix !== [] ) {
				$prefix = implode( ' ', $this->prefix );
			}

			$surname = $surname . ' ' . trim( $appellation );

			$authorList[] = $this->concatenate( $firstname, $initials, $surname, $prefix );
		}

		return $authorList;
	}

	/**
	 * Joins the non-empty name parts with single spaces in the order
	 * firstname, initials, prefix, surname.
	 *
	 * @param string $firstname
	 * @param string $initials
	 * @param string $surname
	 * @param string $prefix
	 *
	 * @return string
	 */
	private function concatenate( $firstname, $initials, $surname, $prefix ) {

		$author = [
			trim( $firstname ),
			trim( $initials ),
			trim( $prefix ),
			trim( $surname )
		];

		return implode( ' ', array_filter( $author ) );
	}

	/**
	 * Sorts the space separated tokens of $remainder into first names,
	 * initials and prefix tokens. Prefix tokens are appended to $this->prefix.
	 *
	 * @note firstname and initials which may be of form "A.B.C." or "A. B. C. " or " A B C " etc.
	 *
	 * @param string $remainder
	 *
	 * @return array [ firstname, initials ], both strings (possibly empty)
	 */
	private function grabFirstnameInitials( $remainder ) {

		$initialsArray = [];
		$firstnameArray = [];

		foreach ( explode( " ", $remainder ) as $value ) {

			// Leading or doubled spaces produce empty tokens, nothing to classify
			if ( $value === '' ) {
				continue;
			}

			if ( $this->startsWithLowercase( $value ) ) {
				$this->prefix[] = $value;
			} elseif ( preg_match( "/[a-zA-Z]{2,}/", trim( $value ) ) ) {
				// at least two consecutive letters, treated as a full first name
				$firstnameArray[] = trim( $value );
			} else {
				// "A.B." becomes "A B"
				$initialsArray[] = trim( str_replace( ".", " ", trim( $value ) ) );
			}
		}

		return [ implode( " ", $firstnameArray ), implode( " ", $initialsArray ) ];
	}

	/**
	 * Splits a surname part into the surname itself and its leading prefix.
	 *
	 * @note surname may have title such as 'den', 'von', 'de la' etc. -
	 * characterised by first character lowercased.  Any uppercased part means
	 * lowercased parts following are part of the surname (e.g. Van den Bussche)
	 *
	 * @param string $input
	 *
	 * @return array [ surname, prefix ], prefix is an empty string when there is none
	 */
	private function grabSurname( $input ) {

		$noPrefix = false;
		$surname = [];
		$prefix = [];

		foreach ( explode( " ", $input ) as $value ) {
			if ( !$noPrefix && $this->startsWithLowercase( $value ) ) {
				$prefix[] = $value;
			} else {
				$surname[] = $value;
				// once the surname has started, later lowercase parts belong to it
				$noPrefix = true;
			}
		}

		return [ implode( " ", $surname ), implode( " ", $prefix ) ];
	}

	/**
	 * Whether the first byte of the token is an ASCII lowercase letter (a-z).
	 *
	 * @param string $token
	 *
	 * @return boolean
	 */
	private function startsWithLowercase( $token ) {
		return preg_match( '/^[a-z]/', $token ) === 1;
	}

}
176