BibtexParser   A
last analyzed

Complexity

Total Complexity 24

Size/Duplication

Total Lines 129
Duplicated Lines 0 %

Test Coverage

Coverage 96.36%

Importance

Changes 1
Bugs 0 Features 0
Metric Value
wmc 24
eloc 54
c 1
b 0
f 0
dl 0
loc 129
ccs 53
cts 55
cp 0.9636
rs 10

5 Methods

Rating   Name   Duplication   Size   Complexity  
A splitField() 0 9 2
B removeDelimiters() 0 18 7
A findBibtexFormatMatches() 0 14 3
A parse() 0 12 2
B parseFields() 0 51 10
1
<?php
2
3
namespace SCI\Bibtex;
4
5
/**
 * Minimal parser turning a single BibTeX record into a flat PHP array.
 *
 * @note most of the parsing code has been copied from PARSEENTRIES therefore
 * thanks goes to the authors of http://bibliophile.sourceforge.net
 *
 * Comments to the source code can be found at
 * http://sourceforge.net/projects/bibliophile/files/bibtexParse/ and is
 * released under the GPL license.
 *
 * @note There might be a better parser out there but I didn't want to spend too
 * much time reviewing code therefore PARSEENTRIES does the job well.
 *
 * Any fancy macro stuff or other complicated string parsing isn't supported
 * given that the bibtex format misses a proper specification. PARSEENTRIES
 * surely allows to cover more edge cases but for what we want to achieve (to ease
 * copy and paste of existing bibtex records) the current implementation is
 * sufficient.
 *
 * BibtexParserTest provides the test interface to verify edge cases.
 *
 * @license GNU GPL v2+
 * @since 1.0
 */
class BibtexParser {

	/**
	 * Parses the first BibTeX record found in the given text.
	 *
	 * The result always starts with the keys 'type' (lower-cased entry type)
	 * and 'reference' (citation key, empty when the record has none), followed
	 * by one entry per field, keyed by the lower-cased field name.
	 *
	 * @since  1.0
	 *
	 * @param string $bibtex
	 *
	 * @return array empty when the text does not look like a BibTeX record
	 */
	public function parse( $bibtex ) {

		$matches = $this->findBibtexFormatMatches( $bibtex );

		if ( $matches === [] ) {
			return [];
		}

		$head = [
			'type'      => strtolower( trim( $matches[1] ) ),
			'reference' => trim( $matches[2] )
		];

		// Array union: 'type' and 'reference' of the head take precedence over
		// fields of the same name found in the record body.
		return $head + $this->parseFields( $matches[3] );
	}

	/**
	 * Splits the text into [ leading text, entry type, citation key, fields ].
	 *
	 * @param string $bibtex
	 *
	 * @return array empty when no "@type{key," like header was found
	 */
	private function findBibtexFormatMatches( $bibtex ) {

		$matches = preg_split( "/@(.*)[{(](.*),/U", $bibtex, 2, PREG_SPLIT_DELIM_CAPTURE );

		// Silently retreat from processing
		if ( !is_array( $matches ) || !isset( $matches[3] ) ) {
			return [];
		}

		// A "=" in the supposed citation key means the record has no key and
		// the first field was captured instead; split again without a key so
		// that the key slot stays empty and all fields end up in the body.
		if ( strpos( $matches[2], '=' ) !== false ) {
			$matches = preg_split( "/@(.*)\s*[{(](.*)/U", $bibtex, 2, PREG_SPLIT_DELIM_CAPTURE );

			if ( !is_array( $matches ) || !isset( $matches[3] ) ) {
				return [];
			}
		}

		return $matches;
	}

	/**
	 * Turns the body of a record ("key = value, key = value ... }") into an
	 * array of field name => value with the value delimiters removed.
	 *
	 * @param string $content
	 *
	 * @return array
	 */
	private function parseFields( $content ) {

		// Trailing whitespace (e.g. the newline of a pasted record) would
		// otherwise hide the terminator that is removed below
		$content = rtrim( $content );

		if ( $content === '' ) {
			return [];
		}

		// Drop the entry terminator or a dangling field separator
		if ( in_array( substr( $content, -1 ), [ '}', ')', ',' ], true ) ) {
			$content = substr( $content, 0, -1 );
		}

		// Everything in front of the first "=" is the name of the first field
		$split = explode( '=', $content, 2 );

		if ( !isset( $split[1] ) ) {
			return [];
		}

		$elements = [];
		$key = $split[0];
		$remainder = $split[1];

		do {
			list( $rawValue, $nextKey, $rest ) = $this->splitField( $remainder );

			$name = strtolower( trim( $key ) );

			// Remove any dangling ',' left on the final field of an entry
			$value = trim( rtrim( trim( $rawValue ), ',' ) );

			// Compare against '' on purpose, "0" is a legitimate value
			if ( $name !== '' && $value !== '' ) {
				$elements[$name] = $this->removeDelimiters( $value );
			}

			$key = $nextKey;
			$remainder = $rest;
		} while ( $key !== null );

		return $elements;
	}

	/**
	 * Cuts the leading value off a "value, nextkey = value, ..." segment.
	 *
	 * @param string $seg
	 *
	 * @return array [ value, name of the next field, remaining segment ]; the
	 * last two are null when the segment holds no further field
	 */
	private function splitField( $seg ) {

		// Limit and flags are passed explicitly: one split, keep the field name
		$parts = preg_split(
			"/,\s*([-_.:,a-zA-Z0-9]+)\s*={1}\s*/U",
			$seg,
			2,
			PREG_SPLIT_DELIM_CAPTURE
		);

		if ( !is_array( $parts ) || count( $parts ) < 3 ) {
			return [ $seg, null, null ];
		}

		return [ $parts[0], $parts[1], $parts[2] ];
	}

	/**
	 * Removes one pair of enclosing "..." or {...} delimiters from a value.
	 *
	 * Values without a matching pair of delimiters are returned unchanged.
	 *
	 * @param string $string
	 *
	 * @return string
	 */
	private function removeDelimiters( $string ) {

		$string = (string)$string;

		if ( strlen( $string ) < 2 ) {
			return $string;
		}

		$first = $string[0];
		$last = substr( $string, -1 );

		if ( ( $first === '"' && $last === '"' ) || ( $first === '{' && $last === '}' ) ) {
			// Cast guards against a false return for "{}" on older PHP versions
			return (string)substr( $string, 1, -1 );
		}

		return $string;
	}

}
158