Issues in BibtexParser.php (master) - Issues in master - SemanticMediaWiki/SemanticCite - Measure and Improve Code Quality continuously with Scrutinizer

Issues (155)

src/Bibtex/BibtexParser.php (2 issues)

Severity

Unknown 2

<?php

namespace SCI\Bibtex;

/**
 * @note most of the parsing code has been copied from PARSEENTRIES therefore
 * thanks goes to the authors of http://bibliophile.sourceforge.net
 *
 * Comments to the source code can be found at
 * http://sourceforge.net/projects/bibliophile/files/bibtexParse/ and is
 * released under the GPL license.
 *
 * @note There might be a better parser out there but I didn't want to spend too
 * much time reviewing code, and PARSEENTRIES does the job well.
 *
 * Any fancy macro stuff or other complicated string parsing isn't supported
 * given that the bibtex format misses a proper specification. PARSEENTRIES
 * surely allows to cover more edge cases but for what we want to achieve (to ease
 * copy and paste of existing bibtex records) the current implementation is
 * sufficient.
 *
 * BibtexParserTest provides the test interface to verify edge cases.
 *
 * @license GNU GPL v2+
 * @since 1.0
 */
class BibtexParser {

	/**
	 * Parses a single bibtex record into a flat array.
	 *
	 * The returned array always starts with the keys `type` (lower-cased
	 * entry type, e.g. "article") and `reference` (the citation key, an empty
	 * string when the record has none), followed by one element per field
	 * with a lower-cased field name as key and the field text, stripped of
	 * its enclosing `{}` or `""`, as value.
	 *
	 * @since  1.0
	 *
	 * @param string $bibtex
	 *
	 * @return array empty when the text does not look like a bibtex record
	 */
	public function parse( $bibtex ) {

		$matches = $this->findBibtexFormatMatches( $bibtex );

		if ( $matches === [] ) {
			return [];
		}

		$head = [
			'type'      => strtolower( trim( $matches[1] ) ),
			'reference' => $matches[2]
		];

		// `+` keeps the head entries should a field be named "type" or "reference"
		return $head + $this->parseFields( $matches[3] );
	}

	/**
	 * Splits a record into [ leading text, entry type, citation key, field list ].
	 *
	 * @param string $bibtex
	 *
	 * @return array either empty or a list with exactly the four elements above
	 */
	private function findBibtexFormatMatches( $bibtex ) {

		$matches = preg_split( "/@(.*)[{(](.*),/U", $bibtex, 2, PREG_SPLIT_DELIM_CAPTURE );

		// Silently retreat from processing (no match, or a PCRE failure
		// that made preg_split return false)
		if ( !is_array( $matches ) || !isset( $matches[3] ) ) {
			return [];
		}

		// A "=" within the supposed citation key means the record has no key
		// and the first field was captured instead, therefore split again
		// without expecting a key
		if ( strpos( $matches[2], '=' ) !== false ) {
			$matches = preg_split( "/@(.*)\s*[{(](.*)/U", $bibtex, 2, PREG_SPLIT_DELIM_CAPTURE );

			if ( !is_array( $matches ) || !isset( $matches[3] ) ) {
				return [];
			}
		}

		return $matches;
	}

	/**
	 * Turns the `key = value, key = value ...` part of a record into an
	 * array of lower-cased key => value pairs.
	 *
	 * The values are isolated first, then cut out of the content so that
	 * only `key =, key =,` remains from which the keys are read.
	 *
	 * @param string $content
	 *
	 * @return array
	 */
	private function parseFields( $content ) {

		// Trailing whitespace (e.g. a final newline from copy and paste) would
		// otherwise hide the closing delimiter of the entry
		$content = rtrim( $content );

		if ( $content === '' ) {
			return [];
		}

		// Drop the closing delimiter of the entry or a dangling separator
		if ( in_array( substr( $content, -1 ), [ '}', ')', ',' ], true ) ) {
			$content = (string)substr( $content, 0, -1 );
		}

		$split = explode( '=', $content, 2 );

		// Without a "=" there is no field to extract
		if ( !isset( $split[1] ) ) {
			return [];
		}

		$values = [];
		$remainder = $split[1];

		// Compare explicitly instead of relying on truthiness, a remaining
		// "0" (e.g. `volume=0`) is a valid value and must not end the loop
		while ( $remainder !== false && $remainder !== '' ) {
			list( $entry, $remainder ) = $this->splitField( $remainder );
			$values[] = $entry;
		}

		// Cut the values out of the content. Each value starts directly
		// behind the "=" of its field (splitField consumes the separator up
		// to and including the "="), hence the position is derived from the
		// next "=" instead of searching for the value text which could
		// also occur within a preceding key (e.g. `year=e`).
		$offset = 0;

		foreach ( $values as $value ) {
			$equals = strpos( $content, '=', $offset );

			if ( $equals === false ) {
				break;
			}

			$offset = $equals + 1;
			$content = substr_replace( $content, '', $offset, strlen( $value ) );
		}

		// Ensure the last key is terminated by "=," as well
		if ( substr( trim( $content ), -1 ) !== ',' ) {
			$content .= ',';
		}

		$keys = preg_split( "/=,/", $content );

		if ( !is_array( $keys ) ) {
			return [];
		}

		// The split leaves a trailing element behind the last "=,"
		array_pop( $keys );

		$elements = [];

		foreach ( $keys as $key ) {
			$value = trim( (string)array_shift( $values ) );

			// remove any dangling ',' left on final field of entry
			$value = trim( rtrim( $value, ',' ) );

			// Only skip fields without any content, "0" is a legitimate value
			if ( $value === '' ) {
				continue;
			}

			$elements[strtolower( trim( $key ) )] = $this->removeDelimiters( $value );
		}

		return $elements;
	}

	/**
	 * Separates the first value of a segment from the remaining fields.
	 *
	 * The segment is split once at the first `, key =` separator; the
	 * separator itself is discarded.
	 *
	 * @param string $seg
	 *
	 * @return array [ first value, remaining text ] with `false` as remaining
	 *  text when no further field follows
	 */
	private function splitField( $seg ) {

		// The limit of 2 is what the parsing relies on: one value in front of
		// the separator and everything behind it as untouched remainder.
		$parts = preg_split( "/,\s*([-_.:,a-zA-Z0-9]+)\s*={1}\s*/U", $seg, 2 );

		if ( !is_array( $parts ) ) {
			return [ $seg, false ];
		}

		return isset( $parts[1] ) ? [ $parts[0], $parts[1] ] : [ $parts[0], false ];
	}

	/**
	 * Removes the enclosing `"..."` or `{...}` from a field value.
	 *
	 * A value starting with `"` loses its first and last character, a value
	 * starting with `{` is only unwrapped when it also ends with `}`. Any
	 * other value (numbers, bare words) is returned as is.
	 *
	 * @param string $string
	 *
	 * @return string
	 */
	private function removeDelimiters( $string ) {

		if ( !is_string( $string ) || $string === '' ) {
			return $string;
		}

		if ( $string[0] === '"' ) {
			return (string)substr( $string, 1, -1 );
		}

		if ( $string[0] === '{' && substr( $string, -1 ) === '}' ) {
			return (string)substr( $string, 1, -1 );
		}

		return $string;
	}
}


1		<?php
2
3		namespace SCI\Bibtex;
4
5		/**
6		* @note most of the parsing code has been copied from PARSEENTRIES therefore
7		* thanks goes to the authors of http://bibliophile.sourceforge.net
8		*
9		* Comments to the source code can be found at
10		* http://sourceforge.net/projects/bibliophile/files/bibtexParse/ and is
11		* released under the GPL license.
12		*
13		* @note There might be a better parser out there but I didn't want to spend to
14		* much time reviewing code therefore PARSEENTRIES does the job well.
15		*
16		* Any fancy macro stuff or other complicated string parsing isn't supported
17		* given that the bibtex format misses a proper specification. PARSEENTRIES
18		* surely allows to cover more edge cases but for what we want to achieve (to ease
19		* copy and paste of existing bibtex records) the current implementation is
20		* sufficient.
21		*
22		* BibtexParserTest provides the test interface to verify edge cases.
23		*
24		* @license GNU GPL v2+
25		* @since 1.0
26		*/
27		class BibtexParser {
28
29		/**
30		* @var array
31		*/
32		private $undefinedStrings = [];
		0 ignored issues – show introduced 2024-12-28 10:41 UTC by Report Bug Copy Issue Report Show Similar Issues like this The private property `$undefinedStrings` is not used, and could be removed. Loading history...
33
34		/**
35		* @var array
36		*/
37		private $strings = [];
		0 ignored issues – show introduced 2024-12-28 10:41 UTC by Report Bug Copy Issue Report Show Similar Issues like this The private property `$strings` is not used, and could be removed. Loading history...
38
39		/**
40		* @since 1.0
41		*
42		* @return array
43		*/
44	7	public function parse( $bibtex ) {
45
46	7	if ( ( $matches = $this->findBibtexFormatMatches( $bibtex ) ) === [] ) {
47	2	return [];
48		}
49
50		$head = [
51	5	'type' => strtolower( trim( $matches[1] ) ),
52	5	'reference' => $matches[2]
53		];
54
55	5	return $head + $this->parseFields( $matches[3] );
56		}
57
58	7	private function findBibtexFormatMatches( $bibtex ) {
59
60	7	$matches = preg_split("/@(.)[{(](.),/U", $bibtex, 2, PREG_SPLIT_DELIM_CAPTURE );
61
62		// Silently retreat from processing
63	7	if ( !isset( $matches[2] ) ) {
64	2	return [];
65		}
66
67	5	if( preg_match("/=/", $matches[2] ) ) {
68		$matches = preg_split("/@(.)\s[{(](.*)/U", $bibtex, 2, PREG_SPLIT_DELIM_CAPTURE );
69		}
70
71	5	return $matches;
72		}
73
74	5	private function parseFields( $content ) {
75	5	$elements = [];
76	5	$values = [];
77
78	5	$length = strlen( $content );
79
80	5	if( $content[$length - 1] == "}" \|\| $content[$length - 1] == ")" \|\| $content[$length - 1] == ",") {
81	5	$content = substr( $content, 0, $length - 1 );
82		}
83
84	5	$split = preg_split("/=/", $content, 2 );
85	5	$string = $split[1];
86
87	5	while( $string ) {
88	5	list( $entry, $string ) = $this->splitField( $string );
89	5	$values[] = $entry;
90		}
91
92	5	foreach( $values as $value ) {
93	5	$pos = strpos( $content, $value);
94	5	$content = substr_replace( $content, '', $pos, strlen( $value ) );
95		}
96
97	5	$rev = strrev( trim( $content ) );
98
99	5	if( $rev[0] != ',') {
100	5	$content .= ',';
101		}
102
103	5	$keys = preg_split("/=,/", $content );
104	5	array_pop($keys);
105
106	5	foreach( $keys as $key ) {
107	5	$value = trim( array_shift( $values ) );
108	5	$rev = strrev( $value );
109
110		// remove any dangling ',' left on final field of entry
111	5	if( $rev[0] == ',') {
112	1	$value = rtrim($value, ",");
113		}
114
115	5	if(!$value) {
116		continue;
117		}
118
119	5	$key = strtolower(trim($key));
120	5	$value = trim($value);
121	5	$elements[$key] = $this->removeDelimiters( $value );
122		}
123
124	5	return $elements;
125		}
126
127	5	private function splitField( $seg ) {
128
129	5	$array = preg_split("/,\s([-_.:,a-zA-Z0-9]+)\s={1}\s*/U", $seg, PREG_SPLIT_DELIM_CAPTURE );
130
131		// if(!array_key_exists( 1, $array ) ) {
132		// return array( $array[0], FALSE);
133		// }
134
135	5	return isset( $array[1] ) ? [ $array[0], $array[1] ] : [ $array[0], false ];
136		}
137
138	5	private function removeDelimiters( $string ) {
139
140	5	if( $string && ( $string[0] == "\"") ) {
141	1	$string = substr($string, 1);
142	1	$string = substr($string, 0, -1);
143	5	} else if ( $string && ( $string[0] == "{") ) {
144	5	if( strlen( $string ) > 0 && $string[strlen($string)-1] == "}" ) {
145	5	$string = substr($string, 1);
146	5	$string = substr($string, 0, -1);
147		}
148
149		// } else if(!is_numeric($string) && !array_key_exists($string, $this->strings)
150		// && (array_search($string, $this->undefinedStrings) === FALSE ) ) {
151		// $this->undefinedStrings[] = $string; // Undefined string that is not a year etc.
152		// return '';
153		}
154
155	5	return $string;
156		}
157		}
158

SemanticMediaWiki / SemanticCite

Issues (155)

src/Bibtex/BibtexParser.php (2 issues)

Severity

Introduced By

Duplication Side-by-Side

Filter issues like