Completed
Push — add/changelog-tooling ( b30521...fa9ac3 )
by
unknown
1097:09 queued 1086:59
created

KeepAChangelogParser::__construct()   A

Complexity

Conditions 4
Paths 8

Size

Total Lines 12

Duplication

Lines 0
Ratio 0 %

Importance

Changes 0
Metric Value
cc 4
nc 8
nop 1
dl 0
loc 12
rs 9.8666
c 0
b 0
f 0
1
<?php // phpcs:ignore WordPress.Files.FileName.NotHyphenatedLowercase
2
/**
3
 * Parser for a keepachangelog.com format changelog.
4
 *
5
 * @package automattic/jetpack-changelogger
6
 */
7
8
// phpcs:disable WordPress.WP.AlternativeFunctions, WordPress.NamingConventions.ValidFunctionName.MethodNameInvalid, WordPress.NamingConventions.ValidVariableName
9
10
namespace Automattic\Jetpack\Changelog;
11
12
use DateTime;
13
use InvalidArgumentException;
14
15
/**
16
 * Parser for a keepachangelog.com format changelog.
17
 */
18
class KeepAChangelogParser extends Parser {
19
20
	/**
21
	 * Bullet for changes.
22
	 *
23
	 * @var string
24
	 */
25
	private $bullet = '-';
26
27
	/**
28
	 * Output date format.
29
	 *
30
	 * @var string
31
	 */
32
	private $dateFormat = 'Y-m-d';
33
34
	/**
35
	 * If true, try to parse authors from entries.
36
	 *
37
	 * @var bool
38
	 */
39
	private $parseAuthors = false;
40
41
	/**
42
	 * If positive, wrap changes at this many columns.
43
	 *
44
	 * @var int
45
	 */
46
	private $wrap = 80;
47
48
	/**
	 * Constructor.
	 *
	 * Options that are missing from `$config` keep the defaults declared on the
	 * corresponding properties.
	 *
	 * @param array $config Configuration.
	 *  - bullet: (string) Bullet for changes. Default '-'.
	 *  - dateFormat: (string) Date format to use in output. Default 'Y-m-d'.
	 *  - parseAuthors: (bool) Try to parse authors out of change entries. Default false.
	 *  - wrap: (int) If positive, wrap changes at this many columns. Default 80.
	 *    A non-positive value (e.g. 0) is stored as given.
	 */
	public function __construct( array $config ) {
		if ( ! empty( $config['bullet'] ) ) {
			$this->bullet = $config['bullet'];
		}
		if ( ! empty( $config['dateFormat'] ) ) {
			$this->dateFormat = $config['dateFormat'];
		}
		$this->parseAuthors = ! empty( $config['parseAuthors'] );

		// Use isset() rather than ! empty(): 0 is a meaningful "not positive" value for
		// this option, and empty( 0 ) would silently keep the default of 80.
		if ( isset( $config['wrap'] ) ) {
			$this->wrap = (int) $config['wrap'];
		}
	}
69
70
	/**
	 * Test if there's a link definition at the end of a string.
	 *
	 * The definition must occupy the whole last line of `$s`: `[id]: destination`,
	 * optionally followed by a title in double quotes, single quotes, or parentheses,
	 * and optional trailing spaces. Nothing (not even a newline) may follow it.
	 *
	 * @param string $s String.
	 * @return array|null Match data, or null if `$s` does not end in a link definition.
	 *  - match: (string) The full text of the matched line.
	 *  - id: (string) The link label, without the brackets.
	 *  - link: (string) The destination, including the title if one is present.
	 */
	private function endsInLink( $s ) {
		// Note the four backslashes: inside a single-quoted PHP string `\\\\.` reaches PCRE
		// as `\\.`, i.e. "a backslash followed by any character". With only two backslashes
		// PCRE would see `\.` (a literal dot) and escaped delimiters inside a title
		// would never be accepted.
		$pattern = '/^\[([^]]+)\]: *(\S+(?: +(?:"(?:[^"]|\\\\.)*"|\'(?:[^\']|\\\\.)*\'|\((?:[^()]|\\\\.)*\)))?) *\z/m';

		if ( ! preg_match( $pattern, $s, $m ) ) {
			return null;
		}

		return array(
			'match' => $m[0],
			'id'    => $m[1],
			'link'  => $m[2],
		);
	}
86
87
	/**
88
	 * Split a string in two at the first occurrence of a substring.
89
	 *
90
	 * @param string   $haystack String to split.
91
	 * @param string   ...$needles Strings to split on. Earliest match in $haystack wins.
92
	 * @return string[] Two elements: The part before $needles and the part after, both trimmed.
93
	 */
0 ignored issues
show
Documentation introduced by
Should the type for parameter $needles not be string[][]?

This check looks for @param annotations where the type inferred by our type inference engine differs from the declared type.

It makes a suggestion as to what type it considers more descriptive.

Most often this is a case of a parameter that can be null in addition to its declared types.

Loading history...
94
	private function split( $haystack, ...$needles ) {
		// Offset of the earliest needle found so far; stays false until one is found.
		$i = false;
		foreach ( $needles as $needle ) {
			$j = strpos( $haystack, $needle );
			// A needle that does not occur yields false. It must not displace a position
			// that was already found: min( 5, false ) evaluates to false in PHP, which
			// would make the method report "no match" even though an earlier needle matched.
			if ( false !== $j && ( false === $i || $j < $i ) ) {
				$i = $j;
			}
		}

		// None of the needles occur: everything is the "before" part.
		if ( false === $i ) {
			return array( trim( $haystack ), '' );
		}

		// The needle itself stays at the start of the second part (before trimming).
		return array(
			trim( (string) substr( $haystack, 0, $i ) ),
			trim( (string) substr( $haystack, $i ) ),
		);
	}
108
109
	/**
	 * Parse changelog data into a Changelog object.
	 *
	 * This does not handle all markdown! In particular, it makes the following assumptions:
	 *
	 * - All level-2 ATX headings with no indentation are changelog entry headings.
	 * - Changelog entry headings consist of either a bare version number or a version number as
	 *   link text with no destination or title, followed by a spaced ASCII hyphen, followed by a timestamp.
	 * - All level-3 ATX headings with no indentation are changelog entry subheadings.
	 * - All change entries are formatted as lists starting with the configured bullet followed by a space,
	 *   and do not make use of lazy continuation. Indentation of continued
	 *   lines is equal to the length of the bullet plus the space.
	 * - All link definitions come at the end of the document, with no intervening blank lines or
	 *   other content, and are not indented and do not contain newlines. Link definitions for
	 *   changelog entries have no titles.
	 *
	 * Link definitions that are not referenced by any entry heading are appended to the
	 * returned Changelog's epilogue, in their original order.
	 *
	 * @param string $changelog Changelog contents.
	 * @return Changelog
	 * @throws InvalidArgumentException If the changelog data cannot be parsed.
	 */
	public function parse( $changelog ) {
		$ret = new Changelog();

		// A change starts with the bullet plus one space; continuation lines are
		// indented by that same width.
		$bullet = $this->bullet . ' ';
		$len    = strlen( $bullet );
		$indent = str_repeat( ' ', $len );

		// Fix newlines and expand tabs.
		$changelog = strtr( $changelog, array( "\r\n" => "\n" ) );
		$changelog = strtr( $changelog, array( "\r" => "\n" ) );
		while ( strpos( $changelog, "\t" ) !== false ) {
			// Each pass expands the first tab on every line to the next multiple of 4 columns.
			$changelog = preg_replace_callback(
				'/^([^\t\n]*)\t/m',
				function ( $m ) {
					return $m[1] . str_repeat( ' ', 4 - ( mb_strlen( $m[1] ) % 4 ) );
				},
				$changelog
			);
		}

		// Extract link definitions.
		// endsInLink() only matches when the definition is the very last thing in the string,
		// so trailing whitespace (including the file's final newline) has to go first.
		$links     = array();
		$usedlinks = array();
		$changelog = rtrim( $changelog );
		while ( ( $m = $this->endsInLink( $changelog ) ) ) { // phpcs:ignore WordPress.CodeAnalysis.AssignmentInCondition.FoundInWhileCondition
			$links[ $m['id'] ]     = $m['link'];
			$usedlinks[ $m['id'] ] = false;
			// Remove the matched definition from the END of the document. (Keeping the
			// tail instead of dropping it would match the same link forever.)
			$changelog = (string) substr( $changelog, 0, -strlen( $m['match'] ) );
			// Drop the single newline that separated it from the previous line, so that a
			// directly preceding definition is now at the end of the string.
			if ( "\n" === substr( $changelog, -1 ) ) {
				$changelog = (string) substr( $changelog, 0, -1 );
			}
		}
		// Definitions were collected bottom-up; restore document order.
		$links = array_reverse( $links, true );

		// Everything up to the first level-2 ATX heading is the prologue.
		list( $prologue, $changelog ) = $this->split( "\n$changelog", "\n## " );
		$ret->setPrologue( $prologue );

		// Entries make up the rest of the document.
		$entries = array();
		while ( '' !== $changelog ) {
			// Extract the first entry from the changelog file, then extract the heading from it.
			list( $content, $changelog ) = $this->split( $changelog, "\n## " );
			list( $heading, $content )   = $this->split( $content, "\n" );

			// Parse the heading and create a ChangelogEntry for it.
			if ( ! preg_match( '/^## +(\[?[^] ]+\]?) - (.+?) *$/', $heading, $m ) ) {
				throw new InvalidArgumentException( "Invalid heading: $heading" );
			}
			$link      = null;
			$version   = $m[1];
			$timestamp = $m[2];
			if ( '[' === $version[0] && ']' === substr( $version, -1 ) ) {
				// "[1.2.3]" is a reference-style link; its definition must exist.
				$version = substr( $version, 1, -1 );
				if ( ! isset( $links[ $version ] ) ) {
					throw new InvalidArgumentException( "Heading seems to have a linked version, but link was not found: $heading" );
				}
				$link                  = $links[ $version ];
				$usedlinks[ $version ] = true;
			}
			try {
				$timestamp = new DateTime( $timestamp );
			} catch ( \Exception $ex ) {
				throw new InvalidArgumentException( "Heading has an invalid timestamp: $heading", 0, $ex );
			}
			$entry     = $this->newChangelogEntry(
				$version,
				array(
					'link'      => $link,
					'timestamp' => $timestamp,
				)
			);
			$entries[] = $entry;

			// Extract the prologue, if any.
			// $bullet already ends with the separating space, so it is used as-is here.
			list( $prologue, $content ) = $this->split( "\n$content", "\n### ", "\n$bullet" );
			$entry->setPrologue( $prologue );

			if ( '' === $content ) {
				// Huh, no changes.
				continue;
			}

			// Inject an empty heading if necessary so the change parsing can be more straightforward.
			if ( '#' !== $content[0] ) {
				$content = "### \n$content";
			}

			// Now parse all the subheadings and changes.
			while ( '' !== $content ) {
				list( $section, $content )    = $this->split( $content, "\n### " );
				list( $subheading, $section ) = $this->split( $section, "\n" );
				// Strip the leading "### "; an injected empty heading yields ''.
				$subheading = trim( (string) substr( $subheading, 4 ) );
				$changes    = array();
				$cur        = '';
				$lines      = explode( "\n", $section );
				while ( ! empty( $lines ) ) {
					$line   = array_shift( $lines );
					$prefix = substr( $line, 0, $len );
					if ( $prefix === $bullet ) {
						// A new bullet: flush the change collected so far and start the next one.
						$cur = trim( $cur );
						if ( '' !== $cur ) {
							$changes[] = $cur;
						}
						$cur = substr( $line, $len ) . "\n";
					} elseif ( $prefix === $indent ) {
						// Continuation line: append to the current change.
						$cur .= substr( $line, $len ) . "\n";
					} elseif ( '' === $line ) {
						// Blank line inside a change: keep it as a paragraph break.
						$cur .= "\n";
					} else {
						// If there are no more subsections and the rest of the lines don't contain
						// bullets, assume it's an epilogue. Otherwise, assume it's an error.
						$rest = $line . "\n" . implode( "\n", $lines );
						if ( '' === $content && strpos( $rest, "\n$bullet" ) === false ) {
							$entry->setEpilogue( $rest );
							break;
						} else {
							throw new InvalidArgumentException( "Malformatted changes list near $line" );
						}
					}
				}
				// Flush the last change of the section.
				$cur = trim( $cur );
				if ( '' !== $cur ) {
					$changes[] = $cur;
				}
				foreach ( $changes as $change ) {
					$author = '';
					// NOTE(review): this pattern takes everything after the first space of the
					// last line as the author, mirroring what format() writes (" author").
					// The excluded parentheses suggest " (author)" may have been intended; confirm.
					if ( $this->parseAuthors && preg_match( '/ ([^()\n]+)$/', $change, $m ) ) {
						$author = $m[1];
						// Keep the text BEFORE the author suffix as the change content.
						$change = (string) substr( $change, 0, -strlen( $m[0] ) );
					}
					$entry->appendChange(
						$this->newChangeEntry(
							array(
								'subheading' => $subheading,
								'author'     => $author,
								'content'    => $change,
							)
						)
					);
				}
			}
		}
		// The entries belong on the Changelog object, not on the (string) input.
		$ret->setEntries( $entries );

		// Append any unused links to the epilogue.
		$epilogue = $ret->getEpilogue();
		foreach ( $links as $id => $content ) {
			if ( empty( $usedlinks[ $id ] ) ) {
				$epilogue .= "\n[$id]: $content";
			}
		}
		$ret->setEpilogue( $epilogue );

		return $ret;
	}
280
281
	/**
	 * Write a Changelog object to a string.
	 *
	 * Output order: the document prologue, then one "## version - date" section per
	 * entry (entry prologue, changes grouped under "### subheading" lines, entry
	 * epilogue), then the document epilogue, and finally one link definition per
	 * entry that has a link.
	 *
	 * @param Changelog $changelog Changelog object.
	 * @return string
	 */
	public function format( Changelog $changelog ) {
		$bullet = $this->bullet . ' ';
		$indent = str_repeat( ' ', strlen( $bullet ) );
		$links  = array();
		$out    = '';

		$docPrologue = trim( $changelog->getPrologue() );
		if ( '' !== $docPrologue ) {
			$out .= "$docPrologue\n\n";
		}

		foreach ( $changelog->getEntries() as $entry ) {
			// Entry heading: linked versions are written as "[version]" and their
			// destinations are collected for the link definitions at the end.
			$version = $entry->getVersion();
			$link    = $entry->getLink();
			$out    .= '## ';
			if ( null === $link ) {
				$out .= $version;
			} else {
				$links[ $version ] = $link;
				$out              .= "[{$version}]";
			}
			$out .= ' - ' . $entry->getTimestamp()->format( $this->dateFormat ) . "\n";

			$entryPrologue = trim( $entry->getPrologue() );
			if ( '' !== $entryPrologue ) {
				$out .= "\n$entryPrologue\n\n";
			}

			foreach ( $entry->getChangesBySubheading() as $heading => $changes ) {
				// Changes without a subheading are listed directly under the entry heading.
				if ( '' !== $heading ) {
					$out .= "### $heading\n";
				}
				foreach ( $changes as $change ) {
					$text   = trim( $change->getContent() );
					$author = $change->getAuthor();
					if ( '' !== $author ) {
						$text .= " {$author}";
					}
					// Continuation lines are indented to line up with the text after the bullet.
					$out .= $bullet . str_replace( "\n", "\n$indent", $text ) . "\n";
				}
				$out .= "\n";
			}

			$entryEpilogue = trim( $entry->getEpilogue() );
			if ( '' !== $entryEpilogue ) {
				$out .= "\n$entryEpilogue\n";
			}
			$out .= "\n";
		}

		$docEpilogue = trim( $changelog->getEpilogue() );
		if ( '' !== $docEpilogue ) {
			$out .= "\n$docEpilogue\n";
		}

		if ( ! empty( $links ) ) {
			// Separate the link definitions from the epilogue with a blank line, unless the
			// epilogue itself already ends in a link definition.
			if ( ! $this->endsInLink( $docEpilogue ) ) {
				$out .= "\n";
			}
			foreach ( $links as $id => $target ) {
				$out .= "[$id]: $target\n";
			}
		}

		return $out;
	}
351
352
}
353