Completed
Branch master (19cd63)
by
unknown
40:04
created

BatchRowIterator::addOptions()   A

Complexity

Conditions 1
Paths 1

Size

Total Lines 3
Code Lines 2

Duplication

Lines 0
Ratio 0 %

Importance

Changes 0
Metric Value
cc 1
eloc 2
nc 1
nop 1
dl 0
loc 3
rs 10
c 0
b 0
f 0
1
<?php
2
/**
3
 * Allows iterating a large number of rows in batches transparently.
4
 * By default when iterated over returns the full query result as an
5
 * array of rows.  Can be wrapped in RecursiveIteratorIterator to
6
 * collapse those arrays into a single stream of rows queried in batches.
7
 *
8
 * This program is free software; you can redistribute it and/or modify
9
 * it under the terms of the GNU General Public License as published by
10
 * the Free Software Foundation; either version 2 of the License, or
11
 * (at your option) any later version.
12
 *
13
 * This program is distributed in the hope that it will be useful,
14
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
15
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16
 * GNU General Public License for more details.
17
 *
18
 * You should have received a copy of the GNU General Public License along
19
 * with this program; if not, write to the Free Software Foundation, Inc.,
20
 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
21
 * http://www.gnu.org/copyleft/gpl.html
22
 *
23
 * @file
24
 * @ingroup Maintenance
25
 */
26
class BatchRowIterator implements RecursiveIterator {
27
28
	/**
29
	 * @var IDatabase $db The database to read from
30
	 */
31
	protected $db;
32
33
	/**
34
	 * @var string|array $table The name or names of the table to read from
35
	 */
36
	protected $table;
37
38
	/**
39
	 * @var array $primaryKey The name of the primary key(s)
40
	 */
41
	protected $primaryKey;
42
43
	/**
44
	 * @var integer $batchSize The number of rows to fetch per iteration
45
	 */
46
	protected $batchSize;
47
48
	/**
49
	 * @var array $conditions Array of strings containing SQL conditions
50
	 *  to add to the query
51
	 */
52
	protected $conditions = [];
53
54
	/**
55
	 * @var array $joinConditions
56
	 */
57
	protected $joinConditions = [];
58
59
	/**
60
	 * @var array $fetchColumns List of column names to select from the
61
	 *  table suitable for use with IDatabase::select()
62
	 */
63
	protected $fetchColumns;
64
65
	/**
66
	 * @var string $orderBy SQL Order by condition generated from $this->primaryKey
67
	 */
68
	protected $orderBy;
69
70
	/**
71
	 * @var array $current The current iterator value
72
	 */
73
	private $current = [];
74
75
	/**
76
	 * @var integer key 0-indexed number of pages fetched since self::reset()
77
	 */
78
	private $key;
79
80
	/**
81
	 * @var array Additional query options
82
	 */
83
	protected $options = [];
84
85
	/**
86
	 * @param IDatabase $db The database to read from
87
	 * @param string|array $table      The name or names of the table to read from
88
	 * @param string|array $primaryKey The name or names of the primary key columns
89
	 * @param integer      $batchSize  The number of rows to fetch per iteration
90
	 * @throws InvalidArgumentException
91
	 */
92
	public function __construct( IDatabase $db, $table, $primaryKey, $batchSize ) {
93
		if ( $batchSize < 1 ) {
94
			throw new InvalidArgumentException( 'Batch size must be at least 1 row.' );
95
		}
96
		$this->db = $db;
97
		$this->table = $table;
98
		$this->primaryKey = (array)$primaryKey;
99
		$this->fetchColumns = $this->primaryKey;
100
		$this->orderBy = implode( ' ASC,', $this->primaryKey ) . ' ASC';
101
		$this->batchSize = $batchSize;
102
	}
103
104
	/**
105
	 * @param array $conditions Query conditions suitable for use with
106
	 *  IDatabase::select
107
	 */
108
	public function addConditions( array $conditions ) {
109
		$this->conditions = array_merge( $this->conditions, $conditions );
110
	}
111
112
	/**
113
	 * @param array $options Query options suitable for use with
114
	 *  IDatabase::select
115
	 */
116
	public function addOptions( array $options ) {
117
		$this->options = array_merge( $this->options, $options );
118
	}
119
120
	/**
121
	 * @param array $conditions Query join conditions suitable for use
122
	 *  with IDatabase::select
123
	 */
124
	public function addJoinConditions( array $conditions ) {
125
		$this->joinConditions = array_merge( $this->joinConditions, $conditions );
126
	}
127
128
	/**
129
	 * @param array $columns List of column names to select from the
130
	 *  table suitable for use with IDatabase::select()
131
	 */
132
	public function setFetchColumns( array $columns ) {
133
		// If it's not the all column selector merge in the primary keys we need
134
		if ( count( $columns ) === 1 && reset( $columns ) === '*' ) {
135
			$this->fetchColumns = $columns;
136
		} else {
137
			$this->fetchColumns = array_unique( array_merge(
138
				$this->primaryKey,
139
				$columns
140
			) );
141
		}
142
	}
143
144
	/**
145
	 * Extracts the primary key(s) from a database row.
146
	 *
147
	 * @param stdClass $row An individual database row from this iterator
148
	 * @return array Map of primary key column to value within the row
149
	 */
150
	public function extractPrimaryKeys( $row ) {
151
		$pk = [];
152
		foreach ( $this->primaryKey as $alias => $column ) {
153
			$name = is_numeric( $alias ) ? $column : $alias;
154
			$pk[$name] = $row->{$name};
155
		}
156
		return $pk;
157
	}
158
159
	/**
160
	 * @return array The most recently fetched set of rows from the database
161
	 */
162
	public function current() {
163
		return $this->current;
164
	}
165
166
	/**
167
	 * @return integer 0-indexed count of the page number fetched
168
	 */
169
	public function key() {
170
		return $this->key;
171
	}
172
173
	/**
174
	 * Reset the iterator to the begining of the table.
175
	 */
176
	public function rewind() {
177
		$this->key = -1; // self::next() will turn this into 0
178
		$this->current = [];
179
		$this->next();
180
	}
181
182
	/**
183
	 * @return bool True when the iterator is in a valid state
184
	 */
185
	public function valid() {
186
		return (bool)$this->current;
187
	}
188
189
	/**
190
	 * @return bool True when this result set has rows
191
	 */
192
	public function hasChildren() {
193
		return $this->current && count( $this->current );
194
	}
195
196
	/**
197
	 * @return RecursiveIterator
198
	 */
199
	public function getChildren() {
200
		return new NotRecursiveIterator( new ArrayIterator( $this->current ) );
201
	}
202
203
	/**
204
	 * Fetch the next set of rows from the database.
205
	 */
206
	public function next() {
207
		$res = $this->db->select(
208
			$this->table,
209
			$this->fetchColumns,
210
			$this->buildConditions(),
211
			__METHOD__,
212
			[
213
				'LIMIT' => $this->batchSize,
214
				'ORDER BY' => $this->orderBy,
215
			] + $this->options,
216
			$this->joinConditions
217
		);
218
219
		// The iterator is converted to an array because in addition to
220
		// returning it in self::current() we need to use the end value
221
		// in self::buildConditions()
222
		$this->current = iterator_to_array( $res );
223
		$this->key++;
224
	}
225
226
	/**
227
	 * Uses the primary key list and the maximal result row from the
228
	 * previous iteration to build an SQL condition sufficient for
229
	 * selecting the next page of results.  All except the final key use
230
	 * `=` conditions while the final key uses a `>` condition
231
	 *
232
	 * Example output:
233
	 * 	  [ '( foo = 42 AND bar > 7 ) OR ( foo > 42 )' ]
234
	 *
235
	 * @return array The SQL conditions necessary to select the next set
236
	 *  of rows in the batched query
237
	 */
238
	protected function buildConditions() {
239
		if ( !$this->current ) {
240
			return $this->conditions;
241
		}
242
243
		$maxRow = end( $this->current );
244
		$maximumValues = [];
245
		foreach ( $this->primaryKey as $alias => $column ) {
246
			$name = is_numeric( $alias ) ? $column : $alias;
247
			$maximumValues[$column] = $this->db->addQuotes( $maxRow->{$name} );
248
		}
249
250
		$pkConditions = [];
251
		// For example: If we have 3 primary keys
252
		// first run through will generate
253
		//   col1 = 4 AND col2 = 7 AND col3 > 1
254
		// second run through will generate
255
		//   col1 = 4 AND col2 > 7
256
		// and the final run through will generate
257
		//   col1 > 4
258
		while ( $maximumValues ) {
259
			$pkConditions[] = $this->buildGreaterThanCondition( $maximumValues );
260
			array_pop( $maximumValues );
261
		}
262
263
		$conditions = $this->conditions;
264
		$conditions[] = sprintf( '( %s )', implode( ' ) OR ( ', $pkConditions ) );
265
266
		return $conditions;
267
	}
268
269
	/**
270
	 * Given an array of column names and their maximum value  generate
271
	 * an SQL condition where all keys except the last match $quotedMaximumValues
272
	 * exactly and the last column is greater than the matching value in
273
	 * $quotedMaximumValues
274
	 *
275
	 * @param array $quotedMaximumValues The maximum values quoted with
276
	 *  $this->db->addQuotes()
277
	 * @return string An SQL condition that will select rows where all
278
	 *  columns match the maximum value exactly except the last column
279
	 *  which must be greater than the provided maximum value
280
	 */
281
	protected function buildGreaterThanCondition( array $quotedMaximumValues ) {
282
		$keys = array_keys( $quotedMaximumValues );
283
		$lastColumn = end( $keys );
284
		$lastValue = array_pop( $quotedMaximumValues );
285
		$conditions = [];
286
		foreach ( $quotedMaximumValues as $column => $value ) {
287
			$conditions[] = "$column = $value";
288
		}
289
		$conditions[] = "$lastColumn > $lastValue";
290
291
		return implode( ' AND ', $conditions );
292
	}
293
}
294