1
|
|
|
<?php |
2
|
|
|
|
3
|
|
|
namespace Colligator\Console\Commands; |
4
|
|
|
|
5
|
|
|
use Carbon\Carbon; |
6
|
|
|
use Colligator\Jobs\OaiPmhHarvest as OaiPmhHarvestJob; |
7
|
|
|
use Illuminate\Console\Command; |
8
|
|
|
use Illuminate\Foundation\Bus\DispatchesJobs; |
9
|
|
|
|
10
|
|
|
/**
 * Console command that starts an OAI-PMH harvest and reports its progress.
 *
 * The command validates its arguments and options, prints a summary of the
 * selected harvest configuration, registers listeners for the status,
 * completion and error events, and finally dispatches an OaiPmhHarvestJob.
 */
class OaiPmhHarvest extends Command
{
    use DispatchesJobs;

    /**
     * Start time for the full harvest (as returned by microtime(true)).
     *
     * @var float
     */
    protected $startTime;

    /**
     * Start time for the current batch (as returned by microtime(true)).
     *
     * @var float
     */
    protected $batchTime;

    /**
     * Harvest position: the number of fetched records reported by the
     * previous status update.
     *
     * @var int
     */
    protected $batchPos = 0;

    /**
     * The name and signature of the console command.
     *
     * @var string
     */
    protected $signature = 'colligator:harvest-oaipmh
        {name? : Name of the harvest config as defined in configs/oaipmh.php}
        {--from= : Start date on ISO format YYYY-MM-DD}
        {--until= : End date on ISO format YYYY-MM-DD}
        {--resume= : Resumption token}
        {--from-dump : Just re-index from dump}
        {--daily : Harvest records modified yesterday. Cannot be combined with --from / --until}';

    /**
     * The console command description.
     *
     * @var string
     */
    protected $description = 'Harvest records from OAI-PMH service and store as XML files.';

    /**
     * Create a new command instance.
     */
    public function __construct()
    {
        parent::__construct();
    }

    /**
     * Output a list of the configurations found under the
     * 'oaipmh.harvests' config key.
     */
    public function listConfigurations()
    {
        $this->comment('');
        $this->comment('Available configurations:');

        // The cast turns a missing / null config entry into an empty list, so
        // array_keys() never receives a non-array.
        $config = (array) \Config::get('oaipmh.harvests', []);
        foreach (array_keys($config) as $key) {
            $this->comment(' - ' . $key);
        }
    }

    /**
     * Validate the command's argument and options.
     *
     * Prints an error message (and, where it helps, the list of available
     * configurations) for the first problem found.
     *
     * @return bool true if the input is acceptable, false otherwise
     */
    public function validate()
    {
        if (empty($this->argument('name'))) {
            $this->listConfigurations();

            return false;
        }

        $harvestConfig = \Config::get('oaipmh.harvests.' . $this->argument('name'), null);
        if (is_null($harvestConfig)) {
            $this->error('Unknown configuration specified.');
            $this->listConfigurations();

            return false;
        }

        $hasDateRange = $this->option('from') || $this->option('until');

        if ($this->option('daily') && $hasDateRange) {
            $this->error('--daily cannot be combined with --from / --until.');

            return false;
        }

        if ($this->option('from-dump')
            && ($hasDateRange || $this->option('resume') || $this->option('daily'))
        ) {
            $this->error('--from-dump cannot be combined with other options.');

            return false;
        }

        foreach (['from', 'until'] as $key) {
            $value = $this->option($key);
            if ($value && !$this->isIsoDate($value)) {
                $this->error(sprintf('--%s must be on ISO-format YYYY-MM-DD.', $key));

                return false;
            }
        }

        return true;
    }

    /**
     * Execute the console command.
     */
    public function handle()
    {
        if (!$this->validate()) {
            return;
        }

        $harvestName = $this->argument('name');
        $harvestConfig = \Config::get('oaipmh.harvests.' . $harvestName, null);

        $this->comment('');
        $this->info(sprintf('[%s] Starting harvest "%s"',
            $this->timestamp(),
            $harvestName
        ));

        if ($this->option('from-dump')) {
            $this->comment(' - From local dump');
        } else {
            $this->comment(' - Repo: ' . $harvestConfig['url']);
            $this->comment(' - Schema: ' . $harvestConfig['schema']);
            $this->comment(' - Set: ' . $harvestConfig['set']);

            foreach (['from', 'until', 'resume', 'daily'] as $key) {
                $value = $this->option($key);

                // Value options are null when absent, but the valueless
                // --daily flag is false when absent; skip both.
                if (is_null($value) || $value === false) {
                    continue;
                }

                $this->comment(sprintf(' - %s: %s', ucfirst($key), $value === true ? 'yes' : $value));
            }
        }

        // For timing. Both clocks start one second in the past, presumably so
        // that the first status update never sees a near-zero elapsed time.
        $this->startTime = $this->batchTime = microtime(true) - 1;

        \Event::listen('Colligator\Events\OaiPmhHarvestStatus', function ($event) {
            $this->status($event->harvested, $event->position);
        });

        \Event::listen('Colligator\Events\OaiPmhHarvestComplete', function ($event) {
            $this->info(sprintf('[%s] Harvest complete, got %d records in %d seconds',
                $this->timestamp(),
                $event->count,
                microtime(true) - $this->startTime
            ));
        });

        \Event::listen('Colligator\Events\JobError', function ($event) {
            \Log::error('[OaiPmhHarvest] ' . $event->msg);
            $this->error($event->msg);
        });

        $from = $this->option('from');
        $until = $this->option('until');
        if ($this->option('daily')) {
            // --daily harvests exactly one day: yesterday.
            $from = Carbon::now()->subDay()->toDateString();
            $until = $from;
        }

        $this->dispatch(
            new OaiPmhHarvestJob(
                $harvestName,
                $harvestConfig,
                $from,
                $until,
                $this->option('resume'),
                $this->option('from-dump')
            )
        );
    }

    /**
     * Output a status message with the current and average harvest speed
     * and the current memory usage.
     *
     * @param int $fetched Number of records fetched so far; used for the speed figures
     * @param int $current Record count shown in the message
     */
    public function status($fetched, $current)
    {
        $now = microtime(true);
        $totalTime = $now - $this->startTime;
        $batchTime = $now - $this->batchTime;

        // Bytes -> tenths of a megabyte -> megabytes with one decimal.
        $mem = round(memory_get_usage() / 1024 / 102.4) / 10;

        // Guard against a zero interval (two updates within the clock's
        // resolution), which would otherwise be a division by zero.
        $currentSpeed = $batchTime > 0 ? ($fetched - $this->batchPos) / $batchTime : 0.0;
        $avgSpeed = $totalTime > 0 ? $fetched / $totalTime : 0.0;

        $this->batchTime = $now;
        $this->batchPos = $fetched;

        $this->comment(sprintf(
            '[%s] %d records - Recs/sec: %.1f (current), %.1f (avg) - Mem: %.1f MB.',
            $this->timestamp(),
            $current,
            $currentSpeed,
            $avgSpeed,
            $mem
        ));
    }

    /**
     * Check that a value is a date written exactly as YYYY-MM-DD.
     *
     * The pattern is anchored at both ends so that surrounding garbage
     * (e.g. "x2015-01-01y") is rejected.
     *
     * @param string $value
     *
     * @return bool
     */
    private function isIsoDate($value)
    {
        return preg_match('/\A[0-9]{4}-[0-9]{2}-[0-9]{2}\z/', $value) === 1;
    }

    /**
     * Current local time formatted as "YYYY-MM-DD HH:MM:SS" for log lines.
     *
     * @return string
     */
    private function timestamp()
    {
        return date('Y-m-d H:i:s');
    }
}
221
|
|
|
|