|
1
|
|
|
<?php |
|
2
|
|
|
|
|
3
|
|
|
namespace Colligator\Console\Commands; |
|
4
|
|
|
|
|
5
|
|
|
use Carbon\Carbon; |
|
6
|
|
|
use Colligator\Jobs\OaiPmhHarvest as OaiPmhHarvestJob; |
|
7
|
|
|
use Illuminate\Console\Command; |
|
8
|
|
|
use Illuminate\Foundation\Bus\DispatchesJobs; |
|
9
|
|
|
|
|
10
|
|
|
/**
 * Artisan command that dispatches an OAI-PMH harvest job for a named
 * configuration and reports progress on the console while the job runs.
 */
class OaiPmhHarvest extends Command
{
    use DispatchesJobs;

    /**
     * Start time for the full harvest.
     *
     * @var float
     */
    protected $startTime;

    /**
     * Start time for the current batch.
     *
     * @var float
     */
    protected $batchTime;

    /**
     * Harvest position.
     *
     * @var int
     */
    protected $batchPos = 0;

    /**
     * The name and signature of the console command.
     *
     * @var string
     */
    protected $signature = 'colligator:harvest-oaipmh
                            {name? : Name of the harvest config as defined in configs/oaipmh.php}
                            {--from= : Start date on ISO format YYYY-MM-DD}
                            {--until= : End date on ISO format YYYY-MM-DD}
                            {--resume= : Resumption token}
                            {--from-dump : Just re-index from dump}
                            {--daily : Harvest records modified yesterday. Cannot be combined with --from / --until}';

    /**
     * The console command description.
     *
     * @var string
     */
    protected $description = 'Harvest records from OAI-PMH service and store as XML files.';

    /**
     * Create a new command instance.
     */
    public function __construct()
    {
        parent::__construct();
    }

    /**
     * Output a list of the configurations.
     *
     * Prints the keys found under the "oaipmh.harvests" config entry.
     * Prints only the heading when that entry is missing or empty.
     */
    public function listConfigurations()
    {
        $this->comment('');
        $this->comment('Available configurations:');

        // Fall back to an empty array: array_keys(null) is a TypeError on PHP 8.
        $config = (array) \Config::get('oaipmh.harvests', []);
        foreach (array_keys($config) as $key) {
            $this->comment(' - ' . $key);
        }
    }

    /**
     * Validate the command arguments and options.
     *
     * Writes an explanatory message to the console for the first problem found.
     *
     * @return bool True if the input is usable, false otherwise.
     */
    public function validate()
    {
        if (empty($this->argument('name'))) {
            $this->listConfigurations();

            return false;
        }

        $harvestConfig = \Config::get('oaipmh.harvests.' . $this->argument('name'), null);
        if (is_null($harvestConfig)) {
            $this->error('Unknown configuration specified.');
            $this->listConfigurations();

            return false;
        }

        $hasDateRange = $this->option('from') || $this->option('until');

        if ($this->option('daily') && $hasDateRange) {
            $this->error('--daily cannot be combined with --from / --until.');

            return false;
        }

        if ($this->option('from-dump')
            && ($hasDateRange || $this->option('resume') || $this->option('daily'))
        ) {
            $this->error('--from-dump cannot be combined with other options.');

            return false;
        }

        foreach (['from', 'until'] as $key) {
            $value = $this->option($key);
            if (is_null($value) || $value === '') {
                // Option not given, nothing to check.
                continue;
            }
            if (!$this->isValidIsoDate($value)) {
                $this->error('--' . $key . ' must be on ISO-format YYYY-MM-DD.');

                return false;
            }
        }

        return true;
    }

    /**
     * Execute the console command.
     *
     * Validates the input, prints a summary of the harvest, registers
     * listeners for the status, completion and error events, and finally
     * dispatches the harvest job.
     */
    public function handle()
    {
        if (!$this->validate()) {
            return;
        }

        $harvestName = $this->argument('name');
        $harvestConfig = \Config::get('oaipmh.harvests.' . $harvestName, null);

        $this->comment('');
        $this->info(sprintf('[%s] Starting harvest "%s"',
            $this->currentTimestamp(),
            $harvestName
        ));

        if ($this->option('from-dump')) {
            $this->comment(' - From local dump');
        } else {
            $this->comment(' - Repo: ' . $harvestConfig['url']);
            $this->comment(' - Schema: ' . $harvestConfig['schema']);
            $this->comment(' - Set: ' . $harvestConfig['set']);

            foreach (['from', 'until', 'resume', 'daily'] as $key) {
                $value = $this->option($key);
                // Omitted value options are null, while an omitted boolean
                // flag (--daily) is false; neither should be listed.
                if (!is_null($value) && $value !== false) {
                    $this->comment(sprintf(' - %s: %s', ucfirst($key), $value));
                }
            }
        }

        // For timing. NOTE(review): the one second offset presumably keeps the
        // elapsed time non-zero for the very first status event - confirm.
        $this->startTime = $this->batchTime = microtime(true) - 1;

        \Event::listen('Colligator\Events\OaiPmhHarvestStatus', function ($event) {
            $this->status($event->harvested, $event->position);
        });

        \Event::listen('Colligator\Events\OaiPmhHarvestComplete', function ($event) {
            $this->info(sprintf('[%s] Harvest complete, got %d records in %d seconds',
                $this->currentTimestamp(),
                $event->count,
                microtime(true) - $this->startTime
            ));
        });

        \Event::listen('Colligator\Events\JobError', function ($event) {
            \Log::error('[OaiPmhHarvest] ' . $event->msg);
            $this->error($event->msg);
        });

        $from = $this->option('from');
        $until = $this->option('until');
        if ($this->option('daily')) {
            // A daily harvest covers exactly one day: yesterday.
            $from = Carbon::now()->subDay()->toDateString();
            $until = $from;
        }

        $this->dispatch(
            new OaiPmhHarvestJob(
                $harvestName,
                $harvestConfig,
                $from,
                $until,
                $this->option('resume'),
                $this->option('from-dump')
            )
        );
    }

    /**
     * Output a status message.
     *
     * @param int $fetched Value used for the speed calculations.
     * @param int $current Value printed as the record count.
     */
    public function status($fetched, $current)
    {
        $now = microtime(true);
        $totalTime = $now - $this->startTime;
        $batchTime = $now - $this->batchTime;

        // Bytes -> MB, rounded to one decimal.
        $mem = round(memory_get_usage() / 1024 / 102.4) / 10;

        // Guard the divisions: a zero interval would throw DivisionByZeroError on PHP 8.
        $currentSpeed = $batchTime > 0 ? ($fetched - $this->batchPos) / $batchTime : 0.0;
        $avgSpeed = $totalTime > 0 ? $fetched / $totalTime : 0.0;

        // Start measuring the next batch from here.
        $this->batchTime = microtime(true);
        $this->batchPos = $fetched;

        $this->comment(sprintf(
            '[%s] %d records - Recs/sec: %.1f (current), %.1f (avg) - Mem: %.1f MB.',
            $this->currentTimestamp(),
            $current,
            $currentSpeed,
            $avgSpeed,
            $mem
        ));
    }

    /**
     * Current local time formatted as "YYYY-MM-DD HH:MM:SS".
     *
     * Uses date() rather than strftime(), which is deprecated as of PHP 8.1.
     *
     * @return string
     */
    private function currentTimestamp()
    {
        return date('Y-m-d H:i:s');
    }

    /**
     * Check that a value has the exact shape YYYY-MM-DD.
     *
     * The pattern is anchored at both ends so that strings merely containing
     * a date (e.g. "x2015-01-01y") are rejected. Only the shape is checked,
     * not whether the date exists in the calendar.
     *
     * @param mixed $value
     *
     * @return bool
     */
    private function isValidIsoDate($value)
    {
        return is_string($value)
            && preg_match('/\A[0-9]{4}-[0-9]{2}-[0-9]{2}\z/', $value) === 1;
    }
}
|
221
|
|
|
|