Completed
Push — master ( 27d687...150b55 )
by Dan Michael O.
12:08
created

OaiPmhHarvest::handle()   A

Complexity

Conditions 4
Paths 2

Size

Total Lines 18
Code Lines 9

Duplication

Lines 0
Ratio 0 %

Importance

Changes 0
Metric Value
c 0
b 0
f 0
dl 0
loc 18
rs 9.2
cc 4
eloc 9
nc 2
nop 0
1
<?php
2
3
namespace Colligator\Jobs;
4
5
use Colligator\Collection;
6
use Illuminate\Foundation\Bus\DispatchesJobs;
7
use Log;
8
use Phpoaipmh\Client;
9
use Phpoaipmh\Endpoint;
10
use Scriptotek\OaiPmh\ListRecordsResponse;
11
use Storage;
12
13
class OaiPmhHarvest extends Job
14
{
15
    use DispatchesJobs;
16
17
    public $name;
18
    public $url;
19
    public $schema;
20
    public $set;
21
    public $start;
22
    public $until;
23
    public $resume;
24
    public $maxRetries;
25
    public $sleepTimeOnError;
26
27
    /**
28
     * Start time for the full harvest.
29
     *
30
     * @var float
31
     */
32
    protected $startTime;
33
34
    /**
35
     * Start time for the current batch.
36
     *
37
     * @var float
38
     */
39
    protected $batchTime;
40
41
    /**
42
     * Harvest position.
43
     *
44
     * @var int
45
     */
46
    protected $batchPos = 0;
47
48
    /**
49
     * @var Collection
50
     */
51
    public $collection;
52
53
    /**
54
     * Number of records retrieved between each emitted OaiPmhHarvestStatus event.
55
     * A too small number will cause CPU overhead.
56
     *
57
     * @var int
58
     */
59
    protected $statusUpdateEvery = 50;
60
61
    /**
     * Set up a harvest job from a named harvest configuration.
     *
     * @param string $name   Harvest name from config
     * @param array  $config Harvest config array. The keys 'url', 'schema' and 'set'
     *                       are read directly; 'max-retries' (default 1000) and
     *                       'sleep-time-on-error' (default 60) are optional.
     * @param string $start  Start date (optional)
     * @param string $until  End date (optional)
     * @param string $resume Resumption token for continuing an aborted harvest (optional)
     */
    public function __construct($name, $config, $start = null, $until = null, $resume = null)
    {
        // Values handed in directly by the caller
        $this->name = $name;
        $this->start = $start;
        $this->until = $until;
        $this->resume = $resume;

        // Endpoint description taken from the harvest config
        $this->url = $config['url'];
        $this->schema = $config['schema'];
        $this->set = $config['set'];

        // Optional error-handling settings, with fallbacks when the key is absent
        $this->maxRetries = array_get($config, 'max-retries', 1000);
        $this->sleepTimeOnError = array_get($config, 'sleep-time-on-error', 60);
    }
82
83
    /**
     * Harvest all records from the OAI-PMH endpoint and dispatch one
     * ImportRecord job per record.
     *
     * The start/until bounds are documented on the constructor as strings,
     * while Phpoaipmh\Endpoint::listRecords() is reported to accept only
     * DateTime objects or null for its date arguments. Both bounds are
     * therefore normalised before the request is made; the properties
     * themselves are left untouched.
     *
     * @throws \Exception If a date bound is a string that DateTime cannot parse.
     *
     * @return int Number of records harvested.
     */
    public function fromNetwork()
    {
        $client = new Client($this->url);
        $endpoint = new Endpoint($client);

        $from = $this->normalizeDateBound($this->start);
        $until = $this->normalizeDateBound($this->until);

        $recordsHarvested = 0;

        // Loop over all records using an iterator that pulls in more data when
        // the buffer is exhausted.
        $records = $endpoint->listRecords($this->schema, $from, $until, $this->set, $this->resume);
        foreach ($records as $record) {
            ++$recordsHarvested;

            $this->dispatch(new ImportRecord($this->collection, $record->asXML()));

            // Emit a progress message once per $statusUpdateEvery records
            if ($recordsHarvested % $this->statusUpdateEvery === 0) {
                $this->status($recordsHarvested, $recordsHarvested);
            }
        }

        return $recordsHarvested;
    }

    /**
     * Convert a harvest date bound into a value suitable for listRecords().
     *
     * @param mixed $value Null, an empty string, a date string or a date object
     *
     * @throws \Exception If $value is a string that DateTime cannot parse.
     *
     * @return \DateTimeInterface|null Null when no bound was given
     */
    private function normalizeDateBound($value)
    {
        // An empty string would otherwise be parsed by DateTime as "now"
        if ($value === null || $value === '') {
            return null;
        }

        // Already a date object: pass through unchanged
        if ($value instanceof \DateTimeInterface) {
            return $value;
        }

        // NOTE(review): strings without an explicit zone are parsed in PHP's
        // default timezone - confirm this matches what the endpoint expects.
        return new \DateTime($value);
    }
104
105
    /**
     * Execute the job: log the requested date range, look up the collection
     * named by this harvest, and harvest it from the network.
     *
     * If no collection with this name exists, an error is reported and the
     * job returns without harvesting.
     */
    public function handle()
    {
        $fromLabel = $this->start ?: '(no limit)';
        $untilLabel = $this->until ?: '(no limit)';
        Log::info('[OaiPmhHarvest] Starting job. Requesting records from ' . $fromLabel . ' until ' . $untilLabel . '.');

        // Both timers start from the same instant, offset one second into the past
        $timerStart = microtime(true) - 1;
        $this->batchTime = $timerStart;
        $this->startTime = $timerStart;

        $this->collection = Collection::where('name', '=', $this->name)->first();
        if ($this->collection === null) {
            $this->error("Collection '$this->name' not found in DB");

            return;
        }

        $harvestedCount = $this->fromNetwork();

        Log::info('[OaiPmhHarvest] Harvest complete, got ' . $harvestedCount . ' records.');
    }
126
127
    /**
     * Log a progress line with the record count, the harvest speed for the
     * current batch and on average, and the current memory usage. Afterwards
     * the batch timer and batch position are reset to start a new batch.
     *
     * @param int $fetched Record count used for the speed calculations; becomes the new batch position
     * @param int $current Record count reported in the log message
     */
    public function status($fetched, $current)
    {
        $elapsedTotal = microtime(true) - $this->startTime;
        $elapsedBatch = microtime(true) - $this->batchTime;

        // Bytes to MB with one decimal: dividing by 1024 * 102.4 gives tenths of a MB
        $memoryMb = round(memory_get_usage() / 1024 / 102.4) / 10;

        $fetchedThisBatch = $fetched - $this->batchPos;
        $speedNow = $fetchedThisBatch / $elapsedBatch;
        $speedAverage = $fetched / $elapsedTotal;

        // Open a new measuring window for the next batch
        $this->batchTime = microtime(true);
        $this->batchPos = $fetched;

        $message = sprintf(
            '[OaiPmhHarvest] Got %d records so far - Recs/sec: %.1f (current), %.1f (avg) - Mem: %.1f MB.',
            $current,
            $speedNow,
            $speedAverage,
            $memoryMb
        );

        Log::debug($message);
    }
153
}
154