1 | <?php |
||
38 | class ProcessService |
||
39 | { |
||
40 | /** |
||
41 | * @var integer $timeToLive |
||
42 | */ |
||
43 | private $timeToLive; |
||
44 | |||
45 | /** |
||
46 | * @var integer |
||
47 | */ |
||
48 | private $countInARun; |
||
49 | |||
50 | /** |
||
51 | * @var integer |
||
52 | */ |
||
53 | private $processLimit; |
||
54 | |||
55 | /** |
||
56 | * @var CrawlerController |
||
57 | */ |
||
58 | private $crawlerController; |
||
59 | |||
60 | /** |
||
61 | * @var \AOE\Crawler\Domain\Repository\QueueRepository |
||
62 | */ |
||
63 | private $queueRepository; |
||
64 | |||
65 | /** |
||
66 | * @var \AOE\Crawler\Domain\Repository\ProcessRepository |
||
67 | */ |
||
68 | private $processRepository; |
||
69 | |||
70 | /** |
||
71 | * @var boolean $verbose |
||
72 | */ |
||
73 | private $verbose; |
||
74 | |||
75 | /** |
||
76 | * the constructor |
||
77 | */ |
||
78 | public function __construct() |
||
88 | |||
/**
 * Runs the crawler in multi process mode until the queue is empty or the
 * polling budget is used up.
 *
 * Starts the required crawler processes, then polls the number of pending
 * queue items. Every loop iteration sleeps for one second, so $timeout is
 * effectively the maximum number of one-second polling rounds. While
 * polling, additional processes are started when required and, once the
 * time-to-live window has elapsed without a new process being started,
 * the processes returned by the repository lookup are released.
 *
 * In verbose mode the queue status is reported at the start and again each
 * time the pending count has dropped by more than 20 items since the last
 * report.
 *
 * @param integer $timeout maximum number of polling iterations (one second sleep each)
 *
 * @throws \RuntimeException if the configured processLimit is not greater than 1
 */
public function multiProcess($timeout)
{
    if ($this->processLimit <= 1) {
        throw new \RuntimeException('To run crawler in multi process mode you have to configure the processLimit > 1.' . PHP_EOL);
    }

    $pendingItemsStart = $this->queueRepository->countAllPendingItems();
    $itemReportLimit = 20;
    // Next status report is due once the pending count falls below this mark.
    $reportItemCount = $pendingItemsStart - $itemReportLimit;
    if ($this->verbose) {
        $this->reportItemStatus();
    }
    $this->startRequiredProcesses();
    $nextTimeOut = time() + $this->timeToLive;

    // Seed with the count fetched above (instead of an empty string) so the
    // final "Stop with timeout" report is still correct when the loop below
    // does not run at all, e.g. for $timeout <= 0.
    $currentPendingItems = $pendingItemsStart;
    for ($i = 0; $i < $timeout; $i++) {
        $currentPendingItems = $this->queueRepository->countAllPendingItems();

        // Starting new processes pushes the cleanup deadline further out.
        if ($this->startRequiredProcesses()) {
            $nextTimeOut = time() + $this->timeToLive;
        }

        // Loose comparison kept on purpose: the return type of
        // countAllPendingItems() is not visible from this method.
        if ($currentPendingItems == 0) {
            if ($this->verbose) {
                echo 'Finished...' . chr(10);
            }
            break;
        }

        if ($currentPendingItems < $reportItemCount) {
            if ($this->verbose) {
                $this->reportItemStatus();
            }
            $reportItemCount = $currentPendingItems - $itemReportLimit;
        }

        sleep(1);

        if ($nextTimeOut < time()) {
            // NOTE(review): the filter selects processes whose ttl is GREATER
            // than the deadline that has just passed - confirm that this is
            // the intended notion of a "timed out" process.
            $timedOutProcesses = $this->processRepository->findAll('', 'DESC', null, 0, 'ttl >' . $nextTimeOut);
            $nextTimeOut = time() + $this->timeToLive;

            // Fetch the ids once and reuse them for the log line and the release call.
            $timedOutProcessIds = $timedOutProcesses->getProcessIds();
            if ($this->verbose) {
                echo 'Cleanup' . implode(',', $timedOutProcessIds) . chr(10);
            }
            $this->crawlerController->CLI_releaseProcesses($timedOutProcessIds, true);
        }
    }

    // Items are still pending, so the loop ended because the polling budget
    // was exhausted rather than because the queue ran empty.
    if ($currentPendingItems > 0 && $this->verbose) {
        echo 'Stop with timeout' . chr(10);
    }
}
||
142 | |||
143 | /** |
||
144 | * Reports the current status of the queue |
||
145 | */ |
||
146 | protected function reportItemStatus() |
||
150 | |||
151 | /** |
||
152 | * according to the given count of pending items and the countInARun Setting this method |
||
153 | * starts more crawling processes |
||
154 | * |
||
155 | * @throws \Exception |
||
156 | * |
||
157 | * @return boolean if processes are started |
||
158 | */ |
||
159 | private function startRequiredProcesses() |
||
186 | |||
187 | /** |
||
188 | * starts new process |
||
189 | * @throws \Exception if no crawler process was started |
||
190 | */ |
||
191 | public function startProcess() |
||
216 | |||
217 | /** |
||
218 | * Returns the path to start the crawler from the command line |
||
219 | * |
||
220 | * @return string |
||
221 | */ |
||
222 | public function getCrawlerCliPath() |
||
236 | } |
||
237 |