| Conditions | 10 |
| Paths | 27 |
| Total Lines | 56 |
| Code Lines | 35 |
| Lines | 0 |
| Ratio | 0 % |
| Changes | 2 | ||
| Bugs | 0 | Features | 0 |
Small methods make your code easier to understand, in particular if combined with a good name. Besides, if your method is small, finding a good name is usually much easier.
For example, if you find yourself adding comments to a method's body, this is usually a good sign to extract the commented part to a new method, and use the comment as a starting point when coming up with a good name for this new method.
Commonly applied refactorings include:
- Extract Method

If many parameters/temporary variables are present:
- Replace Temp with Query
- Introduce Parameter Object
- Preserve Whole Object
| 1 | <?php |
||
/**
 * Extract the text content of a file by POSTing it to the configured Solr Cell endpoint.
 *
 * The file is uploaded as a multipart request with extractOnly=true and
 * extractFormat=text; the extracted text is then pulled out of the response
 * body from the <str name="{filename}"> node.
 *
 * @param File|string $file A File instance, or a path to a file on disk
 * @return string|null Empty string when no (existing) file was given; the extracted
 *                     text on success; null when the request failed or the response
 *                     contained no node matching the file name
 * @throws InvalidArgumentException When the base_url config value is not set
 */
public function getContent($file)
{
    if (!$file || (is_string($file) && !file_exists($file))) {
        // no file
        return '';
    }

    $fileName = $file instanceof File ? $file->getFilename() : basename($file);
    $client = $this->getHttpClient();

    // Get and validate base URL (validate the value we already fetched instead of reading config twice)
    $baseUrl = $this->config()->get('base_url');
    if (!$baseUrl) {
        throw new InvalidArgumentException('SolrCellTextExtractor.base_url not specified');
    }

    $stream = null;
    try {
        $stream = $file instanceof File ? $file->getStream() : fopen($file, 'r');
        if ($stream === false) {
            // fopen() only emits a warning on failure; route it through the normal error handling below
            throw new InvalidArgumentException('unable to open file for reading');
        }
        /** @var Response $response */
        $response = $client
            ->post($baseUrl, [
                'multipart' => [
                    ['name' => 'extractOnly', 'contents' => 'true'],
                    ['name' => 'extractFormat', 'contents' => 'text'],
                    ['name' => 'myfile', 'contents' => $stream],
                ]
            ]);
    } catch (InvalidArgumentException $e) {
        $msg = sprintf(
            'Error extracting text from "%s" (message: %s)',
            $fileName,
            $e->getMessage()
        );
        Injector::inst()->get(LoggerInterface::class)->notice($msg);
        return null;
    } catch (Exception $e) {
        // Catch other errors that Tika can throw via Guzzle but are not caught and break Solr search
        // query in some cases.
        $msg = sprintf(
            'Tika server error attempting to extract from "%s" (message: %s)',
            $fileName,
            $e->getMessage()
        );
        Injector::inst()->get(LoggerInterface::class)->notice($msg);
        return null;
    } finally {
        // Release the file handle on every path; is_resource() is false for a handle
        // that has already been closed, so this never double-closes.
        if (is_resource($stream)) {
            fclose($stream);
        }
    }

    $matches = [];
    // Use preg match to avoid SimpleXML running out of memory on large text nodes.
    // The delimiter must be passed to preg_quote() so that a "/" in the file name
    // cannot terminate the pattern early.
    preg_match(
        sprintf('/\<str name\="%s"\>(.*?)\<\/str\>/s', preg_quote($fileName, '/')),
        (string)$response->getBody(),
        $matches
    );

    return $matches ? $matches[1] : null;
}
||
| 165 |