| Conditions | 9 |
| Paths | 19 |
| Total Lines | 57 |
| Lines | 0 |
| Ratio | 0 % |
| Changes | 0 | ||
Small methods make your code easier to understand, in particular if combined with a good name. Besides, if your method is small, finding a good name is usually much easier.
For example, if you find yourself adding comments to a method's body, this is usually a good sign to extract the commented part to a new method, and use the comment as a starting point when coming up with a good name for this new method.
Commonly applied refactorings include:
If many parameters/temporary variables are present:
| 1 | <?php |
||
/**
 * Extract the text content of a file by posting it to the configured extraction endpoint.
 *
 * The file is uploaded to the URL stored in the `base_url` config setting with
 * `extractOnly=true` and `extractFormat=text`; the text is then pulled out of the
 * `<str name="{filename}">` element of the response body.
 *
 * @param File|string $file A File object or a filesystem path.
 * @return string|null Empty string when no (existing) file was given, the extracted text on
 *                     success, or null when the request failed or the response did not contain
 *                     a matching element.
 * @throws InvalidArgumentException When the `base_url` config setting is not specified.
 */
public function getContent($file)
{
    // Nothing to extract: no file given, or a path that does not exist on disk.
    if (!$file || (is_string($file) && !file_exists($file))) {
        return '';
    }

    $fileName = $file instanceof File ? $file->getFilename() : basename($file);
    $client = $this->getHttpClient();

    // Get and validate base URL
    $baseUrl = $this->config()->get('base_url');
    if (!$baseUrl) {
        throw new InvalidArgumentException('SolrCellTextExtractor.base_url not specified');
    }

    // Initialised up front so the generic catch block below never reads an undefined
    // variable when resolving the path itself is what failed.
    $path = null;

    try {
        $path = $this->getPathFromFile($file);
        $request = $client
            ->post($baseUrl)
            ->addPostFields(['extractOnly' => 'true', 'extractFormat' => 'text'])
            ->addPostFiles(['myfile' => $path]);
        $response = $request->send();
    } catch (InvalidArgumentException $e) {
        $msg = sprintf(
            'Error extracting text from "%s" (message: %s)',
            $fileName,
            $e->getMessage()
        );
        Injector::inst()->get(LoggerInterface::class)->notice($msg);

        return null;
    } catch (Exception $e) {
        // Catch other errors that Tika can throw via Guzzle but are not caught and break Solr search
        // query in some cases.
        $msg = sprintf(
            'Tika server error attempting to extract from "%s" (message: %s)',
            $path !== null ? $path : $fileName,
            $e->getMessage()
        );
        Injector::inst()->get(LoggerInterface::class)->notice($msg);

        return null;
    }

    // Just initialise it, it doesn't take much.
    $matches = [];

    // Use preg match to avoid SimpleXML running out of memory on large text nodes.
    // The delimiter is passed to preg_quote() so a "/" in the file name cannot terminate
    // the pattern early.
    preg_match(
        sprintf('/\<str name\="%s"\>(.*?)\<\/str\>/s', preg_quote($fileName, '/')),
        (string)$response->getBody(),
        $matches
    );

    return isset($matches[1]) ? $matches[1] : null;
}
||
| 165 |