Conditions | 10 |
Paths | 27 |
Total Lines | 56 |
Code Lines | 35 |
Lines | 0 |
Ratio | 0 % |
Changes | 2 | ||
Bugs | 0 | Features | 0 |
Small methods make your code easier to understand, particularly when combined with a good name. Besides, if your method is small, finding a good name for it is usually much easier.
For example, if you find yourself adding comments to a method's body, this is usually a good sign that the commented part should be extracted into a new method, with the comment serving as a starting point when coming up with a good name for this new method.
Commonly applied refactorings include:
If many parameters/temporary variables are present:
1 | <?php |
||
/**
 * Post a file to the configured extraction endpoint and return the text found in the response.
 *
 * The file is sent as a multipart upload together with the `extractOnly` and
 * `extractFormat=text` fields. The text is then taken from the
 * `<str name="{file name}">…</str>` element of the response body.
 *
 * @param File|string $file A File object, or a path to a file on disk.
 * @return string|null Empty string when no file is given or the path does not exist;
 *                     the captured text on success; null when the request fails or
 *                     the response holds no element for the file name.
 * @throws InvalidArgumentException When the `base_url` configuration value is not set.
 */
public function getContent($file)
{
    if (!$file || (is_string($file) && !file_exists($file))) {
        // no file
        return '';
    }

    $isFileObject = $file instanceof File;
    $fileName = $isFileObject ? $file->getFilename() : basename($file);
    $client = $this->getHttpClient();

    // Get and validate base URL (reuse the value already read instead of a second lookup)
    $baseUrl = $this->config()->get('base_url');
    if (!$baseUrl) {
        throw new InvalidArgumentException('SolrCellTextExtractor.base_url not specified');
    }

    // Handle opened by this method (path input only); released in the finally block below.
    $ownedHandle = null;

    try {
        if ($isFileObject) {
            $stream = $file->getStream();
        } else {
            $stream = fopen($file, 'r');
            if ($stream === false) {
                // Fail explicitly rather than handing `false` to the HTTP client;
                // this is logged by the InvalidArgumentException handler below.
                throw new InvalidArgumentException('unable to open file for reading');
            }
            $ownedHandle = $stream;
        }

        /** @var Response $response */
        $response = $client->post($baseUrl, [
            'multipart' => [
                ['name' => 'extractOnly', 'contents' => 'true'],
                ['name' => 'extractFormat', 'contents' => 'text'],
                ['name' => 'myfile', 'contents' => $stream],
            ],
        ]);
    } catch (InvalidArgumentException $e) {
        $msg = sprintf(
            'Error extracting text from "%s" (message: %s)',
            $fileName,
            $e->getMessage()
        );
        Injector::inst()->get(LoggerInterface::class)->notice($msg);
        return null;
    } catch (Exception $e) {
        // Catch other errors that Tika can throw via Guzzle but are not caught and break Solr search
        // query in some cases.
        $msg = sprintf(
            'Tika server error attempting to extract from "%s" (message: %s)',
            $fileName,
            $e->getMessage()
        );
        Injector::inst()->get(LoggerInterface::class)->notice($msg);
        return null;
    } finally {
        // is_resource() is false for an already-closed handle, so this is safe
        // even if the HTTP client closed the stream itself.
        if (is_resource($ownedHandle)) {
            fclose($ownedHandle);
        }
    }

    $matches = [];
    // Use preg match to avoid SimpleXML running out of memory on large text nodes.
    // The delimiter is passed to preg_quote() so a "/" in the file name cannot
    // terminate the pattern early.
    preg_match(
        sprintf('/\<str name\="%s"\>(.*?)\<\/str\>/s', preg_quote($fileName, '/')),
        (string)$response->getBody(),
        $matches
    );

    return $matches ? $matches[1] : null;
}
||
165 |