Complex classes like Zend_Search_Lucene often do a lot of different things. To break such a class down, we need to identify a cohesive component within that class. A common approach to find such a component is to look for fields/methods that share the same prefixes, or suffixes. You can also have a look at the cohesion graph to spot any un-connected, or weakly-connected components.
Once you have determined the fields that belong together, you can apply the Extract Class refactoring. If the component makes sense as a sub-class, Extract Subclass is also a candidate, and is often faster.
While breaking up the class, it is a good idea to analyze how other classes use Zend_Search_Lucene, and based on these observations, apply Extract Interface, too.
1 | <?php |
||
95 | class Zend_Search_Lucene implements Zend_Search_Lucene_Interface |
||
96 | { |
||
97 | /** |
||
98 | * Default field name for search |
||
99 | * |
||
100 | * Null means search through all fields |
||
101 | * |
||
102 | * @var string |
||
103 | */ |
||
104 | private static $_defaultSearchField = null; |
||
105 | |||
106 | /** |
||
107 | * Result set limit |
||
108 | * |
||
109 | * 0 means no limit |
||
110 | * |
||
111 | * @var integer |
||
112 | */ |
||
113 | private static $_resultSetLimit = 0; |
||
114 | |||
115 | /** |
||
116 | * Terms per query limit |
||
117 | * |
||
118 | * 0 means no limit |
||
119 | * |
||
120 | * @var integer |
||
121 | */ |
||
122 | private static $_termsPerQueryLimit = 1024; |
||
123 | |||
124 | /** |
||
125 | * File system adapter. |
||
126 | * |
||
127 | * @var Zend_Search_Lucene_Storage_Directory |
||
128 | */ |
||
129 | private $_directory = null; |
||
130 | |||
131 | /** |
||
132 | * File system adapter closing option |
||
133 | * |
||
134 | * @var boolean |
||
135 | */ |
||
136 | private $_closeDirOnExit = true; |
||
137 | |||
138 | /** |
||
139 | * Writer for this index, not instantiated unless required. |
||
140 | * |
||
141 | * @var Zend_Search_Lucene_Index_Writer |
||
142 | */ |
||
143 | private $_writer = null; |
||
144 | |||
145 | /** |
||
146 | * Array of Zend_Search_Lucene_Index_SegmentInfo objects for current version of index. |
||
147 | * |
||
148 | * @var array Zend_Search_Lucene_Index_SegmentInfo |
||
149 | */ |
||
150 | private $_segmentInfos = array(); |
||
151 | |||
152 | /** |
||
153 | * Number of documents in this index. |
||
154 | * |
||
155 | * @var integer |
||
156 | */ |
||
157 | private $_docCount = 0; |
||
158 | |||
159 | /** |
||
160 | * Flag for index changes |
||
161 | * |
||
162 | * @var boolean |
||
163 | */ |
||
164 | private $_hasChanges = false; |
||
165 | |||
166 | |||
167 | /** |
||
168 | * Signal, that index is already closed, changes are fixed and resources are cleaned up |
||
169 | * |
||
170 | * @var boolean |
||
171 | */ |
||
172 | private $_closed = false; |
||
173 | |||
174 | /** |
||
175 | * Number of references to the index object |
||
176 | * |
||
177 | * @var integer |
||
178 | */ |
||
179 | private $_refCount = 0; |
||
180 | |||
181 | /** |
||
182 | * Current segment generation |
||
183 | * |
||
184 | * @var integer |
||
185 | */ |
||
186 | private $_generation; |
||
187 | |||
188 | const FORMAT_PRE_2_1 = 0; |
||
189 | const FORMAT_2_1 = 1; |
||
190 | const FORMAT_2_3 = 2; |
||
191 | |||
192 | |||
193 | /** |
||
194 | * Index format version |
||
195 | * |
||
196 | * @var integer |
||
197 | */ |
||
198 | private $_formatVersion; |
||
199 | |||
200 | /** |
||
201 | * Create index |
||
202 | * |
||
203 | * @param mixed $directory |
||
204 | * @return Zend_Search_Lucene_Interface |
||
205 | */ |
||
206 | public static function create($directory) |
||
213 | |||
214 | /** |
||
215 | * Open index |
||
216 | * |
||
217 | * @param mixed $directory |
||
218 | * @return Zend_Search_Lucene_Interface |
||
219 | */ |
||
220 | public static function open($directory) |
||
227 | |||
228 | /** Generation retrieving counter */ |
||
229 | const GENERATION_RETRIEVE_COUNT = 10; |
||
230 | |||
231 | /** Pause between generation retrieving attempts in milliseconds */ |
||
232 | const GENERATION_RETRIEVE_PAUSE = 50; |
||
233 | |||
234 | /** |
||
235 | * Get current generation number |
||
236 | * |
||
237 | * Returns generation number |
||
238 | * 0 means pre-2.1 index format |
||
239 | * -1 means there are no segments files. |
||
240 | * |
||
241 | * @param Zend_Search_Lucene_Storage_Directory $directory |
||
242 | * @return integer |
||
243 | * @throws Zend_Search_Lucene_Exception |
||
244 | */ |
||
245 | public static function getActualGeneration(Zend_Search_Lucene_Storage_Directory $directory) |
||
303 | |||
304 | /** |
||
305 | * Get generation number associated with this index instance |
||
306 | * |
||
307 | * The same generation number in pair with document number or query string |
||
308 | * guarantees to give the same result while index retrieving. |
||
309 | * So it may be used for search result caching. |
||
310 | * |
||
311 | * @return integer |
||
312 | */ |
||
313 | public function getGeneration() |
||
317 | |||
318 | |||
319 | /** |
||
320 | * Get segments file name |
||
321 | * |
||
322 | * @param integer $generation |
||
323 | * @return string |
||
324 | */ |
||
325 | public static function getSegmentFileName($generation) |
||
333 | |||
334 | /** |
||
335 | * Get index format version |
||
336 | * |
||
337 | * @return integer |
||
338 | */ |
||
339 | public function getFormatVersion() |
||
343 | |||
344 | /** |
||
345 | * Set index format version. |
||
346 | * Index is converted to this format at the nearest update time |
||
347 | * |
||
348 | * @param int $formatVersion |
||
349 | * @throws Zend_Search_Lucene_Exception |
||
350 | */ |
||
351 | public function setFormatVersion($formatVersion) |
||
362 | |||
363 | /** |
||
364 | * Read segments file for pre-2.1 Lucene index format |
||
365 | * |
||
366 | * @throws Zend_Search_Lucene_Exception |
||
367 | */ |
||
368 | private function _readPre21SegmentsFile() |
||
404 | |||
405 | /** |
||
406 | * Read segments file |
||
407 | * |
||
408 | * @throws Zend_Search_Lucene_Exception |
||
409 | */ |
||
410 | private function _readSegmentsFile() |
||
498 | |||
499 | /** |
||
500 | * Opens the index. |
||
501 | * |
||
502 | * IndexReader constructor needs Directory as a parameter. It should be |
||
503 | * a string with a path to the index folder or a Directory object. |
||
504 | * |
||
505 | * @param Zend_Search_Lucene_Storage_Directory_Filesystem|string $directory |
||
506 | * @throws Zend_Search_Lucene_Exception |
||
507 | */ |
||
508 | public function __construct($directory = null, $create = false) |
||
573 | |||
574 | /** |
||
575 | * Close current index and free resources |
||
576 | */ |
||
577 | private function _close() |
||
599 | |||
600 | /** |
||
601 | * Add reference to the index object |
||
602 | * |
||
603 | * @internal |
||
604 | */ |
||
605 | public function addReference() |
||
609 | |||
610 | /** |
||
611 | * Remove reference from the index object |
||
612 | * |
||
613 | * When reference count becomes zero, index is closed and resources are cleaned up |
||
614 | * |
||
615 | * @internal |
||
616 | */ |
||
617 | public function removeReference() |
||
625 | |||
626 | /** |
||
627 | * Object destructor |
||
628 | */ |
||
629 | public function __destruct() |
||
633 | |||
634 | /** |
||
635 | * Returns an instance of Zend_Search_Lucene_Index_Writer for the index |
||
636 | * |
||
637 | * @return Zend_Search_Lucene_Index_Writer |
||
638 | */ |
||
639 | private function _getIndexWriter() |
||
650 | |||
651 | |||
652 | /** |
||
653 | * Returns the Zend_Search_Lucene_Storage_Directory instance for this index. |
||
654 | * |
||
655 | * @return Zend_Search_Lucene_Storage_Directory |
||
656 | */ |
||
657 | public function getDirectory() |
||
661 | |||
662 | |||
663 | /** |
||
664 | * Returns the total number of documents in this index (including deleted documents). |
||
665 | * |
||
666 | * @return integer |
||
667 | */ |
||
668 | public function count() |
||
672 | |||
673 | /** |
||
674 | * Returns one greater than the largest possible document number. |
||
675 | * This may be used to, e.g., determine how big to allocate a structure which will have |
||
676 | * an element for every document number in an index. |
||
677 | * |
||
678 | * @return integer |
||
679 | */ |
||
680 | public function maxDoc() |
||
684 | |||
685 | /** |
||
686 | * Returns the total number of non-deleted documents in this index. |
||
687 | * |
||
688 | * @return integer |
||
689 | */ |
||
690 | public function numDocs() |
||
700 | |||
701 | /** |
||
702 | * Checks, that document is deleted |
||
703 | * |
||
704 | * @param integer $id |
||
705 | * @return boolean |
||
706 | * @throws Zend_Search_Lucene_Exception Exception is thrown if $id is out of the range |
||
707 | */ |
||
708 | public function isDeleted($id) |
||
728 | |||
729 | /** |
||
730 | * Set default search field. |
||
731 | * |
||
732 | * Null means, that search is performed through all fields by default |
||
733 | * |
||
734 | * Default value is null |
||
735 | * |
||
736 | * @param string $fieldName |
||
737 | */ |
||
738 | public static function setDefaultSearchField($fieldName) |
||
742 | |||
743 | /** |
||
744 | * Get default search field. |
||
745 | * |
||
746 | * Null means, that search is performed through all fields by default |
||
747 | * |
||
748 | * @return string |
||
749 | */ |
||
750 | public static function getDefaultSearchField() |
||
754 | |||
755 | /** |
||
756 | * Set result set limit. |
||
757 | * |
||
758 | * 0 (default) means no limit |
||
759 | * |
||
760 | * @param integer $limit |
||
761 | */ |
||
762 | public static function setResultSetLimit($limit) |
||
766 | |||
767 | /** |
||
768 | * Get result set limit. |
||
769 | * |
||
770 | * 0 means no limit |
||
771 | * |
||
772 | * @return integer |
||
773 | */ |
||
774 | public static function getResultSetLimit() |
||
778 | |||
779 | /** |
||
780 | * Set terms per query limit. |
||
781 | * |
||
782 | * 0 means no limit |
||
783 | * |
||
784 | * @param integer $limit |
||
785 | */ |
||
786 | public static function setTermsPerQueryLimit($limit) |
||
790 | |||
791 | /** |
||
792 | * Get terms per query limit. |
||
793 | * |
||
794 | * 0 means no limit (default value is 1024) |
||
795 | * |
||
796 | * @return integer |
||
797 | */ |
||
798 | public static function getTermsPerQueryLimit() |
||
802 | |||
803 | /** |
||
804 | * Retrieve index maxBufferedDocs option |
||
805 | * |
||
806 | * maxBufferedDocs is a minimal number of documents required before |
||
807 | * the buffered in-memory documents are written into a new Segment |
||
808 | * |
||
809 | * Default value is 10 |
||
810 | * |
||
811 | * @return integer |
||
812 | */ |
||
813 | public function getMaxBufferedDocs() |
||
817 | |||
818 | /** |
||
819 | * Set index maxBufferedDocs option |
||
820 | * |
||
821 | * maxBufferedDocs is a minimal number of documents required before |
||
822 | * the buffered in-memory documents are written into a new Segment |
||
823 | * |
||
824 | * Default value is 10 |
||
825 | * |
||
826 | * @param integer $maxBufferedDocs |
||
827 | */ |
||
828 | public function setMaxBufferedDocs($maxBufferedDocs) |
||
832 | |||
833 | /** |
||
834 | * Retrieve index maxMergeDocs option |
||
835 | * |
||
836 | * maxMergeDocs is a largest number of documents ever merged by addDocument(). |
||
837 | * Small values (e.g., less than 10,000) are best for interactive indexing, |
||
838 | * as this limits the length of pauses while indexing to a few seconds. |
||
839 | * Larger values are best for batched indexing and speedier searches. |
||
840 | * |
||
841 | * Default value is PHP_INT_MAX |
||
842 | * |
||
843 | * @return integer |
||
844 | */ |
||
845 | public function getMaxMergeDocs() |
||
849 | |||
850 | /** |
||
851 | * Set index maxMergeDocs option |
||
852 | * |
||
853 | * maxMergeDocs is a largest number of documents ever merged by addDocument(). |
||
854 | * Small values (e.g., less than 10,000) are best for interactive indexing, |
||
855 | * as this limits the length of pauses while indexing to a few seconds. |
||
856 | * Larger values are best for batched indexing and speedier searches. |
||
857 | * |
||
858 | * Default value is PHP_INT_MAX |
||
859 | * |
||
860 | * @param integer $maxMergeDocs |
||
861 | */ |
||
862 | public function setMaxMergeDocs($maxMergeDocs) |
||
866 | |||
867 | /** |
||
868 | * Retrieve index mergeFactor option |
||
869 | * |
||
870 | * mergeFactor determines how often segment indices are merged by addDocument(). |
||
871 | * With smaller values, less RAM is used while indexing, |
||
872 | * and searches on unoptimized indices are faster, |
||
873 | * but indexing speed is slower. |
||
874 | * With larger values, more RAM is used during indexing, |
||
875 | * and while searches on unoptimized indices are slower, |
||
876 | * indexing is faster. |
||
877 | * Thus larger values (> 10) are best for batch index creation, |
||
878 | * and smaller values (< 10) for indices that are interactively maintained. |
||
879 | * |
||
880 | * Default value is 10 |
||
881 | * |
||
882 | * @return integer |
||
883 | */ |
||
884 | public function getMergeFactor() |
||
888 | |||
889 | /** |
||
890 | * Set index mergeFactor option |
||
891 | * |
||
892 | * mergeFactor determines how often segment indices are merged by addDocument(). |
||
893 | * With smaller values, less RAM is used while indexing, |
||
894 | * and searches on unoptimized indices are faster, |
||
895 | * but indexing speed is slower. |
||
896 | * With larger values, more RAM is used during indexing, |
||
897 | * and while searches on unoptimized indices are slower, |
||
898 | * indexing is faster. |
||
899 | * Thus larger values (> 10) are best for batch index creation, |
||
900 | * and smaller values (< 10) for indices that are interactively maintained. |
||
901 | * |
||
902 | * Default value is 10 |
||
903 | * |
||
904 | * @param integer $mergeFactor |
||
905 | */ |
||
906 | public function setMergeFactor($mergeFactor) |
||
910 | |||
911 | /** |
||
912 | * Performs a query against the index and returns an array |
||
913 | * of Zend_Search_Lucene_Search_QueryHit objects. |
||
914 | * Input is a string or Zend_Search_Lucene_Search_Query. |
||
915 | * |
||
916 | * @param Zend_Search_Lucene_Search_Query|string $query |
||
917 | * @return array Zend_Search_Lucene_Search_QueryHit |
||
918 | * @throws Zend_Search_Lucene_Exception |
||
919 | */ |
||
920 | public function find($query) |
||
1075 | |||
1076 | |||
1077 | /** |
||
1078 | * Returns a list of all unique field names that exist in this index. |
||
1079 | * |
||
1080 | * @param boolean $indexed |
||
1081 | * @return array |
||
1082 | */ |
||
1083 | public function getFieldNames($indexed = false) |
||
1091 | |||
1092 | |||
1093 | /** |
||
1094 | * Returns a Zend_Search_Lucene_Document object for the document |
||
1095 | * number $id in this index. |
||
1096 | * |
||
1097 | * @param integer|Zend_Search_Lucene_Search_QueryHit $id |
||
1098 | * @return Zend_Search_Lucene_Document |
||
1099 | * @throws Zend_Search_Lucene_Exception Exception is thrown if $id is out of the range |
||
1100 | */ |
||
1101 | public function getDocument($id) |
||
1159 | |||
1160 | |||
1161 | /** |
||
1162 | * Returns true if index contains documents with specified term. |
||
1163 | * |
||
1164 | * Is used for query optimization. |
||
1165 | * |
||
1166 | * @param Zend_Search_Lucene_Index_Term $term |
||
1167 | * @return boolean |
||
1168 | */ |
||
1169 | public function hasTerm(Zend_Search_Lucene_Index_Term $term) |
||
1179 | |||
1180 | /** |
||
1181 | * Returns IDs of all documents containing term. |
||
1182 | * |
||
1183 | * @param Zend_Search_Lucene_Index_Term $term |
||
1184 | * @param Zend_Search_Lucene_Index_DocsFilter|null $docsFilter |
||
1185 | * @return array |
||
1186 | */ |
||
1187 | public function termDocs(Zend_Search_Lucene_Index_Term $term, $docsFilter = null) |
||
1210 | |||
1211 | /** |
||
1212 | * Returns documents filter for all documents containing term. |
||
1213 | * |
||
1214 | * It performs the same operation as termDocs, but return result as |
||
1215 | * Zend_Search_Lucene_Index_DocsFilter object |
||
1216 | * |
||
1217 | * @param Zend_Search_Lucene_Index_Term $term |
||
1218 | * @param Zend_Search_Lucene_Index_DocsFilter|null $docsFilter |
||
1219 | * @return Zend_Search_Lucene_Index_DocsFilter |
||
1220 | */ |
||
1221 | public function termDocsFilter(Zend_Search_Lucene_Index_Term $term, $docsFilter = null) |
||
1244 | |||
1245 | |||
1246 | /** |
||
1247 | * Returns an array of all term freqs. |
||
1248 | * Result array structure: array(docId => freq, ...) |
||
1249 | * |
||
1250 | * @param Zend_Search_Lucene_Index_Term $term |
||
1251 | * @param Zend_Search_Lucene_Index_DocsFilter|null $docsFilter |
||
1252 | * @return array |
||
1253 | */ |
||
1254 | public function termFreqs(Zend_Search_Lucene_Index_Term $term, $docsFilter = null) |
||
1266 | |||
1267 | /** |
||
1268 | * Returns an array of all term positions in the documents. |
||
1269 | * Result array structure: array(docId => array(pos1, pos2, ...), ...) |
||
1270 | * |
||
1271 | * @param Zend_Search_Lucene_Index_Term $term |
||
1272 | * @param Zend_Search_Lucene_Index_DocsFilter|null $docsFilter |
||
1273 | * @return array |
||
1274 | */ |
||
1275 | public function termPositions(Zend_Search_Lucene_Index_Term $term, $docsFilter = null) |
||
1287 | |||
1288 | |||
1289 | /** |
||
1290 | * Returns the number of documents in this index containing the $term. |
||
1291 | * |
||
1292 | * @param Zend_Search_Lucene_Index_Term $term |
||
1293 | * @return integer |
||
1294 | */ |
||
1295 | public function docFreq(Zend_Search_Lucene_Index_Term $term) |
||
1307 | |||
1308 | |||
1309 | /** |
||
1310 | * Retrieve similarity used by index reader |
||
1311 | * |
||
1312 | * @return Zend_Search_Lucene_Search_Similarity |
||
1313 | */ |
||
1314 | public function getSimilarity() |
||
1321 | |||
1322 | |||
1323 | /** |
||
1324 | * Returns a normalization factor for "field, document" pair. |
||
1325 | * |
||
1326 | * @param integer $id |
||
1327 | * @param string $fieldName |
||
1328 | * @return float |
||
1329 | */ |
||
1330 | public function norm($id, $fieldName) |
||
1351 | |||
1352 | /** |
||
1353 | * Returns true if any documents have been deleted from this index. |
||
1354 | * |
||
1355 | * @return boolean |
||
1356 | */ |
||
1357 | public function hasDeletions() |
||
1367 | |||
1368 | |||
1369 | /** |
||
1370 | * Deletes a document from the index. |
||
1371 | * $id is an internal document id |
||
1372 | * |
||
1373 | * @param integer|Zend_Search_Lucene_Search_QueryHit $id |
||
1374 | * @throws Zend_Search_Lucene_Exception |
||
1375 | */ |
||
1376 | public function delete($id) |
||
1400 | |||
1401 | |||
1402 | |||
1403 | /** |
||
1404 | * Adds a document to this index. |
||
1405 | * |
||
1406 | * @param Zend_Search_Lucene_Document $document |
||
1407 | */ |
||
1408 | public function addDocument(Zend_Search_Lucene_Document $document) |
||
1415 | |||
1416 | |||
1417 | /** |
||
1418 | * Update document counter |
||
1419 | */ |
||
1420 | private function _updateDocCount() |
||
1427 | |||
1428 | /** |
||
1429 | * Commit changes resulting from delete() or undeleteAll() operations. |
||
1430 | * |
||
1431 | * @todo undeleteAll processing. |
||
1432 | */ |
||
1433 | public function commit() |
||
1443 | |||
1444 | |||
1445 | /** |
||
1446 | * Optimize index. |
||
1447 | * |
||
1448 | * Merges all segments into one |
||
1449 | */ |
||
1450 | public function optimize() |
||
1460 | |||
1461 | |||
1462 | /** |
||
1463 | * Returns an array of all terms in this index. |
||
1464 | * |
||
1465 | * @return array |
||
1466 | */ |
||
1467 | public function terms() |
||
1501 | |||
1502 | |||
1503 | /** |
||
1504 | * Terms stream priority queue object |
||
1505 | * |
||
1506 | * @var Zend_Search_Lucene_TermStreamsPriorityQueue |
||
1507 | */ |
||
1508 | private $_termsStream = null; |
||
1509 | |||
1510 | /** |
||
1511 | * Reset terms stream. |
||
1512 | */ |
||
1513 | public function resetTermsStream() |
||
1524 | |||
1525 | /** |
||
1526 | * Skip terms stream up to the specified term prefix. |
||
1527 | * |
||
1528 | * Prefix contains fully specified field info and portion of searched term |
||
1529 | * |
||
1530 | * @param Zend_Search_Lucene_Index_Term $prefix |
||
1531 | */ |
||
1532 | public function skipTo(Zend_Search_Lucene_Index_Term $prefix) |
||
1536 | |||
1537 | /** |
||
1538 | * Scans terms dictionary and returns next term |
||
1539 | * |
||
1540 | * @return Zend_Search_Lucene_Index_Term|null |
||
1541 | */ |
||
1542 | public function nextTerm() |
||
1546 | |||
1547 | /** |
||
1548 | * Returns term in current position |
||
1549 | * |
||
1550 | * @return Zend_Search_Lucene_Index_Term|null |
||
1551 | */ |
||
1552 | public function currentTerm() |
||
1556 | |||
1557 | /** |
||
1558 | * Close terms stream |
||
1559 | * |
||
1560 | * Should be used for resources clean up if stream is not read up to the end |
||
1561 | */ |
||
1562 | public function closeTermsStream() |
||
1567 | |||
1568 | |||
1569 | /************************************************************************* |
||
1570 | @todo UNIMPLEMENTED |
||
1571 | *************************************************************************/ |
||
1572 | /** |
||
1573 | * Undeletes all documents currently marked as deleted in this index. |
||
1574 | * |
||
1575 | * @todo Implementation |
||
1576 | */ |
||
1577 | public function undeleteAll() |
||
1579 | } |
||
1580 |
This check looks for unreachable code. It uses sophisticated control flow analysis techniques to find statements which will never be executed.
Unreachable code is most often the result of `return`, `die` or `exit` statements that have been added for debug purposes.
In the above example, the last `return false` will never be executed, because a return statement has already been met in every possible execution path.