Duplicate code is one of the most pungent code smells. A rule that is often used is to re-structure code once it is duplicated in three or more places.
Common duplication problems, and corresponding solutions are:
Complex classes like EE_Import often do a lot of different things. To break such a class down, we need to identify a cohesive component within that class. A common approach to find such a component is to look for fields/methods that share the same prefixes, or suffixes. You can also have a look at the cohesion graph to spot any un-connected, or weakly-connected components.
Once you have determined the fields that belong together, you can apply the Extract Class refactoring. If the component makes sense as a sub-class, Extract Subclass is also a candidate, and is often faster.
While breaking up the class, it is a good idea to analyze how other classes use EE_Import, and based on these observations, apply Extract Interface, too.
1 | <?php if (!defined('EVENT_ESPRESSO_VERSION')) exit('No direct script access allowed'); |
||
12 | class EE_Import { |
||
13 | |||
14 | const do_insert = 'insert'; |
||
15 | const do_update = 'update'; |
||
16 | const do_nothing = 'nothing'; |
||
17 | |||
18 | |||
19 | // instance of the EE_Import object |
||
20 | private static $_instance = NULL; |
||
21 | |||
22 | private static $_csv_array = array(); |
||
|
|||
23 | |||
24 | /** |
||
25 | * |
||
26 | * @var array of model names |
||
27 | */ |
||
28 | private static $_model_list = array(); |
||
29 | |||
30 | private static $_columns_to_save = array(); |
||
31 | |||
32 | protected $_total_inserts = 0; |
||
33 | protected $_total_updates = 0; |
||
34 | protected $_total_insert_errors = 0; |
||
35 | protected $_total_update_errors = 0; |
||
36 | |||
37 | |||
38 | /** |
||
39 | * private constructor to prevent direct creation |
||
40 | * @Constructor |
||
41 | * @access private |
||
42 | * @return void |
||
43 | */ |
||
44 | private function __construct() { |
||
50 | |||
51 | |||
52 | /** |
||
53 | * @ singleton method used to instantiate class object |
||
54 | * @ access public |
||
55 | * @return EE_Import |
||
56 | */ |
||
public static function instance() {
	// an existing importer is reused as-is; instanceof is already false for NULL and non-objects
	if ( self::$_instance instanceof EE_Import ) {
		return self::$_instance;
	}
	// nothing usable cached yet, so build the importer and remember it for later calls
	self::$_instance = new self();
	return self::$_instance;
}
||
64 | |||
65 | /** |
||
66 | * Resets the importer |
||
67 | * @return EE_Import |
||
68 | */ |
||
69 | public static function reset(){ |
||
73 | |||
74 | |||
75 | |||
76 | |||
77 | /** |
||
78 | * @ generates HTML for a file upload input and form |
||
79 | * @ access public |
||
80 | * @param string $title - heading for the form |
||
81 | * @param string $intro - additional text explaining what to do |
||
82 | * @param string $page - EE Admin page to direct form to - in the form "espresso_{pageslug}" |
||
83 | * @param string $action - EE Admin page route array "action" that form will direct to |
||
84 | * @param string $type - type of file to import |
||
85 | * @ return string |
||
86 | */ |
||
87 | public function upload_form ( $title, $intro, $form_url, $action, $type ) { |
||
116 | |||
117 | |||
118 | |||
119 | |||
120 | |||
121 | /** |
||
122 | * @Import Event Espresso data - some code "borrowed" from event espresso csv_import.php |
||
123 | * @access public |
||
124 | * @return boolean success |
||
125 | */ |
||
126 | public function import() { |
||
242 | |||
243 | /** |
||
244 | * Given an array of data (usually from a CSV import) attempts to save that data to the db. |
||
245 | * If $model_name ISN'T provided, assumes that this is a 3d array, with toplevel keys being model names, |
||
246 | * next level being numeric indexes and each value representing a model object, and the last layer down |
||
247 | * being keys of model fields and their proposed values. |
||
248 | * If $model_name IS provided, assumes a 2d array of the bottom two layers previously mentioned. |
||
249 | * If the CSV data says (in the metadata row) that it's from the SAME database, |
||
250 | * we treat the IDs in the CSV as the normal IDs, and try to update those records. However, if those |
||
251 | * IDs DON'T exist in the database, they're treated as temporary IDs, |
||
252 | * which can be used elsewhere to refer to the same object. Once an item |
||
253 | * with a temporary ID gets inserted, we record its mapping from temporary |
||
254 | * ID to real ID, and use the real ID in place of the temporary ID |
||
255 | * when that temporary ID was used as a foreign key. |
||
256 | * If the CSV data says (in the metadata again) that it's from a DIFFERENT database, |
||
257 | * we treat all the IDs in the CSV as temporary IDs - eg, if the CSV specifies an event with |
||
258 | * ID 1, and the database already has an event with ID 1, we assume that's just a coincidence, |
||
259 | * and insert a new event, and map its temporary ID of 1 over to its new real ID. |
||
260 | * An important exception are non-auto-increment primary keys. If one entry in the |
||
261 | * CSV file has the same ID as one in the DB, we assume they are meant to be |
||
262 | * the same item, and instead update the item in the DB with that same ID. |
||
263 | * Also note, we remember the mappings permanently. So the 2nd, 3rd, and 10000th |
||
264 | * time you import a CSV from a different site, we remember their mappings, and |
||
265 | * will try to update the item in the DB instead of inserting another item (eg |
||
266 | * if we previously imported an event with temporary ID 1, and then it got a |
||
267 | * real ID of 123, we remember that. So the next time we import an event with |
||
268 | * temporary ID 1, from the same site, we know that its real ID is 123, and will |
||
269 | * update that event, instead of adding a new event). |
||
270 | * @access public |
||
271 | * @param array $csv_data_array - the array containing the csv data produced from EE_CSV::import_csv_to_model_data_array() |
||
272 | * @param array $fields_to_save - an array containing the csv column names as keys with the corresponding db table fields they will be saved to |
||
273 | * @return TRUE on success, FALSE on fail |
||
274 | */ |
||
275 | public function save_csv_data_array_to_db( $csv_data_array, $model_name = FALSE ) { |
||
349 | |||
350 | |||
351 | /** |
||
352 | * Processes the array of data, given the knowledge that it's from the same database or a different one, |
||
353 | * and the mapping from temporary IDs to real IDs. |
||
354 | * If the data is from a different database, we treat the primary keys and their corresponding |
||
355 | * foreign keys as "temp Ids", basically identifiers that get mapped to real primary keys |
||
356 | * in the real target database. As items are inserted, their temporary primary keys |
||
357 | * are mapped to the real IDs in the target database. Also, before doing any update or |
||
358 | * insert, we replace all the temp ID which are foreign keys with their mapped real IDs. |
||
359 | * An exception: string primary keys are treated as real IDs, or else we'd need to |
||
360 | * dynamically generate new string primary keys which would be very awkward for the country table etc. |
||
361 | * Also, models with no primary key are strange too. We use their primary key INDEX (a |
||
362 | * combination of fields) to create a unique string identifying the row and store |
||
363 | * those in the mapping. |
||
364 | * |
||
365 | * If the data is from the same database, we usually treat primary keys as real IDs. |
||
366 | * An exception is if there is nothing in the database for that ID. If that's the case, |
||
367 | * we need to insert a new row for that ID, and then map from the non-existent ID |
||
368 | * to the newly-inserted real ID. |
||
369 | * @param type $csv_data_array |
||
370 | * @param type $export_from_site_a_to_b |
||
371 | * @param type $old_db_to_new_db_mapping |
||
372 | * @return array updated $old_db_to_new_db_mapping |
||
373 | */ |
||
public function save_data_rows_to_db( $csv_data_array, $export_from_site_a_to_b, $old_db_to_new_db_mapping ) {
	// Walks $csv_data_array (model name => list of rows => field values), decides for each row
	// whether it should be inserted or updated, and hands it to the matching helper.
	// Returns the (possibly extended) temp-ID => real-ID mapping, or FALSE as soon as a
	// top-level key turns out not to be a model name.
	foreach ( $csv_data_array as $model_name_in_csv_data => $model_data_from_import ) {
		//every top-level key is assumed to be a model name; verify that before touching any rows
		if ( ! EE_Registry::instance()->is_model_name( $model_name_in_csv_data ) ) {
			// no table info in the array and no table name passed to the function?? FAIL
			EE_Error::add_error( __('No table information was specified and/or found, therefore the import could not be completed','event_espresso'), __FILE__, __FUNCTION__, __LINE__ );
			// NOTE(review): rows of models handled in earlier iterations are not undone here - confirm callers expect that
			return FALSE;
		}
		$model_name = $model_name_in_csv_data;
		/* @var $model EEM_Base */
		$model = EE_Registry::instance()->load_model( $model_name );

		//so without further ado, scanning all the data provided for primary keys and their initial values
		foreach ( $model_data_from_import as $model_object_data ) {
			//before we do ANYTHING, make sure the csv row wasn't just completely blank
			//NOTE(review): this is a truthiness test, so a row holding only '' / '0' / 0 values counts as blank
			$row_is_completely_empty = true;
			foreach ( $model_object_data as $field ) {
				if ( $field ) {
					$row_is_completely_empty = false;
					//one non-empty cell settles it, no need to look at the rest
					break;
				}
			}
			if ( $row_is_completely_empty ) {
				continue;
			}

			//find the PK in the row of data (or a combined key if
			//there is no primary key)
			if ( $model->has_primary_key_field() ) {
				$primary_key_name = $model->primary_key_name();
				//the CSV may not contain the PK column at all; use NULL in that case
				//instead of triggering an undefined-index notice
				$id_in_csv = isset( $model_object_data[ $primary_key_name ] ) ? $model_object_data[ $primary_key_name ] : NULL;
			} else {
				$id_in_csv = $model->get_index_primary_key_string( $model_object_data );
			}

			//the ID above is captured BEFORE temp IDs get swapped out, so it stays the ID as written in the CSV
			$model_object_data = $this->_replace_temp_ids_with_mappings( $model_object_data, $model, $old_db_to_new_db_mapping, $export_from_site_a_to_b );

			//now we need to decide if we're going to add a new model object given the $model_object_data,
			//or just update.
			if ( $export_from_site_a_to_b ) {
				$what_to_do = $this->_decide_whether_to_insert_or_update_given_data_from_other_db( $id_in_csv, $model_object_data, $model, $old_db_to_new_db_mapping );
			} else {
				//this is just a re-import
				$what_to_do = $this->_decide_whether_to_insert_or_update_given_data_from_same_db( $id_in_csv, $model_object_data, $model, $old_db_to_new_db_mapping );
			}

			//strict comparisons throughout: the do_* constants are strings, and a loose == would let
			//a stray 0 / TRUE from a helper match one of them on older PHP versions
			if ( $what_to_do === self::do_nothing ) {
				continue;
			}

			//double-check we actually want to insert, if that's what we're planning
			//based on whether this item would be unique in the DB or not
			if ( $what_to_do === self::do_insert ) {
				//we're supposed to be inserting. But wait, will this thing
				//be acceptable if inserted?
				$conflicting = $model->get_one_conflicting( $model_object_data, false );
				if ( $conflicting ) {
					//ok, this item would conflict if inserted. Just update the item that it conflicts with.
					$what_to_do = self::do_update;
					if ( $model->has_primary_key_field() ) {
						//remember which real row this CSV ID stands for, and aim the update at that row
						$old_db_to_new_db_mapping[ $model_name ][ $id_in_csv ] = $conflicting->ID();
						$model_object_data[ $model->primary_key_name() ] = $conflicting->ID();
					} else {
						//we want to update this conflicting item, instead of inserting a conflicting item
						//so we need to make sure they match entirely (its possible that they only conflicted on one field, but we need them to match on other fields
						//for the WHERE conditions in the update). At the time of this comment, there were no models like this
						foreach ( $model->get_combined_primary_key_fields() as $key_field ) {
							$model_object_data[ $key_field->get_name() ] = $conflicting->get( $key_field->get_name() );
						}
					}
				}
			}

			//both helpers hand back the mapping, so carry their result forward to the next row
			if ( $what_to_do === self::do_insert ) {
				$old_db_to_new_db_mapping = $this->_insert_from_data_array( $id_in_csv, $model_object_data, $model, $old_db_to_new_db_mapping );
			} elseif ( $what_to_do === self::do_update ) {
				$old_db_to_new_db_mapping = $this->_update_from_data_array( $id_in_csv, $model_object_data, $model, $old_db_to_new_db_mapping );
			} else {
				//anything other than the three do_* constants means a helper misbehaved
				throw new EE_Error( sprintf( __( 'Programming error. We shoudl be inserting or updating, but instead we are being told to "%s", whifh is invalid', 'event_espresso' ), $what_to_do ) );
			}
		}
	}
	return $old_db_to_new_db_mapping;
}
||
454 | |||
455 | |||
456 | |||
457 | /** |
||
458 | * Decides whether or not to insert, given that this data is from another database. |
||
459 | * So, if the primary key of this $model_object_data already exists in the database, |
||
460 | * it's just a coincidence and we should still insert. The only time we should |
||
461 | * update is when we know what it maps to, or there's something that would |
||
462 | * conflict (and we should instead just update that conflicting thing) |
||
463 | * @param string $id_in_csv |
||
464 | * @param array $model_object_data by reference so it can be modified |
||
465 | * @param EEM_Base $model |
||
466 | * @param array $old_db_to_new_db_mapping by reference so it can be modified |
||
467 | * @return string one of the consts on this class that starts with do_* |
||
468 | */ |
||
469 | protected function _decide_whether_to_insert_or_update_given_data_from_other_db( $id_in_csv, $model_object_data, $model, $old_db_to_new_db_mapping ) { |
||
479 | |||
480 | /** |
||
481 | * If this thing basically already exists in the database, we want to update it; |
||
482 | * otherwise insert it (ie, someone tweaked the CSV file, or the item was |
||
483 | * deleted in the database so it should be re-inserted) |
||
484 | * @param type $id_in_csv |
||
485 | * @param type $model_object_data |
||
486 | * @param EEM_Base $model |
||
487 | * @param type $old_db_to_new_db_mapping |
||
488 | * @return |
||
489 | */ |
||
490 | protected function _decide_whether_to_insert_or_update_given_data_from_same_db( $id_in_csv, $model_object_data, $model ) { |
||
498 | |||
499 | /** |
||
500 | * Using the $old_db_to_new_db_mapping array, replaces all the temporary IDs |
||
501 | * with their mapped real IDs. Eg, if importing from site A to B, the mapping |
||
502 | * file may indicate that the ID "my_event_id" maps to an actual event ID of 123. |
||
503 | * So this function searches for any event temp Ids called "my_event_id" and |
||
504 | * replaces them with 123. |
||
505 | * Also, if there is no temp ID for the INT foreign keys from another database, |
||
506 | * replaces them with 0 or the field's default. |
||
507 | * @param type $model_object_data |
||
508 | * @param EEM_Base $model |
||
509 | * @param type $old_db_to_new_db_mapping |
||
510 | * @param boolean $export_from_site_a_to_b |
||
511 | * @return array updated model object data with temp IDs removed |
||
512 | */ |
||
513 | protected function _replace_temp_ids_with_mappings( $model_object_data, $model, $old_db_to_new_db_mapping, $export_from_site_a_to_b ) { |
||
574 | |||
575 | /** |
||
576 | * If the data was exported PRE-4.2, but then imported POST-4.2, then the term_id |
||
577 | * this term-taxonomy refers to may be out-of-date so we need to update it. |
||
578 | * see https://make.wordpress.org/core/2015/02/16/taxonomy-term-splitting-in-4-2-a-developer-guide/ |
||
579 | * @param type $model_object_data |
||
580 | * @return array new model object data |
||
581 | */ |
||
582 | protected function _handle_split_term_ids( $model_object_data ){ |
||
591 | /** |
||
592 | * Given the object's ID and its model's name, find it in the mapping data, |
||
593 | * bearing in mind where it came from |
||
594 | * @param type $object_id |
||
595 | * @param string $model_name |
||
596 | * @param array $old_db_to_new_db_mapping |
||
597 | * @param type $export_from_site_a_to_b |
||
598 | * @return int |
||
599 | */ |
||
600 | protected function _find_mapping_in( $object_id, $model_name, $old_db_to_new_db_mapping, $export_from_site_a_to_b) { |
||
617 | |||
618 | /** |
||
619 | * |
||
620 | * @param type $id_in_csv |
||
621 | * @param type $model_object_data |
||
622 | * @param EEM_Base $model |
||
623 | * @param type $old_db_to_new_db_mapping |
||
624 | * @return array updated $old_db_to_new_db_mapping |
||
625 | */ |
||
626 | protected function _insert_from_data_array( $id_in_csv, $model_object_data, $model, $old_db_to_new_db_mapping ) { |
||
659 | |||
660 | /** |
||
661 | * Given the model object data, finds the row to update and updates it |
||
662 | * @param string|int $id_in_csv |
||
663 | * @param array $model_object_data |
||
664 | * @param EEM_Base $model |
||
665 | * @param array $old_db_to_new_db_mapping |
||
666 | * @return array updated $old_db_to_new_db_mapping |
||
667 | */ |
||
668 | protected function _update_from_data_array( $id_in_csv, $model_object_data, $model, $old_db_to_new_db_mapping ) { |
||
720 | |||
721 | /** |
||
722 | * Gets the number of inserts performed since importer was instantiated or reset |
||
723 | * @return int |
||
724 | */ |
||
725 | public function get_total_inserts(){ |
||
728 | /** |
||
729 | * Gets the number of insert errors since importer was instantiated or reset |
||
730 | * @return int |
||
731 | */ |
||
732 | public function get_total_insert_errors(){ |
||
735 | /** |
||
736 | * Gets the number of updates performed since importer was instantiated or reset |
||
737 | * @return int |
||
738 | */ |
||
739 | public function get_total_updates(){ |
||
742 | /** |
||
743 | * Gets the number of update errors since importer was instantiated or reset |
||
744 | * @return int |
||
745 | */ |
||
746 | public function get_total_update_errors(){ |
||
749 | |||
750 | |||
751 | |||
752 | |||
753 | |||
754 | } |
||
755 | /* End of file EE_Import.class.php */ |
||
757 | ?> |
This check marks private properties in classes that are never used. Those properties can be removed.