Duplicate code is one of the most pungent code smells. A rule that is often used is to re-structure code once it is duplicated in three or more places.
Common duplication problems, and corresponding solutions are:
Complex classes like EE_Import often do a lot of different things. To break such a class down, we need to identify a cohesive component within that class. A common approach to finding such a component is to look for fields/methods that share the same prefixes or suffixes. You can also have a look at the cohesion graph to spot any unconnected or weakly connected components.
Once you have determined the fields that belong together, you can apply the Extract Class refactoring. If the component makes sense as a sub-class, Extract Subclass is also a candidate, and is often faster.
While breaking up the class, it is a good idea to analyze how other classes use EE_Import, and based on these observations, apply Extract Interface, too.
| 1 | <?php if (!defined('EVENT_ESPRESSO_VERSION')) exit('No direct script access allowed'); |
||
| 12 | class EE_Import { |
||
| 13 | |||
| 14 | const do_insert = 'insert'; |
||
| 15 | const do_update = 'update'; |
||
| 16 | const do_nothing = 'nothing'; |
||
| 17 | |||
| 18 | |||
| 19 | // instance of the EE_Import object |
||
| 20 | private static $_instance = NULL; |
||
| 21 | |||
| 22 | private static $_csv_array = array(); |
||
|
|
|||
| 23 | |||
| 24 | /** |
||
| 25 | * |
||
| 26 | * @var array of model names |
||
| 27 | */ |
||
| 28 | private static $_model_list = array(); |
||
| 29 | |||
| 30 | private static $_columns_to_save = array(); |
||
| 31 | |||
| 32 | protected $_total_inserts = 0; |
||
| 33 | protected $_total_updates = 0; |
||
| 34 | protected $_total_insert_errors = 0; |
||
| 35 | protected $_total_update_errors = 0; |
||
| 36 | |||
| 37 | |||
| 38 | /** |
||
| 39 | * private constructor to prevent direct creation |
||
| 40 | * @Constructor |
||
| 41 | * @access private |
||
| 42 | * @return void |
||
| 43 | */ |
||
| 44 | private function __construct() { |
||
| 50 | |||
| 51 | |||
/**
 * Singleton accessor: hands back the one shared importer, building it on first use.
 *
 * A new object is created whenever the stored value is not an EE_Import
 * (which covers NULL and any non-object value as well).
 *
 * @access public
 * @return EE_Import
 */
public static function instance() {
	// instanceof is false for NULL and for non-objects, so this single test covers every "not yet built" case
	if ( ! ( self::$_instance instanceof EE_Import ) ) {
		self::$_instance = new self();
	}
	return self::$_instance;
}
||
| 64 | |||
| 65 | /** |
||
| 66 | * Resets the importer |
||
| 67 | * @return EE_Import |
||
| 68 | */ |
||
| 69 | public static function reset(){ |
||
| 73 | |||
| 74 | |||
| 75 | |||
| 76 | |||
| 77 | /** |
||
| 78 | * @ generates HTML for a file upload input and form |
||
| 79 | * @ access public |
||
| 80 | * @param string $title - heading for the form |
||
| 81 | * @param string $intro - additional text explaining what to do |
||
| 82 | * @param string $page - EE Admin page to direct form to - in the form "espresso_{pageslug}" |
||
| 83 | * @param string $action - EE Admin page route array "action" that form will direct to |
||
| 84 | * @param string $type - type of file to import |
||
| 85 | * @ return string |
||
| 86 | */ |
||
| 87 | public function upload_form ( $title, $intro, $form_url, $action, $type ) { |
||
| 116 | |||
| 117 | |||
| 118 | |||
| 119 | |||
| 120 | |||
| 121 | /** |
||
| 122 | * @Import Event Espresso data - some code "borrowed" from event espresso csv_import.php |
||
| 123 | * @access public |
||
| 124 | * @return boolean success |
||
| 125 | */ |
||
| 126 | public function import() { |
||
| 242 | |||
| 243 | /** |
||
| 244 | * Given an array of data (usually from a CSV import) attempts to save that data to the db. |
||
| 245 | * If $model_name ISN'T provided, assumes that this is a 3d array, with toplevel keys being model names, |
||
| 246 | * next level being numeric indexes and each value representing a model object, and the last layer down |
||
| 247 | * being keys of model fields and their proposed values. |
||
| 248 | * If $model_name IS provided, assumes a 2d array of the bottom two layers previously mentioned. |
||
| 249 | * If the CSV data says (in the metadata row) that it's from the SAME database, |
||
| 250 | * we treat the IDs in the CSV as the normal IDs, and try to update those records. However, if those |
||
| 251 | * IDs DON'T exist in the database, they're treated as temporary IDs, |
||
| 252 | * which can be used elsewhere to refer to the same object. Once an item |
||
| 253 | * with a temporary ID gets inserted, we record its mapping from temporary |
||
| 254 | * ID to real ID, and use the real ID in place of the temporary ID |
||
| 255 | * when that temporary ID was used as a foreign key. |
||
| 256 | * If the CSV data says (in the metadata again) that it's from a DIFFERENT database, |
||
| 257 | * we treat all the IDs in the CSV as temporary ID- eg, if the CSV specifies an event with |
||
| 258 | * ID 1, and the database already has an event with ID 1, we assume that's just a coincidence, |
||
| 259 | * and insert a new event, and map its temporary ID of 1 over to its new real ID. |
||
| 260 | * An important exception are non-auto-increment primary keys. If one entry in the |
||
| 261 | * CSV file has the same ID as one in the DB, we assume they are meant to be |
||
| 262 | * the same item, and instead update the item in the DB with that same ID. |
||
| 263 | * Also note, we remember the mappings permanently. So the 2nd, 3rd, and 10000th |
||
| 264 | * time you import a CSV from a different site, we remember their mappings, and |
||
| 265 | * will try to update the item in the DB instead of inserting another item (eg |
||
| 266 | * if we previously imported an event with temporary ID 1, and then it got a |
||
| 267 | * real ID of 123, we remember that. So the next time we import an event with |
||
| 268 | * temporary ID 1, from the same site, we know that its real ID is 123, and will |
||
| 269 | * update that event, instead of adding a new event). |
||
| 270 | * @access public |
||
| 271 | * @param array $csv_data_array - the array containing the csv data produced from EE_CSV::import_csv_to_model_data_array() |
||
| 272 | * @param string|boolean $model_name - name of the model the rows belong to when $csv_data_array is the 2d form; FALSE when the array's top-level keys are model names |
||
| 273 | * @return TRUE on success, FALSE on fail |
||
| 274 | */ |
||
| 275 | public function save_csv_data_array_to_db( $csv_data_array, $model_name = FALSE ) { |
||
| 349 | |||
| 350 | |||
/**
 * Walks the import data model by model and row by row, inserting or updating each row,
 * and keeps the temporary-ID-to-real-ID mapping current along the way.
 *
 * When the data comes from a different database, primary keys and the foreign keys
 * pointing at them are treated as "temp IDs": identifiers that get mapped to real
 * primary keys in the target database. As rows are inserted, their temporary primary
 * keys are mapped to the real IDs in the target database, and before any insert or
 * update the temp IDs used as foreign keys are swapped for their mapped real IDs.
 * Exceptions: string primary keys are treated as real IDs (otherwise new string primary
 * keys would have to be generated dynamically, which would be very awkward for the
 * country table etc). Models without a primary key use their primary key INDEX
 * (a combination of fields) to build a unique string identifying the row, and that
 * string is what gets stored in the mapping.
 *
 * When the data comes from the same database, primary keys are usually treated as
 * real IDs. The exception is an ID with nothing in the database behind it: a new row
 * is inserted and the non-existent ID is mapped to the newly-inserted real ID.
 *
 * @param array   $csv_data_array           top-level keys are model names; each value is a list of rows (field name => value)
 * @param boolean $export_from_site_a_to_b  truthy when the data was exported from a different database
 * @param array   $old_db_to_new_db_mapping mapping of temporary IDs to real IDs, keyed by model name
 * @return array|boolean updated $old_db_to_new_db_mapping, or FALSE if a top-level key is not a model name
 * @throws EE_Error if the decision step yields something other than insert, update or nothing
 */
public function save_data_rows_to_db( $csv_data_array, $export_from_site_a_to_b, $old_db_to_new_db_mapping ) {
	foreach ( $csv_data_array as $model_name => $rows_for_model ) {
		// every top-level key has to name a known model; otherwise the import is abandoned right here
		if ( ! EE_Registry::instance()->is_model_name( $model_name ) ) {
			EE_Error::add_error( __('No table information was specified and/or found, therefore the import could not be completed','event_espresso'), __FILE__, __FUNCTION__, __LINE__ );
			return FALSE;
		}
		/* @var $model EEM_Base */
		$model = EE_Registry::instance()->load_model( $model_name );

		foreach ( $rows_for_model as $row ) {
			// a row counts as blank when none of its values is truthy; blank rows are skipped outright
			$row_has_content = false;
			foreach ( $row as $value ) {
				if ( $value ) {
					$row_has_content = true;
					break;
				}
			}
			if ( ! $row_has_content ) {
				continue;
			}

			// identify the row as it appears in the CSV: its primary key value,
			// or the combined-key string for models that have no primary key
			$id_in_csv = $model->has_primary_key_field()
				? $row[ $model->primary_key_name() ]
				: $model->get_index_primary_key_string( $row );

			$row = $this->_replace_temp_ids_with_mappings( $row, $model, $old_db_to_new_db_mapping, $export_from_site_a_to_b );

			// pick insert / update / nothing; a same-database import is just a re-import.
			// NOTE(review): the same-db decider is declared with three parameters in this file,
			// so the mapping passed as a fourth argument appears to be unused there - confirm
			$what_to_do = $export_from_site_a_to_b
				? $this->_decide_whether_to_insert_or_update_given_data_from_other_db( $id_in_csv, $row, $model, $old_db_to_new_db_mapping )
				: $this->_decide_whether_to_insert_or_update_given_data_from_same_db( $id_in_csv, $row, $model, $old_db_to_new_db_mapping );
			if ( $what_to_do == self::do_nothing ) {
				continue;
			}

			// a planned insert is downgraded to an update when the new row would
			// conflict with something that is already in the database
			if ( $what_to_do == self::do_insert ) {
				$conflicting = $model->get_one_conflicting( $row, false );
				if ( $conflicting ) {
					$what_to_do = self::do_update;
					if ( $model->has_primary_key_field() ) {
						// remember which real row this CSV ID now stands for, and aim the update at it
						$real_id = $conflicting->ID();
						$old_db_to_new_db_mapping[ $model_name ][ $id_in_csv ] = $real_id;
						$row[ $model->primary_key_name() ] = $real_id;
					} else {
						// no primary key: copy every combined-key field from the conflicting item so the
						// update's WHERE conditions match it entirely (the conflict may have been on one field only).
						// At the time the original comment was written there were no models like this.
						foreach ( $model->get_combined_primary_key_fields() as $key_field ) {
							$row[ $key_field->get_name() ] = $conflicting->get( $key_field->get_name() );
						}
					}
				}
			}

			// switch compares loosely, exactly like the == checks above
			switch ( $what_to_do ) {
				case self::do_insert:
					$old_db_to_new_db_mapping = $this->_insert_from_data_array( $id_in_csv, $row, $model, $old_db_to_new_db_mapping );
					break;
				case self::do_update:
					$old_db_to_new_db_mapping = $this->_update_from_data_array( $id_in_csv, $row, $model, $old_db_to_new_db_mapping );
					break;
				default:
					throw new EE_Error( sprintf( __( 'Programming error. We shoudl be inserting or updating, but instead we are being told to "%s", whifh is invalid', 'event_espresso' ), $what_to_do ) );
			}
		}
	}
	return $old_db_to_new_db_mapping;
}
||
| 454 | |||
| 455 | |||
| 456 | |||
| 457 | /** |
||
| 458 | * Decides whether or not to insert, given that this data is from another database. |
||
| 459 | * So, if the primary key of this $model_object_data already exists in the database, |
||
| 460 | * it's just a coincidence and we should still insert. The only time we should |
||
| 461 | * update is when we know what it maps to, or there's something that would |
||
| 462 | * conflict (and we should instead just update that conflicting thing) |
||
| 463 | * @param string $id_in_csv |
||
| 464 | * @param array $model_object_data by reference so it can be modified |
||
| 465 | * @param EEM_Base $model |
||
| 466 | * @param array $old_db_to_new_db_mapping by reference so it can be modified |
||
| 467 | * @return string one of the consts on this class that starts with do_* |
||
| 468 | */ |
||
| 469 | protected function _decide_whether_to_insert_or_update_given_data_from_other_db( $id_in_csv, $model_object_data, $model, $old_db_to_new_db_mapping ) { |
||
| 479 | |||
| 480 | /** |
||
| 481 | * If this thing basically already exists in the database, we want to update it; |
||
| 482 | * otherwise insert it (ie, someone tweaked the CSV file, or the item was |
||
| 483 | * deleted in the database so it should be re-inserted) |
||
| 484 | * @param type $id_in_csv |
||
| 485 | * @param type $model_object_data |
||
| 486 | * @param EEM_Base $model |
||
| 487 | * @param type $old_db_to_new_db_mapping |
||
| 488 | * @return |
||
| 489 | */ |
||
| 490 | protected function _decide_whether_to_insert_or_update_given_data_from_same_db( $id_in_csv, $model_object_data, $model ) { |
||
| 498 | |||
| 499 | /** |
||
| 500 | * Using the $old_db_to_new_db_mapping array, replaces all the temporary IDs |
||
| 501 | * with their mapped real IDs. Eg, if importing from site A to B, the mapping |
||
| 502 | * file may indicate that the ID "my_event_id" maps to an actual event ID of 123. |
||
| 503 | * So this function searches for any event temp Ids called "my_event_id" and |
||
| 504 | * replaces them with 123. |
||
| 505 | * Also, if there is no temp ID for the INT foreign keys from another database, |
||
| 506 | * replaces them with 0 or the field's default. |
||
| 507 | * @param type $model_object_data |
||
| 508 | * @param EEM_Base $model |
||
| 509 | * @param type $old_db_to_new_db_mapping |
||
| 510 | * @param boolean $export_from_site_a_to_b |
||
| 511 | * @return array updated model object data with temp IDs removed |
||
| 512 | */ |
||
| 513 | protected function _replace_temp_ids_with_mappings( $model_object_data, $model, $old_db_to_new_db_mapping, $export_from_site_a_to_b ) { |
||
| 574 | |||
| 575 | /** |
||
| 576 | * If the data was exported PRE-4.2, but then imported POST-4.2, then the term_id |
||
| 577 | * this term-taxonomy refers to may be out-of-date so we need to update it. |
||
| 578 | * see https://make.wordpress.org/core/2015/02/16/taxonomy-term-splitting-in-4-2-a-developer-guide/ |
||
| 579 | * @param type $model_object_data |
||
| 580 | * @return array new model object data |
||
| 581 | */ |
||
| 582 | protected function _handle_split_term_ids( $model_object_data ){ |
||
| 591 | /** |
||
| 592 | * Given the object's ID and its model's name, find it in the mapping data, |
||
| 593 | * bearing in mind where it came from |
||
| 594 | * @param type $object_id |
||
| 595 | * @param string $model_name |
||
| 596 | * @param array $old_db_to_new_db_mapping |
||
| 597 | * @param type $export_from_site_a_to_b |
||
| 598 | * @return int |
||
| 599 | */ |
||
| 600 | protected function _find_mapping_in( $object_id, $model_name, $old_db_to_new_db_mapping, $export_from_site_a_to_b) { |
||
| 617 | |||
| 618 | /** |
||
| 619 | * |
||
| 620 | * @param type $id_in_csv |
||
| 621 | * @param type $model_object_data |
||
| 622 | * @param EEM_Base $model |
||
| 623 | * @param type $old_db_to_new_db_mapping |
||
| 624 | * @return array updated $old_db_to_new_db_mapping |
||
| 625 | */ |
||
| 626 | protected function _insert_from_data_array( $id_in_csv, $model_object_data, $model, $old_db_to_new_db_mapping ) { |
||
| 659 | |||
| 660 | /** |
||
| 661 | * Given the model object data, finds the row to update and updates it |
||
| 662 | * @param string|int $id_in_csv |
||
| 663 | * @param array $model_object_data |
||
| 664 | * @param EEM_Base $model |
||
| 665 | * @param array $old_db_to_new_db_mapping |
||
| 666 | * @return array updated $old_db_to_new_db_mapping |
||
| 667 | */ |
||
| 668 | protected function _update_from_data_array( $id_in_csv, $model_object_data, $model, $old_db_to_new_db_mapping ) { |
||
| 720 | |||
| 721 | /** |
||
| 722 | * Gets the number of inserts performed since importer was instantiated or reset |
||
| 723 | * @return int |
||
| 724 | */ |
||
| 725 | public function get_total_inserts(){ |
||
| 728 | /** |
||
| 729 | * Gets the number of insert errors since importer was instantiated or reset |
||
| 730 | * @return int |
||
| 731 | */ |
||
| 732 | public function get_total_insert_errors(){ |
||
| 735 | /** |
||
| 736 | * Gets the number of updates performed since importer was instantiated or reset |
||
| 737 | * @return int |
||
| 738 | */ |
||
| 739 | public function get_total_updates(){ |
||
| 742 | /** |
||
| 743 | * Gets the number of update errors since importer was instantiated or reset |
||
| 744 | * @return int |
||
| 745 | */ |
||
| 746 | public function get_total_update_errors(){ |
||
| 749 | |||
| 750 | |||
| 751 | |||
| 752 | |||
| 753 | |||
| 754 | } |
||
| 755 | /* End of file EE_Import.class.php */ |
||
| 757 | ?> |
This check marks private properties in classes that are never used. Those properties can be removed.