Duplicate code is one of the most pungent code smells. A rule that is often used is to re-structure code once it is duplicated in three or more places.
Common duplication problems and their corresponding solutions are:
Complex classes like EE_Import often do a lot of different things. To break such a class down, we need to identify a cohesive component within that class. A common approach to find such a component is to look for fields/methods that share the same prefixes, or suffixes. You can also have a look at the cohesion graph to spot any un-connected, or weakly-connected components.
Once you have determined the fields that belong together, you can apply the Extract Class refactoring. If the component makes sense as a sub-class, Extract Subclass is also a candidate, and is often faster.
While breaking up the class, it is a good idea to analyze how other classes use EE_Import, and based on these observations, apply Extract Interface, too.
| 1 | <?php if (!defined('EVENT_ESPRESSO_VERSION')) exit('No direct script access allowed'); | ||
| 12 |  class EE_Import { | ||
| 13 | |||
| 14 | const do_insert = 'insert'; | ||
| 15 | const do_update = 'update'; | ||
| 16 | const do_nothing = 'nothing'; | ||
| 17 | |||
| 18 | |||
| 19 | // instance of the EE_Import object | ||
| 20 | private static $_instance = NULL; | ||
| 21 | |||
| 22 | private static $_csv_array = array(); | ||
|  | |||
| 23 | |||
| 24 | /** | ||
| 25 | * | ||
| 26 | * @var array of model names | ||
| 27 | */ | ||
| 28 | private static $_model_list = array(); | ||
| 29 | |||
| 30 | private static $_columns_to_save = array(); | ||
| 31 | |||
| 32 | protected $_total_inserts = 0; | ||
| 33 | protected $_total_updates = 0; | ||
| 34 | protected $_total_insert_errors = 0; | ||
| 35 | protected $_total_update_errors = 0; | ||
| 36 | |||
| 37 | |||
| 38 | /** | ||
| 39 | * private constructor to prevent direct creation | ||
| 40 | * @Constructor | ||
| 41 | * @access private | ||
| 42 | * @return void | ||
| 43 | */ | ||
| 44 |   private function __construct() { | ||
| 50 | |||
| 51 | |||
| 52 | /** | ||
| 53 | * @ singleton method used to instantiate class object | ||
| 54 | * @ access public | ||
| 55 | * @return EE_Import | ||
| 56 | */ | ||
| 57 | View Code Duplication | 	public static function instance() { | |
| 58 | // check if class object is instantiated | ||
| 59 | 		if ( self::$_instance === NULL  or ! is_object( self::$_instance ) or ! ( self::$_instance instanceof EE_Import )) { | ||
| 60 | self::$_instance = new self(); | ||
| 61 | } | ||
| 62 | return self::$_instance; | ||
| 63 | } | ||
| 64 | |||
| 65 | /** | ||
| 66 | * Resets the importer | ||
| 67 | * @return EE_Import | ||
| 68 | */ | ||
| 69 | 	public static function reset(){ | ||
| 73 | |||
| 74 | |||
| 75 | |||
| 76 | |||
| 77 | /** | ||
| 78 | * @ generates HTML for a file upload input and form | ||
| 79 | * @ access public | ||
| 80 | * @param string $title - heading for the form | ||
| 81 | * @param string $intro - additional text explaining what to do | ||
| 82 | 	 * 	@param 	string 		$page - EE Admin page to direct form to - in the form "espresso_{pageslug}" | ||
| 83 | * @param string $action - EE Admin page route array "action" that form will direct to | ||
| 84 | * @param string $type - type of file to import | ||
| 85 | * @ return string | ||
| 86 | */ | ||
| 87 | 	public function upload_form ( $title, $intro, $form_url, $action, $type  ) { | ||
| 116 | |||
| 117 | |||
| 118 | |||
| 119 | |||
| 120 | |||
| 121 | /** | ||
| 122 | * @Import Event Espresso data - some code "borrowed" from event espresso csv_import.php | ||
| 123 | * @access public | ||
| 124 | * @return boolean success | ||
| 125 | */ | ||
| 126 | 	public function import() { | ||
| 242 | |||
| 243 | /** | ||
| 244 | * Given an array of data (usually from a CSV import) attempts to save that data to the db. | ||
| 245 | * If $model_name ISN'T provided, assumes that this is a 3d array, with toplevel keys being model names, | ||
| 246 | * next level being numeric indexes and each value representing a model object, and the last layer down | ||
| 247 | * being keys of model fields and their proposed values. | ||
| 248 | * If $model_name IS provided, assumes a 2d array of the bottom two layers previously mentioned. | ||
| 249 | * If the CSV data says (in the metadata row) that it's from the SAME database, | ||
| 250 | * we treat the IDs in the CSV as the normal IDs, and try to update those records. However, if those | ||
| 251 | * IDs DON'T exist in the database, they're treated as temporary IDs, | ||
| 252 | * which can be used elsewhere to refer to the same object. Once an item | ||
| 253 | * with a temporary ID gets inserted, we record its mapping from temporary | ||
| 254 | * ID to real ID, and use the real ID in place of the temporary ID | ||
| 255 | * when that temporary ID was used as a foreign key. | ||
| 256 | * If the CSV data says (in the metadata again) that it's from a DIFFERENT database, | ||
| 257 | * we treat all the IDs in the CSV as temporary IDs - eg, if the CSV specifies an event with | ||
| 258 | * ID 1, and the database already has an event with ID 1, we assume that's just a coincidence, | ||
| 259 | * and insert a new event, and map its temporary ID of 1 over to its new real ID. | ||
| 260 | * An important exception are non-auto-increment primary keys. If one entry in the | ||
| 261 | * CSV file has the same ID as one in the DB, we assume they are meant to be | ||
| 262 | * the same item, and instead update the item in the DB with that same ID. | ||
| 263 | * Also note, we remember the mappings permanently. So the 2nd, 3rd, and 10000th | ||
| 264 | * time you import a CSV from a different site, we remember their mappings, and | ||
| 265 | * will try to update the item in the DB instead of inserting another item (eg | ||
| 266 | * if we previously imported an event with temporary ID 1, and then it got a | ||
| 267 | * real ID of 123, we remember that. So the next time we import an event with | ||
| 268 | * temporary ID 1, from the same site, we know that its real ID is 123, and will | ||
| 269 | * update that event, instead of adding a new event). | ||
| 270 | * @access public | ||
| 271 | * @param array $csv_data_array - the array containing the csv data produced from EE_CSV::import_csv_to_model_data_array() | ||
| 272 | * @param string|boolean $model_name - optional model name; when provided, $csv_data_array is treated as a 2d array of rows for that model (see above) | ||
| 273 | * @return TRUE on success, FALSE on fail | ||
| 274 | */ | ||
| 275 | 	public function save_csv_data_array_to_db( $csv_data_array, $model_name = FALSE ) { | ||
| 349 | |||
| 350 | |||
| 351 | /** | ||
| 352 | * Processes the array of data, given the knowledge that it's from the same database or a different one, | ||
| 353 | * and the mapping from temporary IDs to real IDs. | ||
| 354 | * If the data is from a different database, we treat the primary keys and their corresponding | ||
| 355 | * foreign keys as "temp Ids", basically identifiers that get mapped to real primary keys | ||
| 356 | * in the real target database. As items are inserted, their temporary primary keys | ||
| 357 | * are mapped to the real IDs in the target database. Also, before doing any update or | ||
| 358 | * insert, we replace all the temp ID which are foreign keys with their mapped real IDs. | ||
| 359 | * An exception: string primary keys are treated as real IDs, or else we'd need to | ||
| 360 | * dynamically generate new string primary keys which would be very awkward for the country table etc. | ||
| 361 | * Also, models with no primary key are strange too. We use their primary key INDEX (a | ||
| 362 | * combination of fields) to create a unique string identifying the row and store | ||
| 363 | * those in the mapping. | ||
| 364 | * | ||
| 365 | * If the data is from the same database, we usually treat primary keys as real IDs. | ||
| 366 | * An exception is if there is nothing in the database for that ID. If that's the case, | ||
| 367 | * we need to insert a new row for that ID, and then map from the non-existent ID | ||
| 368 | * to the newly-inserted real ID. | ||
| 369 | * @param type $csv_data_array | ||
| 370 | * @param type $export_from_site_a_to_b | ||
| 371 | * @param type $old_db_to_new_db_mapping | ||
| 372 | * @return array updated $old_db_to_new_db_mapping | ||
| 373 | */ | ||
| 374 | 	public function save_data_rows_to_db( $csv_data_array, $export_from_site_a_to_b, $old_db_to_new_db_mapping ) { | ||
| 375 | 		foreach ( $csv_data_array as $model_name_in_csv_data => $model_data_from_import ) { | ||
| 376 | //the top-level keys of the data are expected to be model names; check that this one really is one | ||
| 377 | 			if ( EE_Registry::instance()->is_model_name($model_name_in_csv_data)) { | ||
| 378 | $model_name = $model_name_in_csv_data; | ||
| 379 | 			}else { | ||
| 380 | // the key isn't a recognized model name, so we can't tell which model this data belongs to: record an error and FAIL | ||
| 381 | 				EE_Error::add_error( __('No table information was specified and/or found, therefore the import could not be completed','event_espresso'), __FILE__, __FUNCTION__, __LINE__ ); | ||
| 382 | return FALSE; | ||
| 383 | } | ||
| 384 | /* @var $model EEM_Base */ | ||
| 385 | $model = EE_Registry::instance()->load_model($model_name); | ||
| 386 | |||
| 387 | //so without further ado, scanning all the data provided for primary keys and their initial values | ||
| 388 | 			foreach ( $model_data_from_import as $model_object_data ) { | ||
| 389 | //before we do ANYTHING, make sure the csv row wasn't just completely blank | ||
| 390 | $row_is_completely_empty = true; | ||
| 391 | 				foreach($model_object_data as $field){ | ||
| 392 | 					if($field){ | ||
| 393 | $row_is_completely_empty = false; | ||
| 394 | } | ||
| 395 | } | ||
| 396 | 				if($row_is_completely_empty){ | ||
| 397 | continue; | ||
| 398 | } | ||
| 399 | //find the PK in the row of data (or a combined key if | ||
| 400 | //there is no primary key) | ||
| 401 | 				if($model->has_primary_key_field()){ | ||
| 402 | $id_in_csv = $model_object_data[$model->primary_key_name()]; | ||
| 403 | 				}else{ | ||
| 404 | $id_in_csv = $model->get_index_primary_key_string($model_object_data); | ||
| 405 | } | ||
| 406 | |||
| 407 | |||
| 408 | $model_object_data = $this->_replace_temp_ids_with_mappings( $model_object_data, $model, $old_db_to_new_db_mapping, $export_from_site_a_to_b ); | ||
| 409 | //now we need to decide if we're going to add a new model object given the $model_object_data, | ||
| 410 | //or just update. | ||
| 411 | 				if($export_from_site_a_to_b){ | ||
| 412 | $what_to_do = $this->_decide_whether_to_insert_or_update_given_data_from_other_db( $id_in_csv, $model_object_data, $model, $old_db_to_new_db_mapping ); | ||
| 413 | 				}else{//this is just a re-import | ||
| 414 | $what_to_do = $this->_decide_whether_to_insert_or_update_given_data_from_same_db( $id_in_csv, $model_object_data, $model, $old_db_to_new_db_mapping ); | ||
| 415 | } | ||
| 416 | 				if( $what_to_do == self::do_nothing ) { | ||
| 417 | continue; | ||
| 418 | } | ||
| 419 | |||
| 420 | //double-check we actually want to insert, if that's what we're planning | ||
| 421 | //based on whether this item would be unique in the DB or not | ||
| 422 | 				if( $what_to_do == self::do_insert ) { | ||
| 423 | //we're supposed to be inserting. But wait, will this thing | ||
| 424 | //be acceptable if inserted? | ||
| 425 | $conflicting = $model->get_one_conflicting( $model_object_data, false ); | ||
| 426 | 					if($conflicting){ | ||
| 427 | //ok, this item would conflict if inserted. Just update the item that it conflicts with. | ||
| 428 | $what_to_do = self::do_update; | ||
| 429 | //and if this model has a primary key, remember its mapping | ||
| 430 | 						if($model->has_primary_key_field()){ | ||
| 431 | $old_db_to_new_db_mapping[$model_name][$id_in_csv] = $conflicting->ID(); | ||
| 432 | $model_object_data[$model->primary_key_name()] = $conflicting->ID(); | ||
| 433 | 						}else{ | ||
| 434 | //we want to update this conflicting item, instead of inserting a conflicting item | ||
| 435 | //so we need to make sure they match entirely (it's possible that they only conflicted on one field, but we need them to match on other fields | ||
| 436 | //for the WHERE conditions in the update). At the time of this comment, there were no models like this | ||
| 437 | 							foreach($model->get_combined_primary_key_fields() as $key_field){ | ||
| 438 | $model_object_data[$key_field->get_name()] = $conflicting->get($key_field->get_name()); | ||
| 439 | } | ||
| 440 | } | ||
| 441 | } | ||
| 442 | } | ||
| 443 | 				if( $what_to_do == self::do_insert ) { | ||
| 444 | $old_db_to_new_db_mapping = $this->_insert_from_data_array( $id_in_csv, $model_object_data, $model, $old_db_to_new_db_mapping ); | ||
| 445 | 				}elseif( $what_to_do == self::do_update ) { | ||
| 446 | $old_db_to_new_db_mapping = $this->_update_from_data_array( $id_in_csv, $model_object_data, $model, $old_db_to_new_db_mapping ); | ||
| 447 | 				}else{ | ||
| 448 | throw new EE_Error( sprintf( __( 'Programming error. We shoudl be inserting or updating, but instead we are being told to "%s", whifh is invalid', 'event_espresso' ), $what_to_do ) ); | ||
| 449 | } | ||
| 450 | } | ||
| 451 | } | ||
| 452 | return $old_db_to_new_db_mapping; | ||
| 453 | } | ||
| 454 | |||
| 455 | |||
| 456 | |||
| 457 | /** | ||
| 458 | * Decides whether or not to insert, given that this data is from another database. | ||
| 459 | * So, if the primary key of this $model_object_data already exists in the database, | ||
| 460 | * it's just a coincidence and we should still insert. The only time we should | ||
| 461 | * update is when we know what it maps to, or there's something that would | ||
| 462 | * conflict (and we should instead just update that conflicting thing) | ||
| 463 | * @param string $id_in_csv | ||
| 464 | * @param array $model_object_data by reference so it can be modified | ||
| 465 | * @param EEM_Base $model | ||
| 466 | * @param array $old_db_to_new_db_mapping by reference so it can be modified | ||
| 467 | * @return string one of the consts on this class that starts with do_* | ||
| 468 | */ | ||
| 469 | 	protected function _decide_whether_to_insert_or_update_given_data_from_other_db( $id_in_csv, $model_object_data, $model, $old_db_to_new_db_mapping ) { | ||
| 479 | |||
| 480 | /** | ||
| 481 | * If this thing basically already exists in the database, we want to update it; | ||
| 482 | * otherwise insert it (ie, someone tweaked the CSV file, or the item was | ||
| 483 | * deleted in the database so it should be re-inserted) | ||
| 484 | * @param type $id_in_csv | ||
| 485 | * @param type $model_object_data | ||
| 486 | * @param EEM_Base $model | ||
| 487 | * @param type $old_db_to_new_db_mapping | ||
| 488 | * @return | ||
| 489 | */ | ||
| 490 | 	protected function _decide_whether_to_insert_or_update_given_data_from_same_db( $id_in_csv, $model_object_data, $model ) { | ||
| 498 | |||
| 499 | /** | ||
| 500 | * Using the $old_db_to_new_db_mapping array, replaces all the temporary IDs | ||
| 501 | * with their mapped real IDs. Eg, if importing from site A to B, the mapping | ||
| 502 | * file may indicate that the ID "my_event_id" maps to an actual event ID of 123. | ||
| 503 | * So this function searches for any event temp Ids called "my_event_id" and | ||
| 504 | * replaces them with 123. | ||
| 505 | * Also, if there is no temp ID for the INT foreign keys from another database, | ||
| 506 | * replaces them with 0 or the field's default. | ||
| 507 | * @param type $model_object_data | ||
| 508 | * @param EEM_Base $model | ||
| 509 | * @param type $old_db_to_new_db_mapping | ||
| 510 | * @param boolean $export_from_site_a_to_b | ||
| 511 | * @return array updated model object data with temp IDs removed | ||
| 512 | */ | ||
| 513 | 	protected function _replace_temp_ids_with_mappings( $model_object_data, $model, $old_db_to_new_db_mapping, $export_from_site_a_to_b ) { | ||
| 574 | |||
| 575 | /** | ||
| 576 | * If the data was exported PRE-4.2, but then imported POST-4.2, then the term_id | ||
| 577 | * this term-taxonomy refers to may be out-of-date so we need to update it. | ||
| 578 | * see https://make.wordpress.org/core/2015/02/16/taxonomy-term-splitting-in-4-2-a-developer-guide/ | ||
| 579 | * @param type $model_object_data | ||
| 580 | * @return array new model object data | ||
| 581 | */ | ||
| 582 | 	protected function _handle_split_term_ids( $model_object_data ){ | ||
| 591 | /** | ||
| 592 | * Given the object's ID and its model's name, find it in the mapping data, | ||
| 593 | * bearing in mind where it came from | ||
| 594 | * @param type $object_id | ||
| 595 | * @param string $model_name | ||
| 596 | * @param array $old_db_to_new_db_mapping | ||
| 597 | * @param type $export_from_site_a_to_b | ||
| 598 | * @return int | ||
| 599 | */ | ||
| 600 | 	protected function _find_mapping_in( $object_id, $model_name, $old_db_to_new_db_mapping, $export_from_site_a_to_b) { | ||
| 617 | |||
| 618 | /** | ||
| 619 | * | ||
| 620 | * @param type $id_in_csv | ||
| 621 | * @param type $model_object_data | ||
| 622 | * @param EEM_Base $model | ||
| 623 | * @param type $old_db_to_new_db_mapping | ||
| 624 | * @return array updated $old_db_to_new_db_mapping | ||
| 625 | */ | ||
| 626 | 	protected function _insert_from_data_array( $id_in_csv, $model_object_data, $model, $old_db_to_new_db_mapping ) { | ||
| 659 | |||
| 660 | /** | ||
| 661 | * Given the model object data, finds the row to update and updates it | ||
| 662 | * @param string|int $id_in_csv | ||
| 663 | * @param array $model_object_data | ||
| 664 | * @param EEM_Base $model | ||
| 665 | * @param array $old_db_to_new_db_mapping | ||
| 666 | * @return array updated $old_db_to_new_db_mapping | ||
| 667 | */ | ||
| 668 | 	protected function _update_from_data_array( $id_in_csv,  $model_object_data, $model, $old_db_to_new_db_mapping ) { | ||
| 720 | |||
| 721 | /** | ||
| 722 | * Gets the number of inserts performed since importer was instantiated or reset | ||
| 723 | * @return int | ||
| 724 | */ | ||
| 725 | 	public function get_total_inserts(){ | ||
| 728 | /** | ||
| 729 | * Gets the number of insert errors since importer was instantiated or reset | ||
| 730 | * @return int | ||
| 731 | */ | ||
| 732 | 	public function get_total_insert_errors(){ | ||
| 735 | /** | ||
| 736 | * Gets the number of updates performed since importer was instantiated or reset | ||
| 737 | * @return int | ||
| 738 | */ | ||
| 739 | 	public function get_total_updates(){ | ||
| 742 | /** | ||
| 743 | * Gets the number of update errors since importer was instantiated or reset | ||
| 744 | * @return int | ||
| 745 | */ | ||
| 746 | 	public function get_total_update_errors(){ | ||
| 749 | |||
| 750 | |||
| 751 | |||
| 752 | |||
| 753 | |||
| 754 | } | ||
| 755 | /* End of file EE_Import.class.php */ | ||
| 757 | ?> | 
This check marks private properties in classes that are never used. Those properties can be removed.