| Conditions | 7 |
| Total Lines | 274 |
| Code Lines | 199 |
| Lines | 0 |
| Ratio | 0 % |
| Changes | 0 | ||
Small methods make your code easier to understand, in particular if combined with a good name. Besides, if your method is small, finding a good name is usually much easier.
For example, if you find yourself adding comments to a method's body, this is usually a good sign to extract the commented part to a new method, and use the comment as a starting point when coming up with a good name for this new method.
Commonly applied refactorings include:
- Extract Method
If many parameters/temporary variables are present:
- Replace Method with Method Object
| 1 | """ |
||
def __init__(self, dependencies):
    """Assemble the task graph and validations and hand them to the parent.

    The fixed tasks are extended by one set of model data tasks per
    configured scenario (read from ``config.settings()``); the result is
    passed to the parent constructor together with the data quality
    validations for the ``demand.egon_ev_*`` tables.

    Parameters
    ----------
    dependencies
        Forwarded unchanged to the parent constructor.
    """

    def generate_model_data_tasks(scenario_name):
        """Dynamically generate tasks for model data creation.

        The goal is to speed up the creation of model timeseries. However,
        the exact number of parallel tasks cannot be determined during the
        DAG building as the number of grid districts (MVGD) is calculated
        within another pipeline task.
        Approach: assuming an approx. count of `mvgd_min_count` of 3700,
        the majority of the MVGDs can be parallelized. The remainder is
        handled subsequently in the scenario's
        `generate_model_data_<scenario_name>_remaining` task.
        The number of parallel tasks is defined via parameter
        `parallel_tasks` in the dataset config `datasets.yml`.

        Parameters
        ----------
        scenario_name : str
            Scenario name

        Returns
        -------
        set
            One `PythonOperator` per MVGD bunch, each calling
            `generate_model_data_bunch` with `scenario_name` and the
            bunch's index range, plus the scenario's `*_remaining` task
            when `scenario_name` is one of "status2019", "status2023",
            "eGon2035" or "eGon100RE". For any other name only the bunch
            operators are returned.
        """
        parallel_tasks = DATASET_CFG["model_timeseries"].get(
            "parallel_tasks", 1
        )
        # Equal-sized bunches; whatever exceeds MVGD_MIN_COUNT is left to
        # the scenario's "remaining" task added below.
        mvgd_bunch_size = MVGD_MIN_COUNT // parallel_tasks

        tasks = set()
        for bunch_idx in range(parallel_tasks):
            bunch = range(
                bunch_idx * mvgd_bunch_size,
                (bunch_idx + 1) * mvgd_bunch_size,
            )
            tasks.add(
                PythonOperator(
                    task_id=(
                        "generate_model_data_"
                        f"{scenario_name}_"
                        f"bunch{bunch[0]}-{bunch[-1]}"
                    ),
                    python_callable=generate_model_data_bunch,
                    op_kwargs={
                        "scenario_name": scenario_name,
                        "bunch": bunch,
                    },
                )
            )

        # The scenario names are mutually exclusive, hence a single chain.
        if scenario_name == "status2019":
            tasks.add(generate_model_data_status2019_remaining)
        elif scenario_name == "status2023":
            tasks.add(generate_model_data_status2023_remaining)
        elif scenario_name == "eGon2035":
            tasks.add(generate_model_data_eGon2035_remaining)
        elif scenario_name == "eGon100RE":
            tasks.add(generate_model_data_eGon100RE_remaining)
        return tasks

    tasks = (
        create_tables,
        {
            (
                download_and_preprocess,
                allocate_evs_numbers,
            ),
            (
                extract_trip_file,
                write_metadata_to_db,
                write_evs_trips_to_db,
            ),
        },
        allocate_evs_to_grid_districts,
        delete_model_data_from_db,
    )

    tasks_per_scenario = set()

    for scenario_name in config.settings()["egon-data"]["--scenarios"]:
        tasks_per_scenario.update(
            generate_model_data_tasks(scenario_name=scenario_name)
        )

    # The per-scenario tasks form the final element of the task tuple.
    tasks = tasks + (tasks_per_scenario,)

    # Value sets shared by several validations below. Each validation gets
    # its own copy so that no list object is shared between rules.
    # NOTE(review): only "eGon2035" and "eGon100RE" are expected here
    # although generate_model_data_tasks also knows "status2019" and
    # "status2023" -- confirm this is intended.
    scenarios = ["eGon2035", "eGon100RE"]
    scenario_variations = [
        "Mobility Transition 2050",
        "NEP C 2035",
        "Electrification 2050",
        "Reference 2050",
    ]

    super().__init__(
        name=self.name,
        version=self.version,
        dependencies=dependencies,
        tasks=tasks,
        validation={
            "data_quality": [
                # --- demand.egon_ev_count_municipality ---
                RowCountValidation(
                    table="demand.egon_ev_count_municipality",
                    rule_id="ROW_COUNT.egon_ev_count_municipality",
                    expected_count={
                        "Schleswig-Holstein": 1108,
                        "Everything": 44012,
                    },
                ),
                DataTypeValidation(
                    table="demand.egon_ev_count_municipality",
                    rule_id="DATA_MULTIPLE_TYPES.egon_ev_count_municipality",
                    column_types={
                        "scenario": "character varying",
                        "scenario_variation": "character varying",
                        "ags": "integer",
                        "bev_mini": "integer",
                        "bev_medium": "integer",
                        "bev_luxury": "integer",
                        "phev_mini": "integer",
                        "phev_medium": "integer",
                        "phev_luxury": "integer",
                        "rs7_id": "smallint",
                    },
                ),
                WholeTableNotNullAndNotNaNValidation(
                    table="demand.egon_ev_count_municipality",
                    rule_id="WHOLE_TABLE_NOT_NAN.egon_ev_count_municipality",
                ),
                ValueSetValidation(
                    table="demand.egon_ev_count_municipality",
                    rule_id="VALUE_SET_VALIDATION_SCENARIO.egon_ev_count_municipality",
                    column="scenario",
                    expected_values=list(scenarios),
                ),
                ValueSetValidation(
                    table="demand.egon_ev_count_municipality",
                    rule_id="VALUE_SET_VALIDATION_SCENARIO_VARIATION.egon_ev_count_municipality",
                    column="scenario_variation",
                    expected_values=list(scenario_variations),
                ),
                # --- demand.egon_ev_count_mv_grid_district ---
                RowCountValidation(
                    table="demand.egon_ev_count_mv_grid_district",
                    rule_id="ROW_COUNT.egon_ev_count_mv_grid_district",
                    expected_count={
                        "Schleswig-Holstein": 199,
                        "Everything": 15348,
                    },
                ),
                DataTypeValidation(
                    table="demand.egon_ev_count_mv_grid_district",
                    rule_id="DATA_MULTIPLE_TYPES.egon_ev_count_mv_grid_district",
                    column_types={
                        "scenario": "character varying",
                        "scenario_variation": "character varying",
                        "bus_id": "integer",
                        "bev_mini": "integer",
                        "bev_medium": "integer",
                        "bev_luxury": "integer",
                        "phev_mini": "integer",
                        "phev_medium": "integer",
                        "phev_luxury": "integer",
                        "rs7_id": "smallint",
                    },
                ),
                WholeTableNotNullAndNotNaNValidation(
                    table="demand.egon_ev_count_mv_grid_district",
                    rule_id="WHOLE_TABLE_NOT_NAN.egon_ev_count_mv_grid_district",
                ),
                ValueSetValidation(
                    table="demand.egon_ev_count_mv_grid_district",
                    rule_id="VALUE_SET_VALIDATION_SCENARIO.egon_ev_count_mv_grid_district",
                    column="scenario",
                    expected_values=list(scenarios),
                ),
                ValueSetValidation(
                    table="demand.egon_ev_count_mv_grid_district",
                    rule_id="VALUE_SET_VALIDATION_SCENARIO_VARIATION.egon_ev_count_mv_grid_district",
                    column="scenario_variation",
                    expected_values=list(scenario_variations),
                ),
                # --- demand.egon_ev_count_registration_district ---
                RowCountValidation(
                    table="demand.egon_ev_count_registration_district",
                    rule_id="ROW_COUNT.egon_ev_count_registration_district",
                    expected_count={
                        "Schleswig-Holstein": 400,
                        "Everything": 1600,
                    },
                ),
                DataTypeValidation(
                    table="demand.egon_ev_count_registration_district",
                    rule_id="DATA_MULTIPLE_TYPES.egon_ev_count_registration_district",
                    column_types={
                        "scenario": "character varying",
                        "scenario_variation": "character varying",
                        "ags_reg_district": "integer",
                        "reg_district": "character varying",
                        "bev_mini": "integer",
                        "bev_medium": "integer",
                        "bev_luxury": "integer",
                        "phev_mini": "integer",
                        "phev_medium": "integer",
                        "phev_luxury": "integer",
                    },
                ),
                WholeTableNotNullAndNotNaNValidation(
                    table="demand.egon_ev_count_registration_district",
                    rule_id="WHOLE_TABLE_NOT_NAN.egon_ev_count_registration_district",
                ),
                ValueSetValidation(
                    table="demand.egon_ev_count_registration_district",
                    rule_id="VALUE_SET_VALIDATION_SCENARIO.egon_ev_count_registration_district",
                    column="scenario",
                    expected_values=list(scenarios),
                ),
                ValueSetValidation(
                    table="demand.egon_ev_count_registration_district",
                    rule_id="VALUE_SET_VALIDATION_SCENARIO_VARIATION.egon_ev_count_registration_district",
                    column="scenario_variation",
                    expected_values=list(scenario_variations),
                ),
                # --- demand.egon_ev_mv_grid_district ---
                RowCountValidation(
                    table="demand.egon_ev_mv_grid_district",
                    rule_id="ROW_COUNT.egon_ev_mv_grid_district",
                    expected_count={
                        "Schleswig-Holstein": 534899,
                        "Everything": 125609556,
                    },
                ),
                DataTypeValidation(
                    table="demand.egon_ev_mv_grid_district",
                    rule_id="DATA_MULTIPLE_TYPES.egon_ev_mv_grid_district",
                    column_types={
                        "scenario": "character varying",
                        "scenario_variation": "character varying",
                        "bus_id": "integer",
                        "reg_district": "character varying",
                        "bev_mini": "integer",
                        "bev_medium": "integer",
                        "bev_luxury": "integer",
                        "phev_mini": "integer",
                        "phev_medium": "integer",
                        "phev_luxury": "integer",
                        "rs7_id": "smallint",
                    },
                ),
                WholeTableNotNullAndNotNaNValidation(
                    table="demand.egon_ev_mv_grid_district",
                    rule_id="WHOLE_TABLE_NOT_NAN.egon_ev_mv_grid_district",
                ),
                ValueSetValidation(
                    table="demand.egon_ev_mv_grid_district",
                    rule_id="VALUE_SET_VALIDATION_SCENARIO.egon_ev_mv_grid_district",
                    column="scenario",
                    expected_values=list(scenarios),
                ),
                ValueSetValidation(
                    table="demand.egon_ev_mv_grid_district",
                    rule_id="VALUE_SET_VALIDATION_SCENARIO_VARIATION.egon_ev_mv_grid_district",
                    column="scenario_variation",
                    expected_values=list(scenario_variations),
                ),
                # --- demand.egon_ev_pool ---
                RowCountValidation(
                    table="demand.egon_ev_pool",
                    rule_id="ROW_COUNT.egon_ev_pool",
                    expected_count={
                        "Schleswig-Holstein": 7000,
                        "Everything": 65376,
                    },
                ),
                DataTypeValidation(
                    table="demand.egon_ev_pool",
                    rule_id="DATA_MULTIPLE_TYPES.egon_ev_pool",
                    column_types={
                        "scenario": "character varying",
                        "ev_id": "integer",
                        "rs7_id": "smallint",
                        "type": "character varying",
                        "simbev_ev_id": "integer",
                    },
                ),
                WholeTableNotNullAndNotNaNValidation(
                    table="demand.egon_ev_pool",
                    rule_id="WHOLE_TABLE_NOT_NAN.egon_ev_pool",
                ),
                ValueSetValidation(
                    table="demand.egon_ev_pool",
                    rule_id="VALUE_SET_VALIDATION_SCENARIO.egon_ev_pool",
                    column="scenario",
                    expected_values=list(scenarios),
                ),
                ValueSetValidation(
                    table="demand.egon_ev_pool",
                    rule_id="VALUE_SET_VALIDATION_TYPE.egon_ev_pool",
                    column="type",
                    expected_values=[
                        "bev_mini",
                        "bev_medium",
                        "bev_luxury",
                        "phev_mini",
                        "phev_medium",
                        "phev_luxury",
                    ],
                ),
                # --- demand.egon_ev_trip ---
                RowCountValidation(
                    table="demand.egon_ev_trip",
                    rule_id="ROW_COUNT.egon_ev_trip",
                    expected_count={
                        "Schleswig-Holstein": 11642066,
                        "Everything": 108342188,
                    },
                ),
                DataTypeValidation(
                    table="demand.egon_ev_trip",
                    rule_id="DATA_MULTIPLE_TYPES.egon_ev_trip",
                    column_types={
                        "scenario": "character varying",
                        "event_id": "integer",
                        "egon_ev_pool_ev_id": "integer",
                        "simbev_event_id": "integer",
                        "location": "character varying",
                        "use_case": "character varying",
                        "charging_capacity_nominal": "real",
                        "charging_capacity_grid": "real",
                        "charging_capacity_battery": "real",
                        "soc_start": "real",
                        "soc_end": "real",
                        "charging_demand": "real",
                        "park_start": "integer",
                        "park_end": "integer",
                        "drive_start": "integer",
                        "drive_end": "integer",
                        "consumption": "real",
                    },
                ),
                WholeTableNotNullAndNotNaNValidation(
                    table="demand.egon_ev_trip",
                    rule_id="WHOLE_TABLE_NOT_NAN.egon_ev_trip",
                ),
                ValueSetValidation(
                    table="demand.egon_ev_trip",
                    rule_id="VALUE_SET_VALIDATION_SCENARIO.egon_ev_trip",
                    column="scenario",
                    expected_values=list(scenarios),
                ),
                ValueSetValidation(
                    table="demand.egon_ev_trip",
                    rule_id="VALUE_SET_LOCATION.egon_ev_trip",
                    # The trip table lists a "location" column and no
                    # "type" column in its data type validation above.
                    column="location",
                    expected_values=[
                        "0_work",
                        "1_business",
                        "2_school",
                        "3_shopping",
                        "4_private/ridesharing",
                        "5_leisure",
                        "6_home",
                        "7_charging_hub",
                        "driving",
                    ],
                ),
            ]
        },
        on_validation_failure="continue",
    )
||
| 686 |