data.datasets.zensus_vg250 - Code Metrics - Inspection of "Features/#450 add metadata" - openego/eGon-data - Measure and Improve Code Quality continuously with Scrutinizer

Passed

Pull Request — dev (#568)

unknown

created 2023-03-06 12:48 UTC

data.datasets.zensus_vg250 A

↳ Parent: Project

Complexity

Total Complexity

Size/Duplication

Total Lines	751
Duplicated Lines	20.37 %

Importance

Changes

Metric	Value
wmc	9
eloc	483
dl	153
loc	751
rs	10
c	0
b	0
f	0

6 Functions

Rating	Name	Duplication	Size	Complexity
A	population_in_municipalities()	0	46	1
B	add_metadata_vg250_gem_pop()	0	136	1
A	inside_germany()	0	51	2
B	add_metadata_vg250_zensus()	89	89	1
B	add_metadata_zensus_inside_ger()	0	163	1
B	map_zensus_vg250()	0	79	2

1 Method

Rating	Name	Duplication	Size	Complexity
A	ZensusVg250.__init__()	0	12	1

How to fix Duplicated Code

import datetime
import json
import time

from geoalchemy2 import Geometry
from sqlalchemy import (
    BigInteger,
    Column,
    Float,
    Integer,
    SmallInteger,
    String,
    func,
    select,
)
from sqlalchemy.ext.declarative import declarative_base
import geopandas as gpd

from egon.data import db
from egon.data.datasets import Dataset
from egon.data.datasets.vg250 import vg250_metadata_resources_fields
from egon.data.metadata import (
    context,
    generate_resource_fields_from_sqla_model,
    license_ccby,
    licenses_datenlizenz_deutschland,
    meta_metadata,
    sources,
)
import egon.data.config

Base = declarative_base()


class ZensusVg250(Dataset):
    def __init__(self, dependencies):
        super().__init__(
            name="ZensusVg250",
            version="0.0.3",
            dependencies=dependencies,
            tasks=(
                map_zensus_vg250,
                inside_germany,
                add_metadata_zensus_inside_ger,
                population_in_municipalities,
                add_metadata_vg250_gem_pop,
                add_metadata_vg250_zensus,
            ),
        )


class Vg250Sta(Base):

    __tablename__ = "vg250_sta"
    __table_args__ = {"schema": "boundaries"}

    id = Column(BigInteger, primary_key=True, index=True)
    ade = Column(BigInteger)
    gf = Column(BigInteger)
    bsg = Column(BigInteger)
    ars = Column(String)
    ags = Column(String)
    sdv_ars = Column(String)
    gen = Column(String)
    bez = Column(String)
    ibz = Column(BigInteger)
    bem = Column(String)
    nbd = Column(String)
    sn_l = Column(String)
    sn_r = Column(String)
    sn_k = Column(String)
    sn_v1 = Column(String)
    sn_v2 = Column(String)
    sn_g = Column(String)
    fk_s3 = Column(String)
    nuts = Column(String)
    ars_0 = Column(String)
    ags_0 = Column(String)
    wsk = Column(String)
    debkg_id = Column(String)
    rs = Column(String)
    sdv_rs = Column(String)
    rs_0 = Column(String)
    geometry = Column(Geometry(srid=4326), index=True)


class Vg250Gem(Base):

    __tablename__ = "vg250_gem"
    __table_args__ = {"schema": "boundaries"}

    id = Column(BigInteger, primary_key=True, index=True)
    ade = Column(BigInteger)
    gf = Column(BigInteger)
    bsg = Column(BigInteger)
    ars = Column(String)
    ags = Column(String)
    sdv_ars = Column(String)
    gen = Column(String)
    bez = Column(String)
    ibz = Column(BigInteger)
    bem = Column(String)
    nbd = Column(String)
    sn_l = Column(String)
    sn_r = Column(String)
    sn_k = Column(String)
    sn_v1 = Column(String)
    sn_v2 = Column(String)
    sn_g = Column(String)
    fk_s3 = Column(String)
    nuts = Column(String)
    ars_0 = Column(String)
    ags_0 = Column(String)
    wsk = Column(String)
    debkg_id = Column(String)
    rs = Column(String)
    sdv_rs = Column(String)
    rs_0 = Column(String)
    geometry = Column(Geometry(srid=4326), index=True)


class DestatisZensusPopulationPerHa(Base):
    __tablename__ = "destatis_zensus_population_per_ha"
    __table_args__ = {"schema": "society"}

    id = Column(Integer, primary_key=True, index=True)
    grid_id = Column(String(254), nullable=False)
    x_mp = Column(Integer)
    y_mp = Column(Integer)
    population = Column(SmallInteger)
    geom_point = Column(Geometry("POINT", 3035), index=True)
    geom = Column(Geometry("POLYGON", 3035), index=True)


class DestatisZensusPopulationPerHaInsideGermany(Base):
    __tablename__ = "destatis_zensus_population_per_ha_inside_germany"
    __table_args__ = {"schema": "society"}

    id = Column(Integer, primary_key=True, index=True)
    grid_id = Column(String(254), nullable=False)
    population = Column(SmallInteger)
    geom_point = Column(Geometry("POINT", 3035), index=True)
    geom = Column(Geometry("POLYGON", 3035), index=True)


class Vg250GemPopulation(Base):
    __tablename__ = "vg250_gem_population"
    __table_args__ = {"schema": "boundaries"}

    id = Column(Integer, primary_key=True, index=True)
    gen = Column(String)
    bez = Column(String)
    bem = Column(String)
    nuts = Column(String)
    ags_0 = Column(String)
    rs_0 = Column(String)
    area_ha = Column(Float)
    area_km2 = Column(Float)
    population_total = Column(Integer)
    cell_count = Column(Integer)
    population_density = Column(Integer)
    geom = Column(Geometry(srid=3035))


class MapZensusVg250(Base):
    __tablename__ = "egon_map_zensus_vg250"
    __table_args__ = {"schema": "boundaries"}

    zensus_population_id = Column(Integer, primary_key=True, index=True)
    zensus_geom = Column(Geometry("POINT", 3035))
    vg250_municipality_id = Column(Integer)
    vg250_nuts3 = Column(String)


def map_zensus_vg250():
    """Perform mapping between municipalities and zensus grid"""

    MapZensusVg250.__table__.drop(bind=db.engine(), checkfirst=True)
    MapZensusVg250.__table__.create(bind=db.engine(), checkfirst=True)

    # Get information from data configuration file
    cfg = egon.data.config.datasets()["map_zensus_vg250"]

    local_engine = db.engine()

    db.execute_sql(
        f"""DELETE FROM
        {cfg['targets']['map']['schema']}.{cfg['targets']['map']['table']}"""
    )

    gdf = db.select_geodataframe(
        f"""SELECT * FROM
        {cfg['sources']['zensus_population']['schema']}.
        {cfg['sources']['zensus_population']['table']}""",
        geom_col="geom_point",
    )

    gdf_boundaries = db.select_geodataframe(
        f"""SELECT * FROM  {cfg['sources']['vg250_municipalities']['schema']}.
        {cfg['sources']['vg250_municipalities']['table']}""",
        geom_col="geometry",
        epsg=3035,
    )

    # Join vg250 with zensus cells
    join = gpd.sjoin(gdf, gdf_boundaries, how="inner", op="intersects")

    # Deal with cells that don't interect with boundaries (e.g. at borders)
    missing_cells = gdf[(~gdf.id.isin(join.id_left)) & (gdf.population > 0)]

    # start with buffer
    buffer = 0

    # increase buffer until every zensus cell is matched to a nuts3 region
    while len(missing_cells) > 0:
        buffer += 100
        boundaries_buffer = gdf_boundaries.copy()
        boundaries_buffer.geometry = boundaries_buffer.geometry.buffer(buffer)
        join_missing = gpd.sjoin(
            missing_cells, boundaries_buffer, how="inner", op="intersects"
        )
        join = join.append(join_missing)
        missing_cells = gdf[
            (~gdf.id.isin(join.id_left)) & (gdf.population > 0)
        ]
    print(f"Maximal buffer to match zensus points to vg250: {buffer}m")

    # drop duplicates
    join = join.drop_duplicates(subset=["id_left"])

    # Insert results to database
    join.rename(
        {
            "id_left": "zensus_population_id",
            "geom_point": "zensus_geom",
            "nuts": "vg250_nuts3",
            "id_right": "vg250_municipality_id",
        },
        axis=1,
    )[
        [
            "zensus_population_id",
            "zensus_geom",
            "vg250_municipality_id",
            "vg250_nuts3",
        ]
    ].set_geometry(
        "zensus_geom"
    ).to_postgis(
        cfg["targets"]["map"]["table"],
        schema=cfg["targets"]["map"]["schema"],
        con=local_engine,
        if_exists="replace",
    )


def inside_germany():
    """
    Filter zensus data by data inside Germany and population > 0
    """

    # Get database engine
    engine_local_db = db.engine()

    # Create new table
    db.execute_sql(
        f"""
        DROP TABLE IF EXISTS {DestatisZensusPopulationPerHaInsideGermany.__table__.schema}.{DestatisZensusPopulationPerHaInsideGermany.__table__.name} CASCADE;
        """
    )
    DestatisZensusPopulationPerHaInsideGermany.__table__.create(
        bind=engine_local_db, checkfirst=True
    )

    with db.session_scope() as s:

        # Query zensus cells in German boundaries from vg250
        cells_in_germany = s.query(MapZensusVg250.zensus_population_id)

        # Query relevant data from zensus population table
        q = (
            s.query(
                DestatisZensusPopulationPerHa.id,
                DestatisZensusPopulationPerHa.grid_id,
                DestatisZensusPopulationPerHa.population,
                DestatisZensusPopulationPerHa.geom_point,
                DestatisZensusPopulationPerHa.geom,
            )
            .filter(DestatisZensusPopulationPerHa.population > 0)
            .filter(DestatisZensusPopulationPerHa.id.in_(cells_in_germany))
        )

        # Insert above queried data into new table
        insert = DestatisZensusPopulationPerHaInsideGermany.__table__.insert().from_select(
            (
                DestatisZensusPopulationPerHaInsideGermany.id,
                DestatisZensusPopulationPerHaInsideGermany.grid_id,
                DestatisZensusPopulationPerHaInsideGermany.population,
                DestatisZensusPopulationPerHaInsideGermany.geom_point,
                DestatisZensusPopulationPerHaInsideGermany.geom,
            ),
            q,
        )

        # Execute and commit (trigger transactions in database)
        s.execute(insert)
        s.commit()


def population_in_municipalities():
    """
    Create table of municipalities with information about population
    """

    engine_local_db = db.engine()
    Vg250GemPopulation.__table__.drop(bind=engine_local_db, checkfirst=True)
    Vg250GemPopulation.__table__.create(bind=engine_local_db, checkfirst=True)

    srid = 3035

    gem = db.select_geodataframe(
        "SELECT * FROM boundaries.vg250_gem",
        geom_col="geometry",
        epsg=srid,
        index_col="id",
    )

    gem["area_ha"] = gem.area / 10000

    gem["area_km2"] = gem.area / 1000000

    population = db.select_dataframe(
        """SELECT id, population, vg250_municipality_id
        FROM society.destatis_zensus_population_per_ha
        INNER JOIN boundaries.egon_map_zensus_vg250 ON (
             society.destatis_zensus_population_per_ha.id =
             boundaries.egon_map_zensus_vg250.zensus_population_id)
        WHERE population > 0"""
    )

    gem["population_total"] = (
        population.groupby("vg250_municipality_id").population.sum().fillna(0)
    )

    gem["cell_count"] = population.groupby(
        "vg250_municipality_id"
    ).population.count()

    gem["population_density"] = gem["population_total"] / gem["area_km2"]

    gem.reset_index().to_postgis(
        "vg250_gem_population",
        schema="boundaries",
        con=db.engine(),
        if_exists="replace",
    )


def add_metadata_zensus_inside_ger():
    """
    Create metadata JSON for DestatisZensusPopulationPerHaInsideGermany

    Creates a metdadata JSON string and writes it to the database table comment
    """
    schema_table = ".".join(
        [
            DestatisZensusPopulationPerHaInsideGermany.__table__.schema,
            DestatisZensusPopulationPerHaInsideGermany.__table__.name,
        ]
    )

    metadata = {
        "name": schema_table,
        "title": "DESTATIS - Zensus 2011 - Population per hectar",
        "id": "WILL_BE_SET_AT_PUBLICATION",
        "description": (
            "National census in Germany in 2011 with the bounds on Germanys "
            "borders."
        ),
        "language": ["en-EN", "de-DE"],
        "publicationDate": datetime.date.today().isoformat(),
        "context": context(),
        "spatial": {
            "location": None,
            "extent": "Germany",
            "resolution": "1 ha",
        },
        "temporal": {
            "reference_date": "2011-12-31",
            "timeseries": {
                "start": None,
                "end": None,
                "resolution": None,
                "alignment": None,
                "aggregationType": None,
            },
        },
        "sources": [
            {
                "title": "Statistisches Bundesamt (Destatis) - Ergebnisse des "
                "Zensus 2011 zum Download",
                "description": (
                    "Als Download bieten wir Ihnen auf dieser Seite "
                    "zusätzlich zur Zensusdatenbank CSV- und "
                    "teilweise Excel-Tabellen mit umfassenden "
                    "Personen-, Haushalts- und Familien- sowie "
                    "Gebäude- und Wohnungsmerkmalen. Die "
                    "Ergebnisse liegen auf Bundes-, Länder-, Kreis- "
                    "und Gemeindeebene vor. Außerdem sind einzelne "
                    "Ergebnisse für Gitterzellen verfügbar."
                ),
                "path": "https://www.zensus2011.de/DE/Home/Aktuelles/"
                "DemografischeGrunddaten.html",
                "licenses": [
                    licenses_datenlizenz_deutschland(
                        attribution="© Statistische Ämter des Bundes und der "
                        "Länder 2014"
                    )
                ],
            },
            {
                "title": "Dokumentation - Zensus 2011 - Methoden und Verfahren",
                "description": (
                    "Diese Publikation beschreibt ausführlich die "
                    "Methoden und Verfahren des registergestützten "
                    "Zensus 2011; von der Datengewinnung und "
                    "-aufbereitung bis hin zur Ergebniserstellung"
                    " und Geheimhaltung. Der vorliegende Band wurde "
                    "von den Statistischen Ämtern des Bundes und "
                    "der Länder im Juni 2015 veröffentlicht."
                ),
                "path": "https://www.destatis.de/DE/Publikationen/Thematisch/Be"
                "voelkerung/Zensus/ZensusBuLaMethodenVerfahren51211051"
                "19004.pdf?__blob=publicationFile",
                "licenses": [
                    licenses_datenlizenz_deutschland(
                        attribution="© Statistisches Bundesamt, Wiesbaden "
                        "2015 (im Auftrag der "
                        "Herausgebergemeinschaft)"
                    )
                ],
            },
        ],
        "licenses": [
            licenses_datenlizenz_deutschland(
                attribution="© Statistische Ämter des Bundes und der Länder "
                "2014; © Statistisches Bundesamt, Wiesbaden 2015 "
                "(Daten verändert)"
            )
        ],
        "contributors": [
            {
                "title": "Guido Pleßmann",
                "email": "http://github.com/gplssm",
                "date": time.strftime("%Y-%m-%d"),
                "object": None,
                "comment": "Imported data",
            },
            {
                "title": "Jonathan Amme",
                "email": "http://github.com/nesnoj",
                "date": time.strftime("%Y-%m-%d"),
                "object": None,
                "comment": "Metadata extended",
            },
        ],
        "resources": [
            {
                "profile": "tabular-data-resource",
                "name": schema_table,
                "path": None,
                "format": "PostgreSQL",
                "encoding": "UTF-8",
                "schema": {
                    "fields": [
                        {
                            "name": "id",
                            "description": "Unique identifier",
                            "type": "none",
                            "unit": "integer",
                        },
                        {
                            "name": "grid_id",
                            "description": "Grid number of source",
                            "type": "string",
                            "unit": "none",
                        },
                        {
                            "name": "population",
                            "description": "Number of registred residents",
                            "type": "integer",
                            "unit": "resident",
                        },
                        {
                            "name": "geom_point",
                            "description": "Geometry centroid",
                            "type": "Geometry",
                            "unit": "none",
                        },
                        {
                            "name": "geom",
                            "description": "Geometry",
                            "type": "Geometry",
                            "unit": "",
                        },
                    ],
                    "primaryKey": ["id"],
                    "foreignKeys": [],
                },
                "dialect": {"delimiter": None, "decimalSeparator": "."},
            }
        ],
        "metaMetadata": meta_metadata(),
    }

    meta_json = "'" + json.dumps(metadata) + "'"

    db.submit_comment(
        meta_json,
        DestatisZensusPopulationPerHaInsideGermany.__table__.schema,
        DestatisZensusPopulationPerHaInsideGermany.__table__.name,
    )


def add_metadata_vg250_gem_pop():
    """
    Create metadata JSON for Vg250GemPopulation

    Creates a metdadata JSON string and writes it to the database table comment
    """
    vg250_config = egon.data.config.datasets()["vg250"]
    schema_table = ".".join(
        [
            Vg250GemPopulation.__table__.schema,
            Vg250GemPopulation.__table__.name,
        ]
    )

    licenses = [
        licenses_datenlizenz_deutschland(
            attribution="© Bundesamt für Kartographie und Geodäsie "
            "2020 (Daten verändert)"
        )
    ]

    vg250_source = {
        "title": "Verwaltungsgebiete 1:250 000 (Ebenen)",
        "description": "Der Datenbestand umfasst sämtliche Verwaltungseinheiten der "
        "hierarchischen Verwaltungsebenen vom Staat bis zu den Gemeinden "
        "mit ihren Grenzen, statistischen Schlüsselzahlen, Namen der "
        "Verwaltungseinheit sowie die spezifische Bezeichnung der "
        "Verwaltungsebene des jeweiligen Landes.",
        "path": vg250_config["original_data"]["source"]["url"],
        "licenses": licenses,
    }

    resources_fields = vg250_metadata_resources_fields()
    resources_fields.extend(
        [
            {
                "name": "area_ha",
                "description": "Area in ha",
                "type": "float",
                "unit": "ha",
            },
            {
                "name": "area_km2",
                "description": "Area in km2",
                "type": "float",
                "unit": "km2",
            },
            {
                "name": "population_total",
                "description": "Number of inhabitants",
                "type": "integer",
                "unit": "none",
            },
            {
                "name": "cell_count",
                "description": "Number of Zensus cells",
                "type": "integer",
                "unit": "none",
            },
            {
                "name": "population_density",
                "description": "Number of inhabitants per km2",
                "type": "float",
                "unit": "inhabitants/km²",
            },
        ]
    )

    metadata = {
        "name": schema_table,
        "title": (
            "Municipalities (BKG Verwaltungsgebiete 250) and population "
            "(Destatis Zensus)"
        ),
        "id": "WILL_BE_SET_AT_PUBLICATION",
        "description": "Municipality data enriched by population data",
        "language": ["de-DE"],
        "publicationDate": datetime.date.today().isoformat(),
        "context": context(),
        "spatial": {
            "location": None,
            "extent": "Germany",
            "resolution": "1:250000",
        },
        "temporal": {
            "referenceDate": "2020-01-01",
            "timeseries": {
                "start": None,
                "end": None,
                "resolution": None,
                "alignment": None,
                "aggregationType": None,
            },
        },
        "sources": [vg250_source],
        "licenses": licenses,
        "contributors": [
            {
                "title": "Guido Pleßmann",
                "email": "http://github.com/gplssm",
                "date": time.strftime("%Y-%m-%d"),
                "object": None,
                "comment": "Imported data",
            },
            {
                "title": "Jonathan Amme",
                "email": "http://github.com/nesnoj",
                "date": time.strftime("%Y-%m-%d"),
                "object": None,
                "comment": "Metadata extended",
            },
        ],
        "resources": [
            {
                "profile": "tabular-data-resource",
                "name": schema_table,
                "path": None,
                "format": "PostgreSQL",
                "encoding": "UTF-8",
                "schema": {
                    "fields": resources_fields,
                    "primaryKey": ["id"],
                    "foreignKeys": [],
                },
                "dialect": {"delimiter": None, "decimalSeparator": "."},
            }
        ],
        "metaMetadata": meta_metadata(),
    }

    meta_json = "'" + json.dumps(metadata) + "'"

    db.submit_comment(
        meta_json,
        Vg250GemPopulation.__table__.schema,
        Vg250GemPopulation.__table__.name,
    )


def add_metadata_vg250_zensus():

    # Import column names and datatypes
    fields = [
        {
            "name": "zensus_population_id",
            "description": "Index of zensus population cell",
            "type": "integer",
            "unit": "none",
        },
        {
            "name": "zensus_geom",
            "description": "Gemetry of zensus cell",
            "type": "geometry(point,3035)",
            "unit": "none",
        },
        {
            "name": "vg250_municipality_id",
            "description": "Index of municipality",
            "type": "integer",
            "unit": "none",
        },
        {
            "name": "vg250_nuts3",
            "description": "NUTS3 region-code",
            "type": "varchar",
            "unit": "none",
        },
    ]

    meta = {
        "name": "boundaries.egon_map_zensus_vg250",
        "title": "eGon feedin timeseries for RES",
        "id": "WILL_BE_SET_AT_PUBLICATION",
        "description": "Weather-dependent feedin timeseries for RES",
        "language": ["EN"],
        "publicationDate": datetime.date.today().isoformat(),
        "context": context(),
        "spatial": {
            "location": None,
            "extent": "Germany",
            "resolution": "100mx100m",
        },
        "sources": [
            sources()["zensus"],
            sources()["vg250"],
            sources()["egon-data"],
        ],
        "licenses": [
            license_ccby(
                "© Bundesamt für Kartographie und Geodäsie 2020 (Daten verändert); "
                "© Statistische Ämter des Bundes und der Länder 2014 "
                "© Jonathan Amme, Clara Büttner, Ilka Cußmann, Julian Endres, Carlos Epia, Stephan Günther, Ulf Müller, Amélia Nadal, Guido Pleßmann, Francesco Witte",
            )
        ],
        "contributors": [
            {
                "title": "Clara Büttner",
                "email": "http://github.com/ClaraBuettner",
                "date": time.strftime("%Y-%m-%d"),
                "object": None,
                "comment": "Added metadata",
            },
        ],
        "resources": [
            {
                "profile": "tabular-data-resource",
                "name": "boundaries.egon_map_zensus_vg250",
                "path": None,
                "format": "PostgreSQL",
                "encoding": "UTF-8",
                "schema": {
                    "fields": fields,
                    "primaryKey": ["index"],
                    "foreignKeys": [],
                },
                "dialect": {"delimiter": None, "decimalSeparator": "."},
            }
        ],
        "metaMetadata": meta_metadata(),
    }

    # Create json dump
    meta_json = "'" + json.dumps(meta) + "'"

    # Add metadata as a comment to the table
    db.submit_comment(
        meta_json,
        MapZensusVg250.__table__.schema,
        MapZensusVg250.__table__.name,
    )


1		import datetime
2		import json
3		import time
4
5		from geoalchemy2 import Geometry
6		from sqlalchemy import (
7		BigInteger,
8		Column,
9		Float,
10		Integer,
11		SmallInteger,
12		String,
13		func,
14		select,
15		)
16		from sqlalchemy.ext.declarative import declarative_base
17		import geopandas as gpd
18
19		from egon.data import db
20		from egon.data.datasets import Dataset
21		from egon.data.datasets.vg250 import vg250_metadata_resources_fields
22		from egon.data.metadata import (
23		context,
24		generate_resource_fields_from_sqla_model,
25		license_ccby,
26		licenses_datenlizenz_deutschland,
27		meta_metadata,
28		sources,
29		)
30		import egon.data.config
31
32		Base = declarative_base()
33
34
35		class ZensusVg250(Dataset):
36		def __init__(self, dependencies):
37		super().__init__(
38		name="ZensusVg250",
39		version="0.0.3",
40		dependencies=dependencies,
41		tasks=(
42		map_zensus_vg250,
43		inside_germany,
44		add_metadata_zensus_inside_ger,
45		population_in_municipalities,
46		add_metadata_vg250_gem_pop,
47		add_metadata_vg250_zensus,
48		),
49		)
50
51
52	View Code Duplication	class Vg250Sta(Base):
		0 ignored issues – show Duplication introduced 2021-08-05 11:02 UTC by Report Bug Copy Issue Report This code seems to be duplicated in your project. Loading history...
53		__tablename__ = "vg250_sta"
54		__table_args__ = {"schema": "boundaries"}
55
56		id = Column(BigInteger, primary_key=True, index=True)
57		ade = Column(BigInteger)
58		gf = Column(BigInteger)
59		bsg = Column(BigInteger)
60		ars = Column(String)
61		ags = Column(String)
62		sdv_ars = Column(String)
63		gen = Column(String)
64		bez = Column(String)
65		ibz = Column(BigInteger)
66		bem = Column(String)
67		nbd = Column(String)
68		sn_l = Column(String)
69		sn_r = Column(String)
70		sn_k = Column(String)
71		sn_v1 = Column(String)
72		sn_v2 = Column(String)
73		sn_g = Column(String)
74		fk_s3 = Column(String)
75		nuts = Column(String)
76		ars_0 = Column(String)
77		ags_0 = Column(String)
78		wsk = Column(String)
79		debkg_id = Column(String)
80		rs = Column(String)
81		sdv_rs = Column(String)
82		rs_0 = Column(String)
83		geometry = Column(Geometry(srid=4326), index=True)
84
85
86	View Code Duplication	class Vg250Gem(Base):
		0 ignored issues – show Duplication introduced 2021-03-11 12:59 UTC by Report Bug Copy Issue Report This code seems to be duplicated in your project. Loading history...
87		__tablename__ = "vg250_gem"
88		__table_args__ = {"schema": "boundaries"}
89
90		id = Column(BigInteger, primary_key=True, index=True)
91		ade = Column(BigInteger)
92		gf = Column(BigInteger)
93		bsg = Column(BigInteger)
94		ars = Column(String)
95		ags = Column(String)
96		sdv_ars = Column(String)
97		gen = Column(String)
98		bez = Column(String)
99		ibz = Column(BigInteger)
100		bem = Column(String)
101		nbd = Column(String)
102		sn_l = Column(String)
103		sn_r = Column(String)
104		sn_k = Column(String)
105		sn_v1 = Column(String)
106		sn_v2 = Column(String)
107		sn_g = Column(String)
108		fk_s3 = Column(String)
109		nuts = Column(String)
110		ars_0 = Column(String)
111		ags_0 = Column(String)
112		wsk = Column(String)
113		debkg_id = Column(String)
114		rs = Column(String)
115		sdv_rs = Column(String)
116		rs_0 = Column(String)
117		geometry = Column(Geometry(srid=4326), index=True)
118
119
120		class DestatisZensusPopulationPerHa(Base):
121		__tablename__ = "destatis_zensus_population_per_ha"
122		__table_args__ = {"schema": "society"}
123
124		id = Column(Integer, primary_key=True, index=True)
125		grid_id = Column(String(254), nullable=False)
126		x_mp = Column(Integer)
127		y_mp = Column(Integer)
128		population = Column(SmallInteger)
129		geom_point = Column(Geometry("POINT", 3035), index=True)
130		geom = Column(Geometry("POLYGON", 3035), index=True)
131
132
133		class DestatisZensusPopulationPerHaInsideGermany(Base):
134		__tablename__ = "destatis_zensus_population_per_ha_inside_germany"
135		__table_args__ = {"schema": "society"}
136
137		id = Column(Integer, primary_key=True, index=True)
138		grid_id = Column(String(254), nullable=False)
139		population = Column(SmallInteger)
140		geom_point = Column(Geometry("POINT", 3035), index=True)
141		geom = Column(Geometry("POLYGON", 3035), index=True)
142
143
144		class Vg250GemPopulation(Base):
145		__tablename__ = "vg250_gem_population"
146		__table_args__ = {"schema": "boundaries"}
147
148		id = Column(Integer, primary_key=True, index=True)
149		gen = Column(String)
150		bez = Column(String)
151		bem = Column(String)
152		nuts = Column(String)
153		ags_0 = Column(String)
154		rs_0 = Column(String)
155		area_ha = Column(Float)
156		area_km2 = Column(Float)
157		population_total = Column(Integer)
158		cell_count = Column(Integer)
159		population_density = Column(Integer)
160		geom = Column(Geometry(srid=3035))
161
162
163		class MapZensusVg250(Base):
164		__tablename__ = "egon_map_zensus_vg250"
165		__table_args__ = {"schema": "boundaries"}
166
167		zensus_population_id = Column(Integer, primary_key=True, index=True)
168		zensus_geom = Column(Geometry("POINT", 3035))
169		vg250_municipality_id = Column(Integer)
170		vg250_nuts3 = Column(String)
171
172
173		def map_zensus_vg250():
174		"""Perform mapping between municipalities and zensus grid"""
175
176		MapZensusVg250.__table__.drop(bind=db.engine(), checkfirst=True)
177		MapZensusVg250.__table__.create(bind=db.engine(), checkfirst=True)
178
179		# Get information from data configuration file
180		cfg = egon.data.config.datasets()["map_zensus_vg250"]
181
182		local_engine = db.engine()
183
184		db.execute_sql(
185		f"""DELETE FROM
186		{cfg['targets']['map']['schema']}.{cfg['targets']['map']['table']}"""
187		)
188
189		gdf = db.select_geodataframe(
190		f"""SELECT * FROM
191		{cfg['sources']['zensus_population']['schema']}.
192		{cfg['sources']['zensus_population']['table']}""",
193		geom_col="geom_point",
194		)
195
196		gdf_boundaries = db.select_geodataframe(
197		f"""SELECT * FROM {cfg['sources']['vg250_municipalities']['schema']}.
198		{cfg['sources']['vg250_municipalities']['table']}""",
199		geom_col="geometry",
200		epsg=3035,
201		)
202
203		# Join vg250 with zensus cells
204		join = gpd.sjoin(gdf, gdf_boundaries, how="inner", op="intersects")
205
206		# Deal with cells that don't interect with boundaries (e.g. at borders)
207		missing_cells = gdf[(~gdf.id.isin(join.id_left)) & (gdf.population > 0)]
208
209		# start with buffer
210		buffer = 0
211
212		# increase buffer until every zensus cell is matched to a nuts3 region
213		while len(missing_cells) > 0:
214		buffer += 100
215		boundaries_buffer = gdf_boundaries.copy()
216		boundaries_buffer.geometry = boundaries_buffer.geometry.buffer(buffer)
217		join_missing = gpd.sjoin(
218		missing_cells, boundaries_buffer, how="inner", op="intersects"
219		)
220		join = join.append(join_missing)
221		missing_cells = gdf[
222		(~gdf.id.isin(join.id_left)) & (gdf.population > 0)
223		]
224		print(f"Maximal buffer to match zensus points to vg250: {buffer}m")
225
226		# drop duplicates
227		join = join.drop_duplicates(subset=["id_left"])
228
229		# Insert results to database
230		join.rename(
231		{
232		"id_left": "zensus_population_id",
233		"geom_point": "zensus_geom",
234		"nuts": "vg250_nuts3",
235		"id_right": "vg250_municipality_id",
236		},
237		axis=1,
238		)[
239		[
240		"zensus_population_id",
241		"zensus_geom",
242		"vg250_municipality_id",
243		"vg250_nuts3",
244		]
245		].set_geometry(
246		"zensus_geom"
247		).to_postgis(
248		cfg["targets"]["map"]["table"],
249		schema=cfg["targets"]["map"]["schema"],
250		con=local_engine,
251		if_exists="replace",
252		)
253
254
255		def inside_germany():
256		"""
257		Filter zensus data by data inside Germany and population > 0
258		"""
259
260		# Get database engine
261		engine_local_db = db.engine()
262
263		# Create new table
264		db.execute_sql(
265		f"""
266		DROP TABLE IF EXISTS {DestatisZensusPopulationPerHaInsideGermany.__table__.schema}.{DestatisZensusPopulationPerHaInsideGermany.__table__.name} CASCADE;
267		"""
268		)
269		DestatisZensusPopulationPerHaInsideGermany.__table__.create(
270		bind=engine_local_db, checkfirst=True
271		)
272
273		with db.session_scope() as s:
274
275		# Query zensus cells in German boundaries from vg250
276		cells_in_germany = s.query(MapZensusVg250.zensus_population_id)
277
278		# Query relevant data from zensus population table
279		q = (
280		s.query(
281		DestatisZensusPopulationPerHa.id,
282		DestatisZensusPopulationPerHa.grid_id,
283		DestatisZensusPopulationPerHa.population,
284		DestatisZensusPopulationPerHa.geom_point,
285		DestatisZensusPopulationPerHa.geom,
286		)
287		.filter(DestatisZensusPopulationPerHa.population > 0)
288		.filter(DestatisZensusPopulationPerHa.id.in_(cells_in_germany))
289		)
290
291		# Insert above queried data into new table
292		insert = DestatisZensusPopulationPerHaInsideGermany.__table__.insert().from_select(
293		(
294		DestatisZensusPopulationPerHaInsideGermany.id,
295		DestatisZensusPopulationPerHaInsideGermany.grid_id,
296		DestatisZensusPopulationPerHaInsideGermany.population,
297		DestatisZensusPopulationPerHaInsideGermany.geom_point,
298		DestatisZensusPopulationPerHaInsideGermany.geom,
299		),
300		q,
301		)
302
303		# Execute and commit (trigger transactions in database)
304		s.execute(insert)
305		s.commit()
306
307
308		def population_in_municipalities():
309		"""
310		Create table of municipalities with information about population
311		"""
312
313		engine_local_db = db.engine()
314		Vg250GemPopulation.__table__.drop(bind=engine_local_db, checkfirst=True)
315		Vg250GemPopulation.__table__.create(bind=engine_local_db, checkfirst=True)
316
317		srid = 3035
318
319		gem = db.select_geodataframe(
320		"SELECT * FROM boundaries.vg250_gem",
321		geom_col="geometry",
322		epsg=srid,
323		index_col="id",
324		)
325
326		gem["area_ha"] = gem.area / 10000
327
328		gem["area_km2"] = gem.area / 1000000
329
330		population = db.select_dataframe(
331		"""SELECT id, population, vg250_municipality_id
332		FROM society.destatis_zensus_population_per_ha
333		INNER JOIN boundaries.egon_map_zensus_vg250 ON (
334		society.destatis_zensus_population_per_ha.id =
335		boundaries.egon_map_zensus_vg250.zensus_population_id)
336		WHERE population > 0"""
337		)
338
339		gem["population_total"] = (
340		population.groupby("vg250_municipality_id").population.sum().fillna(0)
341		)
342
343		gem["cell_count"] = population.groupby(
344		"vg250_municipality_id"
345		).population.count()
346
347		gem["population_density"] = gem["population_total"] / gem["area_km2"]
348
349		gem.reset_index().to_postgis(
350		"vg250_gem_population",
351		schema="boundaries",
352		con=db.engine(),
353		if_exists="replace",
354		)
355
356
357		def add_metadata_zensus_inside_ger():
358		"""
359		Create metadata JSON for DestatisZensusPopulationPerHaInsideGermany
360
361		Creates a metdadata JSON string and writes it to the database table comment
362		"""
363		schema_table = ".".join(
364		[
365		DestatisZensusPopulationPerHaInsideGermany.__table__.schema,
366		DestatisZensusPopulationPerHaInsideGermany.__table__.name,
367		]
368		)
369
370		metadata = {
371		"name": schema_table,
372		"title": "DESTATIS - Zensus 2011 - Population per hectar",
373		"id": "WILL_BE_SET_AT_PUBLICATION",
374		"description": (
375		"National census in Germany in 2011 with the bounds on Germanys "
376		"borders."
377		),
378		"language": ["en-EN", "de-DE"],
379		"publicationDate": datetime.date.today().isoformat(),
380		"context": context(),
381		"spatial": {
382		"location": None,
383		"extent": "Germany",
384		"resolution": "1 ha",
385		},
386		"temporal": {
387		"reference_date": "2011-12-31",
388		"timeseries": {
389		"start": None,
390		"end": None,
391		"resolution": None,
392		"alignment": None,
393		"aggregationType": None,
394		},
395		},
396		"sources": [
397		{
398		"title": "Statistisches Bundesamt (Destatis) - Ergebnisse des "
399		"Zensus 2011 zum Download",
400		"description": (
401		"Als Download bieten wir Ihnen auf dieser Seite "
402		"zusätzlich zur Zensusdatenbank CSV- und "
403		"teilweise Excel-Tabellen mit umfassenden "
404		"Personen-, Haushalts- und Familien- sowie "
405		"Gebäude- und Wohnungsmerkmalen. Die "
406		"Ergebnisse liegen auf Bundes-, Länder-, Kreis- "
407		"und Gemeindeebene vor. Außerdem sind einzelne "
408		"Ergebnisse für Gitterzellen verfügbar."
409		),
410		"path": "https://www.zensus2011.de/DE/Home/Aktuelles/"
411		"DemografischeGrunddaten.html",
412		"licenses": [
413		licenses_datenlizenz_deutschland(
414		attribution="© Statistische Ämter des Bundes und der "
415		"Länder 2014"
416		)
417		],
418		},
419		{
420		"title": "Dokumentation - Zensus 2011 - Methoden und Verfahren",
421		"description": (
422		"Diese Publikation beschreibt ausführlich die "
423		"Methoden und Verfahren des registergestützten "
424		"Zensus 2011; von der Datengewinnung und "
425		"-aufbereitung bis hin zur Ergebniserstellung"
426		" und Geheimhaltung. Der vorliegende Band wurde "
427		"von den Statistischen Ämtern des Bundes und "
428		"der Länder im Juni 2015 veröffentlicht."
429		),
430		"path": "https://www.destatis.de/DE/Publikationen/Thematisch/Be"
431		"voelkerung/Zensus/ZensusBuLaMethodenVerfahren51211051"
432		"19004.pdf?__blob=publicationFile",
433		"licenses": [
434		licenses_datenlizenz_deutschland(
435		attribution="© Statistisches Bundesamt, Wiesbaden "
436		"2015 (im Auftrag der "
437		"Herausgebergemeinschaft)"
438		)
439		],
440		},
441		],
442		"licenses": [
443		licenses_datenlizenz_deutschland(
444		attribution="© Statistische Ämter des Bundes und der Länder "
445		"2014; © Statistisches Bundesamt, Wiesbaden 2015 "
446		"(Daten verändert)"
447		)
448		],
449		"contributors": [
450		{
451		"title": "Guido Pleßmann",
452		"email": "http://github.com/gplssm",
453		"date": time.strftime("%Y-%m-%d"),
454		"object": None,
455		"comment": "Imported data",
456		},
457		{
458		"title": "Jonathan Amme",
459		"email": "http://github.com/nesnoj",
460		"date": time.strftime("%Y-%m-%d"),
461		"object": None,
462		"comment": "Metadata extended",
463		},
464		],
465		"resources": [
466		{
467		"profile": "tabular-data-resource",
468		"name": schema_table,
469		"path": None,
470		"format": "PostgreSQL",
471		"encoding": "UTF-8",
472		"schema": {
473		"fields": [
474		{
475		"name": "id",
476		"description": "Unique identifier",
477		"type": "none",
478		"unit": "integer",
479		},
480		{
481		"name": "grid_id",
482		"description": "Grid number of source",
483		"type": "string",
484		"unit": "none",
485		},
486		{
487		"name": "population",
488		"description": "Number of registred residents",
489		"type": "integer",
490		"unit": "resident",
491		},
492		{
493		"name": "geom_point",
494		"description": "Geometry centroid",
495		"type": "Geometry",
496		"unit": "none",
497		},
498		{
499		"name": "geom",
500		"description": "Geometry",
501		"type": "Geometry",
502		"unit": "",
503		},
504		],
505		"primaryKey": ["id"],
506		"foreignKeys": [],
507		},
508		"dialect": {"delimiter": None, "decimalSeparator": "."},
509		}
510		],
511		"metaMetadata": meta_metadata(),
512		}
513
514		meta_json = "'" + json.dumps(metadata) + "'"
515
516		db.submit_comment(
517		meta_json,
518		DestatisZensusPopulationPerHaInsideGermany.__table__.schema,
519		DestatisZensusPopulationPerHaInsideGermany.__table__.name,
520		)
521
522
523		def add_metadata_vg250_gem_pop():
524		"""
525		Create metadata JSON for Vg250GemPopulation
526
527		Creates a metdadata JSON string and writes it to the database table comment
528		"""
529		vg250_config = egon.data.config.datasets()["vg250"]
530		schema_table = ".".join(
531		[
532		Vg250GemPopulation.__table__.schema,
533		Vg250GemPopulation.__table__.name,
534		]
535		)
536
537		licenses = [
538		licenses_datenlizenz_deutschland(
539		attribution="© Bundesamt für Kartographie und Geodäsie "
540		"2020 (Daten verändert)"
541		)
542		]
543
544		vg250_source = {
545		"title": "Verwaltungsgebiete 1:250 000 (Ebenen)",
546		"description": "Der Datenbestand umfasst sämtliche Verwaltungseinheiten der "
547		"hierarchischen Verwaltungsebenen vom Staat bis zu den Gemeinden "
548		"mit ihren Grenzen, statistischen Schlüsselzahlen, Namen der "
549		"Verwaltungseinheit sowie die spezifische Bezeichnung der "
550		"Verwaltungsebene des jeweiligen Landes.",
551		"path": vg250_config["original_data"]["source"]["url"],
552		"licenses": licenses,
553		}
554
555		resources_fields = vg250_metadata_resources_fields()
556		resources_fields.extend(
557		[
558		{
559		"name": "area_ha",
560		"description": "Area in ha",
561		"type": "float",
562		"unit": "ha",
563		},
564		{
565		"name": "area_km2",
566		"description": "Area in km2",
567		"type": "float",
568		"unit": "km2",
569		},
570		{
571		"name": "population_total",
572		"description": "Number of inhabitants",
573		"type": "integer",
574		"unit": "none",
575		},
576		{
577		"name": "cell_count",
578		"description": "Number of Zensus cells",
579		"type": "integer",
580		"unit": "none",
581		},
582		{
583		"name": "population_density",
584		"description": "Number of inhabitants per km2",
585		"type": "float",
586		"unit": "inhabitants/km²",
587		},
588		]
589		)
590
591		metadata = {
592		"name": schema_table,
593		"title": (
594		"Municipalities (BKG Verwaltungsgebiete 250) and population "
595		"(Destatis Zensus)"
596		),
597		"id": "WILL_BE_SET_AT_PUBLICATION",
598		"description": "Municipality data enriched by population data",
599		"language": ["de-DE"],
600		"publicationDate": datetime.date.today().isoformat(),
601		"context": context(),
602		"spatial": {
603		"location": None,
604		"extent": "Germany",
605		"resolution": "1:250000",
606		},
607		"temporal": {
608		"referenceDate": "2020-01-01",
609		"timeseries": {
610		"start": None,
611		"end": None,
612		"resolution": None,
613		"alignment": None,
614		"aggregationType": None,
615		},
616		},
617		"sources": [vg250_source],
618		"licenses": licenses,
619		"contributors": [
620		{
621		"title": "Guido Pleßmann",
622		"email": "http://github.com/gplssm",
623		"date": time.strftime("%Y-%m-%d"),
624		"object": None,
625		"comment": "Imported data",
626		},
627		{
628		"title": "Jonathan Amme",
629		"email": "http://github.com/nesnoj",
630		"date": time.strftime("%Y-%m-%d"),
631		"object": None,
632		"comment": "Metadata extended",
633		},
634		],
635		"resources": [
636		{
637		"profile": "tabular-data-resource",
638		"name": schema_table,
639		"path": None,
640		"format": "PostgreSQL",
641		"encoding": "UTF-8",
642		"schema": {
643		"fields": resources_fields,
644		"primaryKey": ["id"],
645		"foreignKeys": [],
646		},
647		"dialect": {"delimiter": None, "decimalSeparator": "."},
648		}
649		],
650		"metaMetadata": meta_metadata(),
651		}
652
653		meta_json = "'" + json.dumps(metadata) + "'"
654
655		db.submit_comment(
656		meta_json,
657		Vg250GemPopulation.__table__.schema,
658		Vg250GemPopulation.__table__.name,
659		)
660
661
662	View Code Duplication	def add_metadata_vg250_zensus():
		0 ignored issues – show Duplication introduced 2021-12-13 16:43 UTC by Report Bug Copy Issue Report This code seems to be duplicated in your project. Loading history...
663		# Import column names and datatypes
664		fields = [
665		{
666		"name": "zensus_population_id",
667		"description": "Index of zensus population cell",
668		"type": "integer",
669		"unit": "none",
670		},
671		{
672		"name": "zensus_geom",
673		"description": "Gemetry of zensus cell",
674		"type": "geometry(point,3035)",
675		"unit": "none",
676		},
677		{
678		"name": "vg250_municipality_id",
679		"description": "Index of municipality",
680		"type": "integer",
681		"unit": "none",
682		},
683		{
684		"name": "vg250_nuts3",
685		"description": "NUTS3 region-code",
686		"type": "varchar",
687		"unit": "none",
688		},
689		]
690
691		meta = {
692		"name": "boundaries.egon_map_zensus_vg250",
693		"title": "eGon feedin timeseries for RES",
694		"id": "WILL_BE_SET_AT_PUBLICATION",
695		"description": "Weather-dependent feedin timeseries for RES",
696		"language": ["EN"],
697		"publicationDate": datetime.date.today().isoformat(),
698		"context": context(),
699		"spatial": {
700		"location": None,
701		"extent": "Germany",
702		"resolution": "100mx100m",
703		},
704		"sources": [
705		sources()["zensus"],
706		sources()["vg250"],
707		sources()["egon-data"],
708		],
709		"licenses": [
710		license_ccby(
711		"© Bundesamt für Kartographie und Geodäsie 2020 (Daten verändert); "
712		"© Statistische Ämter des Bundes und der Länder 2014 "
713		"© Jonathan Amme, Clara Büttner, Ilka Cußmann, Julian Endres, Carlos Epia, Stephan Günther, Ulf Müller, Amélia Nadal, Guido Pleßmann, Francesco Witte",
714		)
715		],
716		"contributors": [
717		{
718		"title": "Clara Büttner",
719		"email": "http://github.com/ClaraBuettner",
720		"date": time.strftime("%Y-%m-%d"),
721		"object": None,
722		"comment": "Added metadata",
723		},
724		],
725		"resources": [
726		{
727		"profile": "tabular-data-resource",
728		"name": "boundaries.egon_map_zensus_vg250",
729		"path": None,
730		"format": "PostgreSQL",
731		"encoding": "UTF-8",
732		"schema": {
733		"fields": fields,
734		"primaryKey": ["index"],
735		"foreignKeys": [],
736		},
737		"dialect": {"delimiter": None, "decimalSeparator": "."},
738		}
739		],
740		"metaMetadata": meta_metadata(),
741		}
742
743		# Create json dump
744		meta_json = "'" + json.dumps(meta) + "'"
745
746		# Add metadata as a comment to the table
747		db.submit_comment(
748		meta_json,
749		MapZensusVg250.__table__.schema,
750		MapZensusVg250.__table__.name,
751		)
752

openego / eGon-data

Pull Request — dev (#568)

data.datasets.zensus_vg250 A

Complexity

Size/Duplication

Importance

6 Functions

1 Method

How to fix Duplicated Code

Duplicated Code

Duplication Side-by-Side

Filter issues like