Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
80 changes: 66 additions & 14 deletions python/startup.py
Original file line number Diff line number Diff line change
@@ -1,27 +1,79 @@
# Container for the Startup module. See the Estimates-Program wiki page for more
# details: https://github.com/SANDAG/Estimates-Program/wiki/Startup

import pandas as pd
import sqlalchemy as sql

import python.utils as utils
import python.tests as tests


def run_startup(debug: bool) -> None:
    """Orchestrator function to grab MGRA data, validate, and insert.

    Inserts MGRA geography data from SANDAG's GeoAnalyst database into the
    production database. The data could be directly inserted via a single SQL
    statement but it is instead brought into Python to allow for validation
    and to be written out to csv files for debugging purposes.

    Functionality is segmented into functions for code encapsulation:
        _get_startup_inputs - Get MGRA data from GeoAnalyst
        _validate_startup_inputs - Validate MGRA data
        _insert_startup_outputs - Insert MGRA data to the production database

    Args:
        debug (bool): Whether to run in debug mode. In debug mode the MGRA
            data is written to a local csv file instead of being inserted
            into the production database.
    """
    # Pull the MGRA data into Python first so it can be validated before any
    # write to the database happens
    mgra = _get_startup_inputs()
    _validate_startup_inputs(mgra)

    # Insert the MGRA geography (or dump it to csv when debugging). The debug
    # switch is handled inside the insert function, so no early return here.
    _insert_startup_outputs(mgra, debug)


def _get_startup_inputs() -> pd.DataFrame:
    """Get input data related to the Startup module.

    Runs the ``startup/get_mgra.sql`` query, parameterized with
    ``utils.RUN_ID`` and ``utils.MGRA_VERSION``, with ``insert_switch`` set to
    0 so that the query only returns tabular data. Nothing is executed or
    committed outside of that read, so this function does not write to the
    database itself.

    Returns:
        pd.DataFrame: The result set of the MGRA query.
    """
    with utils.ESTIMATES_ENGINE.connect() as con:
        with open(utils.SQL_FOLDER / "startup/get_mgra.sql") as file:
            mgra = pd.read_sql_query(
                sql=sql.text(file.read()),
                con=con,
                params={
                    "run_id": utils.RUN_ID,
                    "mgra_version": utils.MGRA_VERSION,
                    "insert_switch": 0,  # return tabular data only
                },  # type: ignore
            )

    return mgra


def _validate_startup_inputs(mgra: pd.DataFrame) -> None:
    """Run the shared data validation checks on the MGRA geography.

    Delegates to ``tests.validate_data`` under the label "MGRA Geography".
    The row count check is keyed on the ``mgra`` column, while the negative
    and null checks are passed empty configurations.
    NOTE(review): presumably an empty configuration enables the check with
    its default settings -- confirm against ``tests.validate_data``.

    Args:
        mgra (pd.DataFrame): MGRA geography data to validate
    """
    checks = {
        "row_count": {"key_columns": {"mgra"}},
        "negative": {},
        "null": {},
    }
    tests.validate_data("MGRA Geography", mgra, **checks)


def _insert_startup_outputs(mgra: pd.DataFrame, debug: bool) -> None:
    """Write out the Startup module results.

    In debug mode the MGRA data is saved as ``inputs_mgra.csv`` in the debug
    output folder and the database is left untouched. Otherwise the
    ``startup/get_mgra.sql`` script is executed with ``insert_switch`` set to
    1 and the transaction is committed. The ``mgra`` DataFrame is only used
    for the debug csv.

    Args:
        mgra (pd.DataFrame): MGRA geography data, written to csv in debug mode
        debug (bool): Whether to run in debug mode
    """
    # Debug runs only write a local copy and never touch the database
    if debug:
        mgra.to_csv(utils.DEBUG_OUTPUT_FOLDER / "inputs_mgra.csv", index=False)
        return

    # Insert the MGRA geography to the database
    with utils.ESTIMATES_ENGINE.connect() as connection:
        with open(utils.SQL_FOLDER / "startup/get_mgra.sql") as sql_file:
            statement = sql.text(sql_file.read())
        insert_params = {
            "run_id": utils.RUN_ID,
            "mgra_version": utils.MGRA_VERSION,
            "insert_switch": 1,  # write data to database
        }
        connection.execute(statement, insert_params)
        connection.commit()
11 changes: 11 additions & 0 deletions sql/create_objects.sql
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@ CREATE TABLE [inputs].[controls_ase] (
CONSTRAINT [fk_inputs_controls_ase_run_id] FOREIGN KEY ([run_id]) REFERENCES [metadata].[run] ([run_id]),
CONSTRAINT [chk_non_negative_inputs_controls_ase] CHECK ([value] >= 0)
)
GO

CREATE TABLE [inputs].[controls_tract] (
[run_id] INT NOT NULL,
Expand All @@ -45,6 +46,7 @@ CREATE TABLE [inputs].[controls_tract] (
CONSTRAINT [fk_inputs_controls_tract_run_id] FOREIGN KEY ([run_id]) REFERENCES [metadata].[run] ([run_id]),
CONSTRAINT [chk_non_negative_inputs_controls_tract] CHECK ([value] >= 0)
)
GO

CREATE TABLE [inputs].[controls_city] (
[run_id] INT NOT NULL,
Expand All @@ -57,10 +59,13 @@ CREATE TABLE [inputs].[controls_city] (
CONSTRAINT [fk_inputs_controls_city_run_id] FOREIGN KEY ([run_id]) REFERENCES [metadata].[run] ([run_id]),
CONSTRAINT [chk_non_negative_inputs_controls_city] CHECK ([value] >= 0)
)
GO

CREATE TABLE [inputs].[mgra] (
[run_id] INT NOT NULL,
[mgra] INT NOT NULL,
[2010_census_blockgroup] NVARCHAR(12) NOT NULL,
[2020_census_blockgroup] NVARCHAR(12) NOT NULL,
[2010_census_tract] NVARCHAR(11) NOT NULL,
[2020_census_tract] NVARCHAR(11) NOT NULL,
[puma00] nvarchar(5) NOT NULL,
Expand All @@ -72,6 +77,7 @@ CREATE TABLE [inputs].[mgra] (
CONSTRAINT [fk_inputs_mgra_run_id] FOREIGN KEY ([run_id]) REFERENCES [metadata].[run] ([run_id])
-- No non-negative CHECK here as these values are directly pulled from [GeoDepot]
) WITH (DATA_COMPRESSION = PAGE)
GO

CREATE TABLE [inputs].[special_mgras] (
[id] INT IDENTITY(1,1),
Expand Down Expand Up @@ -142,6 +148,7 @@ CREATE TABLE [outputs].[ase] (
CONSTRAINT [fk_outputs_ase_mgra] FOREIGN KEY ([run_id], [mgra]) REFERENCES [inputs].[mgra] ([run_id], [mgra]),
CONSTRAINT [chk_non_negative_outputs_ase] CHECK ([value] >= 0)
)
GO

-- For purposes of data insertion speed, only non-zero ASE data is inserted into
-- [outputs].[ase]. In case you want the full table with zeros, you can use the below
Expand Down Expand Up @@ -222,6 +229,7 @@ BEGIN
AND [shell].[ethnicity] = [ase].[ethnicity]
RETURN;
END
GO

CREATE TABLE [outputs].[gq] (
[run_id] INT NOT NULL,
Expand All @@ -235,6 +243,7 @@ CREATE TABLE [outputs].[gq] (
CONSTRAINT [fk_outputs_gq_mgra] FOREIGN KEY ([run_id], [mgra]) REFERENCES [inputs].[mgra] ([run_id], [mgra]),
CONSTRAINT [chk_non_negative_outputs_gq] CHECK ([value] >= 0)
)
GO

CREATE TABLE [outputs].[hh] (
[run_id] INT NOT NULL,
Expand All @@ -248,6 +257,7 @@ CREATE TABLE [outputs].[hh] (
CONSTRAINT [fk_outputs_hh_mgra] FOREIGN KEY ([run_id], [mgra]) REFERENCES [inputs].[mgra] ([run_id], [mgra]),
CONSTRAINT [chk_non_negative_outputs_hh] CHECK ([value] >= 0)
)
GO

CREATE TABLE [outputs].[hh_characteristics] (
[run_id] INT NOT NULL,
Expand All @@ -261,6 +271,7 @@ CREATE TABLE [outputs].[hh_characteristics] (
CONSTRAINT [fk_outputs_hh_characteristics_mgra] FOREIGN KEY ([run_id], [mgra]) REFERENCES [inputs].[mgra] ([run_id], [mgra]),
CONSTRAINT [chk_non_negative_outputs_hh_characteristics] CHECK ([value] >= 0)
)
GO

CREATE TABLE [outputs].[hs] (
[run_id] INT NOT NULL,
Expand Down
174 changes: 0 additions & 174 deletions sql/insert_mgra.sql

This file was deleted.

Loading
Loading