Skip to content

Commit

Permalink
Fix 2024 population estimates (impute with 2022/2023)
Browse files (browse the repository at this point in the history)
  • Loading branch information
sid-kap committed Apr 15, 2024
1 parent c6b1e02 commit 5833a8c
Show file tree
Hide file tree
Showing 5 changed files with 34 additions and 50 deletions.
2 changes: 1 addition & 1 deletion lib/common_elements.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -135,7 +135,7 @@ const navItems = [
name: "States",
},
{
url: "/metros/Bakersfield_CA",
url: "/metros/Abilene_TX",
name: "Metros",
},
{
Expand Down
17 changes: 15 additions & 2 deletions python/housing_data/build_data_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -290,13 +290,26 @@ def add_current_year_projections(year_to_date_df: pd.DataFrame) -> pd.DataFrame:
return year_to_date_df


def impute_2023_and_2024_population(df_2020s: pd.DataFrame) -> pd.DataFrame:
    """
    Impute 2023 and 2024 with the 2022 population.

    Rows whose ``year`` value equals the string ``"2022"`` are copied twice,
    once relabelled ``"2023"`` and once relabelled ``"2024"``, and appended
    after the original rows. The input frame is not modified.

    The original index labels are kept by ``pd.concat``, so the copied rows
    share index labels with the 2022 rows they were derived from.

    :param df_2020s: population rows with a ``year`` column of year strings.
    :return: a new frame containing the input rows followed by the imputed
        2023 rows and then the imputed 2024 rows.
    """
    # Select the 2022 rows once; both imputed years are derived from them.
    rows_2022 = df_2020s[df_2020s["year"] == "2022"]
    return pd.concat(
        [
            df_2020s,
            rows_2022.assign(year="2023"),
            rows_2022.assign(year="2024"),
        ]
    )


def impute_2024_population(df_2020s: pd.DataFrame) -> pd.DataFrame:
    """
    Impute 2024 with the 2023 population.

    Rows whose ``year`` value equals the string ``"2023"`` are copied,
    relabelled ``"2024"``, and appended after the original rows. The input
    frame is not modified.

    The original index labels are kept by ``pd.concat``, so the copied rows
    share index labels with the 2023 rows they were derived from.

    :param df_2020s: population rows with a ``year`` column of year strings.
    :return: a new frame containing the input rows followed by the imputed
        2024 rows.
    """
    rows_2023 = df_2020s[df_2020s["year"] == "2023"]
    return pd.concat(
        [
            df_2020s,
            rows_2023.assign(year="2024"),
        ]
    )
43 changes: 11 additions & 32 deletions python/housing_data/county_population.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,8 +4,8 @@

import pandas as pd
import us
from housing_data.build_data_utils import impute_2023_population
from housing_data.data_loading_helpers import get_path, get_url_text
from housing_data.build_data_utils import impute_2024_population
from housing_data.data_loading_helpers import get_url_text
from housing_data.fips_crosswalk import load_fips_crosswalk


Expand All @@ -21,36 +21,21 @@ def _melt_df(df: pd.DataFrame, years: list[int]) -> pd.DataFrame:


def get_county_populations_2020s(data_path: Optional[Path]) -> pd.DataFrame:
    """
    Load county population estimates for the 2020s.

    Reads ``co-est2023-alldata.csv`` from ``data_path`` (decoded as
    ``latin_1``), reshapes it with ``_melt_df`` for the years 2020 through
    2023, and then fills in 2024 via ``impute_2024_population``.

    NOTE(review): ``data_path`` is annotated ``Optional``, but ``None`` cannot
    be joined with ``/`` -- confirm callers always pass a directory.

    :param data_path: directory containing the Census estimates file.
    :return: the frame produced by ``impute_2024_population``.
    """
    df = pd.read_csv(data_path / "co-est2023-alldata.csv", encoding="latin_1")

    df = _melt_df(df, list(range(2020, 2024)))
    return impute_2024_population(df)


def get_county_populations_2010s(data_path: Optional[Path]) -> pd.DataFrame:
    """
    Load county population estimates for the 2010s.

    Reads ``co-est2020-alldata.csv`` from ``data_path`` (decoded as
    ``latin_1``) and reshapes it with ``_melt_df`` for the years 2010 through
    2019.

    NOTE(review): ``data_path`` is annotated ``Optional``, but ``None`` cannot
    be joined with ``/`` -- confirm callers always pass a directory.

    :param data_path: directory containing the Census estimates file.
    :return: the frame produced by ``_melt_df``.
    """
    df = pd.read_csv(data_path / "co-est2020-alldata.csv", encoding="latin_1")

    return _melt_df(df, list(range(2010, 2020)))


def get_county_populations_2000s(data_path: Path, data_repo_path: Path) -> pd.DataFrame:
urls = [
(
state.fips,
f"https://www2.census.gov/programs-surveys/popest/tables/2000-2010/"
f"intercensal/county/co-est00int-01-{state.fips}.csv",
)
paths = [
(state.fips, f"co-est00int-01-{state.fips}.csv")
for state in us.STATES_AND_TERRITORIES + [us.states.DC]
if state.fips not in ["60", "66", "69", "72", "78"] # exclude territories
]
Expand All @@ -73,9 +58,9 @@ def get_county_populations_2000s(data_path: Path, data_repo_path: Path) -> pd.Da
]

dfs = []
for state_code, url in urls:
for state_code, path in paths:
df = pd.read_csv(
get_path(url, data_path),
data_path / path,
names=col_names,
skiprows=4,
skipfooter=8,
Expand Down Expand Up @@ -190,13 +175,7 @@ def get_county_populations_1990s(data_path: Optional[Path]) -> pd.DataFrame:
def get_county_populations_1980s(data_path: Optional[Path]) -> pd.DataFrame:
dfs = []
for year in range(1980, 1990):
df = pd.read_excel(
get_path(
f"https://www2.census.gov/programs-surveys/popest/tables/1980-1990/counties/asrh/pe-02-{year}.xls",
data_path,
),
skiprows=5,
)
df = pd.read_excel(data_path / f"pe-02-{year}.xls", skiprows=5)
df = df.rename(
columns={
"Year of Estimate": "year",
Expand Down
4 changes: 2 additions & 2 deletions python/housing_data/place_population.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@

import numpy as np
import pandas as pd
from housing_data.build_data_utils import impute_2023_population
from housing_data.build_data_utils import impute_2023_and_2024_population
from housing_data.data_loading_helpers import get_path, get_url_text


Expand Down Expand Up @@ -432,7 +432,7 @@ def get_place_populations_2020s(data_path: Optional[Path]) -> pd.DataFrame:
),
)
df = _melt_df(df, years=list(range(2020, 2023)))
df = impute_2023_population(df)
df = impute_2023_and_2024_population(df)
return df


Expand Down
18 changes: 5 additions & 13 deletions python/housing_data/state_population.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@

import pandas as pd
import us
from housing_data.build_data_utils import impute_2023_population
from housing_data.build_data_utils import impute_2024_population
from housing_data.data_loading_helpers import get_path, get_url_text

DIVISIONS = {
Expand Down Expand Up @@ -119,10 +119,7 @@ def _get_counties_population_table_1990s(
assert 1990 <= year <= 1999

df = pd.read_csv(
get_path(
f"https://www2.census.gov/programs-surveys/popest/tables/1990-2000/intercensal/st-co/stch-icen{year}.txt",
data_path,
),
data_path / f"stch-icen{year}.txt",
delim_whitespace=True,
names=[
"year",
Expand Down Expand Up @@ -223,15 +220,10 @@ def get_state_populations_2010s(data_path: Optional[Path]) -> pd.DataFrame:


def get_state_populations_2020s(data_path: Optional[Path]) -> pd.DataFrame:
    """
    Load state population estimates for the 2020s.

    Reads ``NST-EST2023-ALLDATA.csv`` from ``data_path``, reshapes it with
    ``_melt_df`` for the years 2020 through 2023, and then fills in 2024 via
    ``impute_2024_population``.

    NOTE(review): ``data_path`` is annotated ``Optional``, but ``None`` cannot
    be joined with ``/`` -- confirm callers always pass a directory.

    :param data_path: directory containing the Census estimates file.
    :return: the frame produced by ``impute_2024_population``.
    """
    df = pd.read_csv(data_path / "NST-EST2023-ALLDATA.csv")

    df = _melt_df(df, list(range(2020, 2024)))
    return impute_2024_population(df)


def get_state_population_estimates(data_path: Optional[Path]) -> pd.DataFrame:
Expand Down

0 comments on commit 5833a8c

Please sign in to comment.