Skip to content

Commit

Permalink
🔀 Merge branch 'round_grids_to_250' (#155)
Browse files Browse the repository at this point in the history
Closes #155. Re-grids and re-tiles the groundtruth data, with coordinates rounded to the nearest 250.
  • Loading branch information
weiji14 committed Jun 21, 2019
2 parents 1edb16e + 77b4fe1 commit 259398c
Show file tree
Hide file tree
Showing 10 changed files with 4,911 additions and 5,066 deletions.
141 changes: 77 additions & 64 deletions data_prep.ipynb

Large diffs are not rendered by default.

56 changes: 36 additions & 20 deletions data_prep.py
Expand Up @@ -30,6 +30,7 @@
import shutil
import sys
import tarfile
import tempfile
import urllib
import yaml
import zipfile
Expand Down Expand Up @@ -306,6 +307,7 @@ def ascii_to_xyz(pipeline_file: str) -> pd.DataFrame:
)
for f in files
)
df.dropna(axis="index", inplace=True) # drop rows with NaN values
df.reset_index(drop=True, inplace=True) # reset index after concatenation

## Advanced table read with conversions
Expand Down Expand Up @@ -354,19 +356,32 @@ def ascii_to_xyz(pipeline_file: str) -> pd.DataFrame:
# ![Clean XYZ Table to Raster Grid via interpolation function](https://yuml.me/diagram/scruffy;dir:LR/class/[Clean-XYZ-Table|*.xyz]->[Interpolation-Function],[Interpolation-Function]->[Raster-Grid|*.tif/*.nc])

# %%
def get_region(xyz_data: pd.DataFrame, round_increment: int = 250) -> str:
    """
    Get an extended bounding box region for points in an xyz pandas.DataFrame
    with columns x, y, and z.

    The minimum/maximum coordinates are rounded outward to multiples of
    round_increment. Implementation uses gmt.info with the -I (increment)
    setting, see https://gmt.soest.hawaii.edu/doc/latest/gmtinfo.html#i

    The region is returned as a string 'xmin/xmax/ymin/ymax', directly usable
    as the -R 'region of interest' parameter in GMT. The rounding is chosen to
    give grid dimensions that run fastest in programs like GMT surface.

    Parameters
    ----------
    xyz_data : pd.DataFrame
        Table of points; must have exactly the columns ["x", "y", "z"].
    round_increment : int
        Increment to round the bounding box coordinates to (default 250).

    Returns
    -------
    str
        Region string 'xmin/xmax/ymin/ymax'.

    >>> xyz_data = pd.DataFrame(
    ...     10000 * np.random.RandomState(seed=42).rand(30).reshape(10, 3),
    ...     columns=["x", "y", "z"],
    ... )
    >>> get_region(xyz_data=xyz_data)
    '-250/9500/0/9750'
    """
    # Enforce the expected column schema so gmt.info reads x,y,z in order
    assert (xyz_data.columns == pd.Index(data=["x", "y", "z"], dtype="object")).all()

    # gmt.info reads from a file, so round-trip the table through a temporary CSV
    with tempfile.NamedTemporaryFile(suffix=".csv") as tmpfile:
        xyz_data.to_csv(tmpfile.name, header=False, index=False)
        # -Is<inc> extends the region outward to multiples of round_increment;
        # strip() removes trailing newline, [2:] drops the leading '-R' prefix
        region = gmt.info(fname=tmpfile.name, I=f"s{round_increment}").strip()[2:]

    return region


# %%
Expand All @@ -381,18 +396,19 @@ def xyz_to_grid(
"""
Performs interpolation of x, y, z point data to a raster grid.
>>> xyz_data = 1000*pd.DataFrame(np.random.RandomState(seed=42).rand(60).reshape(20, 3))
>>> xyz_data = pd.DataFrame(
... 600 * np.random.RandomState(seed=42).rand(60).reshape(20, 3),
... columns=["x", "y", "z"],
... )
>>> region = get_region(xyz_data=xyz_data)
>>> grid = xyz_to_grid(xyz_data=xyz_data, region=region, spacing=250)
>>> grid.to_array().shape
(1, 5, 5)
(1, 4, 4)
>>> grid.to_array().values
array([[[403.17618 , 544.92535 , 670.7824 , 980.75055 , 961.47723 ],
[379.0757 , 459.26407 , 314.38297 , 377.78555 , 546.0469 ],
[450.67664 , 343.26 , 88.391594, 260.10492 , 452.3337 ],
[586.09906 , 469.74008 , 216.8168 , 486.9802 , 642.2116 ],
[451.4794 , 652.7244 , 325.77896 , 879.8973 , 916.7921 ]]],
dtype=float32)
array([[[135.95927, 294.8152 , 615.32513, 706.4135 ],
[224.46773, 191.75693, 298.87057, 521.85254],
[242.41916, 149.34332, 430.84137, 603.4236 ],
[154.60516, 194.24237, 447.6185 , 645.13574]]], dtype=float32)
"""
## Preprocessing with blockmedian
with gmt.helpers.GMTTempFile(suffix=".txt") as tmpfile:
Expand All @@ -414,7 +430,7 @@ def xyz_to_grid(
region=region,
spacing=f"{spacing}+e",
T=tension,
V="",
V="n", # normal verbosity: produce only fatal error messages
M=f"{mask_cell_radius}c",
)

Expand All @@ -435,7 +451,7 @@ def xyz_to_grid(
grid_dict[name] = xyz_to_grid(
xyz_data=xyz_data, region=region, outfile=f"highres/{name}.nc"
)
print(f"done! {grid_dict[name].to_array().shape}")
print(f"done! {grid_dict[name].z.shape}")

# %% [markdown]
# ### 2.3 Plot raster grids
Expand Down
477 changes: 214 additions & 263 deletions deepbedmap.ipynb

Large diffs are not rendered by default.

5 changes: 2 additions & 3 deletions deepbedmap.py
Expand Up @@ -108,11 +108,10 @@ def get_image_with_bounds(filepaths: list, indexers: dict = None) -> xr.DataArra


# %%
test_filepaths = ["highres/2007tx", "highres/2010tr", "highres/istarxx"]
test_filepaths = ["highres/2007tx"] # , "highres/2010tr", "highres/istarxx"]
groundtruth = get_image_with_bounds(
filepaths=[f"{t}.nc" for t in test_filepaths],
indexers={"y": slice(0, -1), "x": slice(0, -1)}, # for 2007tx, 2010tr and istarxx
# indexers={"y": slice(1, -1), "x": slice(1, -1)}, # for 2007tx only
# indexers={"y": slice(0, -1), "x": slice(0, -1)}, # for 2007tx, 2010tr and istarxx
)

# %% [markdown]
Expand Down
12 changes: 8 additions & 4 deletions features/environment.py
Expand Up @@ -84,10 +84,13 @@ def _quick_download_lowres_misc_datasets():
print("done!")


def _download_deepbedmap_model_weights_from_comet():
def _download_deepbedmap_model_weights_from_comet(experiment_key: str = "latest"):
"""
Download latest neural network model weights from Comet.ML
Uses their Python REST API class at https://www.comet.ml/docs/python-sdk/API/
Download DeepBedMap's Generator neural network model weights from Comet.ML
By default, the model weights from the latest experimental run are downloaded
Passing in an alternative experiment_key hash will download that one instead
Uses Comet.ML's Python REST API class at https://www.comet.ml/docs/python-sdk/API/
Requires the COMET_REST_API_KEY environment variable to be set in the .env file
"""
comet_api = comet_ml.API(
Expand All @@ -99,7 +102,8 @@ def _download_deepbedmap_model_weights_from_comet():
df = pd.io.json.json_normalize(data=project.data["experiments"].values())

# Get the key to the latest DeepBedMap experiment on Comet ML
experiment_key = df.loc[df["start_server_timestamp"].idxmax()].experiment_key
if experiment_key == "latest":
experiment_key = df.loc[df["start_server_timestamp"].idxmax()].experiment_key
experiment = comet_api.get(
workspace="weiji14", project="deepbedmap", experiment=experiment_key
)
Expand Down
6 changes: 4 additions & 2 deletions features/steps/test_data_prep.py
Expand Up @@ -45,7 +45,8 @@ def process_data_through_pipeline_and_get_output(context, pipeline_file):
@when("interpolate the xyz data table to {output_file}")
def interpolate_xyz_data_to_grid(context, output_file):
    """
    Behave step: interpolate the xyz point data table onto a raster grid.

    Derives the region from the xyz data via data_prep.get_region, then runs
    data_prep.xyz_to_grid to write the interpolated grid to
    /tmp/highres/{output_file}, recorded on context.outfile for later steps.
    """
    region = context.data_prep.get_region(context.xyz_data)
    # Write under /tmp so test artifacts do not pollute the repository tree
    context.outfile = os.path.join("/tmp/highres", output_file)
    os.makedirs(os.path.dirname(context.outfile), exist_ok=True)
    context.data_prep.xyz_to_grid(
        xyz_data=context.xyz_data, region=region, outfile=context.outfile
    )
Expand All @@ -60,7 +61,8 @@ def open_raster_grid_to_check(context):
@given("a big {dataset_type} raster grid {raster_grid}")
def get_a_raster_grid(context, dataset_type, raster_grid):
context.raster_grid = raster_grid
context.filepath = os.path.join(dataset_type, raster_grid)
context.filepath = os.path.join("/tmp", dataset_type, raster_grid)
os.makedirs(os.path.dirname(context.filepath), exist_ok=True)
url = (
f"https://github.com/weiji14/deepbedmap/releases/download/v0.7.0/{raster_grid}"
)
Expand Down
2 changes: 1 addition & 1 deletion misc/README.md
Expand Up @@ -6,6 +6,6 @@ Filename|Location|Resolution|Literature Citation|Data Citation
---|---|---|---|---
Arthern_accumulation_bedmap2_grid1.tif|Antarctica|1000m|[Arthern2006Accumulation](https://doi.org/10.1029/2004JD005667)|
lisa750_2013182_2017120_0000_0400_vv_v1.tif|Antarctica|750m|[Fahnestock2019LISA](https://doi.org/10.1016/j.rse.2015.11.023)|[DOI](https://doi.org/10.7265/nxpc-e997)
3 *.dbf files|Antarctica|nan|[Mouginot2017MEASURES](https://doi.org/10.1126/science.1235798)|[DOI](https://doi.org/10.5067/AXE4121732AD)
3 *.shx files|Antarctica|nan|[Mouginot2017MEASURES](https://doi.org/10.1126/science.1235798)|[DOI](https://doi.org/10.5067/AXE4121732AD)
2 *.tif files|Antarctica|100m|[Noh2018REMA](https://doi.org/10.1016/j.isprsjprs.2017.12.008)|[DOI](https://doi.org/10.7910/DVN/SAIK8B)
MEaSUREs_IceFlowSpeed_450m.tif|Antarctica|450m|[Rignot2011MEASURES](https://doi.org/10.1126/science.1208336)|[DOI](https://doi.org/10.5067/D7GK8F5J8M8R)

0 comments on commit 259398c

Please sign in to comment.