
Overhaul of train.py and adding Chesapeake CVPR trainer #103

Merged · 15 commits · Sep 9, 2021
22 changes: 22 additions & 0 deletions conf/chesapeake_cvpr.yaml
@@ -0,0 +1,22 @@
trainer:
Collaborator:

My biggest complaint about all this OmegaConf stuff is that there doesn't seem to be any documentation on what options are supported or what possible values they can take. There's no way to get a help message without argparse.

Member Author:

This is discussed in much more detail here -- facebookresearch/hydra#633.

For now I think comments in the yaml files are OK -- we can do more here. I think there will be few scenarios in which a user is trying to configure experiments without looking at the trainer code.
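For reference, the merged config can at least be dumped for inspection. A minimal sketch, assuming the conf/ layout in this PR (the exact flow inside train.py may differ):

```python
from omegaconf import OmegaConf

# Assumed merge order: repo defaults, then per-task defaults, then
# command-line dot-list overrides such as experiment.name=test.
conf = OmegaConf.load("conf/defaults.yaml")
task_conf = OmegaConf.load("conf/task_defaults/cyclone.yaml")
cli_conf = OmegaConf.from_cli()  # parses sys.argv, e.g. trainer.gpus=1
conf = OmegaConf.merge(conf, task_conf, cli_conf)

# Closest thing to --help for now: print every resolved option.
print(OmegaConf.to_yaml(conf))
```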

gpus: 1 # single GPU training
min_epochs: 20
max_epochs: 100
benchmark: True

experiment:
task: "chesapeake_cvpr"
name: "chesapeake_cvpr_example"
module:
loss: "ce" # cross entropy loss
segmentation_model: "unet"
encoder_name: "resnet18"
encoder_weights: null # null = random initialization; set "imagenet" for pre-trained weights
encoder_output_stride: 16
optimizer: "adamw"
learning_rate: 1e-2
learning_rate_schedule_patience: 2
datamodule:
batch_size: 32
num_workers: 4
train_state: "de" # train/val/test in Delaware
23 changes: 15 additions & 8 deletions conf/cyclone.yaml
@@ -1,11 +1,18 @@
program: # These are experiment level arguments
experiment_name: cyclone_test
program: # These are the arguments that define how the train.py script works
seed: 1337
overwrite: True

trainer: # These are all the arguments that will be passed to the pl.Trainer
min_epochs: 15
experiment:
task: "cyclone"
name: cyclone_test
module:
model: "resnet18"
learning_rate: 1e-3
learning_rate_schedule_patience: 2
datamodule:
batch_size: 32
num_workers: 4

task: # These are all the arguments that will be used to create an appropriate task
name: cyclone
learning_rate: 1e-3
learning_rate_schedule_patience: 2
trainer:
min_epochs: 15
gpus: 1
24 changes: 15 additions & 9 deletions conf/defaults.yaml
@@ -1,21 +1,27 @@
config_file: null # The user can pass a filename here on the command line
config_file: null # This lets the user pass a config filename to load other arguments from

program: # These are the default arguments
batch_size: 32
num_workers: 4
program: # These are the arguments that define how the train.py script works
seed: 1337
experiment_name: ??? # This is OmegaConf syntax that makes this a required field
output_dir: output
data_dir: data
log_dir: logs
overwrite: False

task:
name: ??? # this must be defined so we can get the task specific arguments
experiment: # These are arguments specific to the experiment we are running
name: ??? # this is the name given to this experiment run
task: ??? # this is the type of task to use for this experiment (e.g. "landcoverai")
module: # these will be passed as kwargs to the LightningModule associated with the task
learning_rate: 1e-3
datamodule: # these will be passed as kwargs to the LightningDataModule associated with the task
root_dir: ${program.data_dir}
seed: ${program.seed}
batch_size: 32
num_workers: 4

# Taken from https://pytorch-lightning.readthedocs.io/en/1.3.8/common/trainer.html#init

# The values here are taken from the defaults here https://pytorch-lightning.readthedocs.io/en/1.3.8/common/trainer.html#init
# this should probably be made into a schema, e.g. as shown at https://omegaconf.readthedocs.io/en/2.0_branch/structured_config.html#merging-with-other-configs
trainer:
trainer: # These are the parameters passed to the pytorch lightning Trainer object
logger: True
checkpoint_callback: True
callbacks: null
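The schema idea floated in the comment above could start small, e.g. with just the experiment section. A hedged sketch only; the dataclass is hypothetical and follows the OmegaConf structured-config docs linked in the comment:

```python
from dataclasses import dataclass

from omegaconf import MISSING, OmegaConf

@dataclass
class ExperimentConfig:
    name: str = MISSING  # plays the role of ??? in the yaml
    task: str = MISSING

# Merging user values against the schema validates types and rejects
# misspelled keys, which a plain yaml load silently accepts.
schema = OmegaConf.structured(ExperimentConfig)
user = OmegaConf.create({"name": "cyclone_test", "task": "cyclone"})
conf = OmegaConf.merge(schema, user)
assert conf.task == "cyclone"
```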
14 changes: 14 additions & 0 deletions conf/task_defaults/chesapeake_cvpr.yaml
@@ -0,0 +1,14 @@
experiment:
task: "chesapeake_cvpr"
module:
loss: "ce"
segmentation_model: "unet"
encoder_name: "resnet18"
encoder_weights: "imagenet"
encoder_output_stride: 16
learning_rate: 1e-3
learning_rate_schedule_patience: 2
datamodule:
batch_size: 32
num_workers: 4
train_state: "de"
14 changes: 9 additions & 5 deletions conf/task_defaults/cyclone.yaml
@@ -1,5 +1,9 @@
task:
name: "cyclone"
learning_rate: 1e-3
learning_rate_schedule_patience: 2
model: "resnet18"
experiment:
task: "cyclone"
module:
model: "resnet18"
learning_rate: 1e-3
learning_rate_schedule_patience: 2
datamodule:
batch_size: 32
num_workers: 4
24 changes: 14 additions & 10 deletions conf/task_defaults/landcoverai.yaml
@@ -1,10 +1,14 @@
task:
name: "landcoverai"
optimizer: "adamw"
learning_rate: 1e-3
learning_rate_schedule_patience: 2
loss: "ce"
segmentation_model: "deeplabv3+"
encoder_name: "resnet34"
encoder_weights: "imagenet"
encoder_output_stride: 16
experiment:
task: "landcoverai"
module:
loss: "ce"
segmentation_model: "deeplabv3+"
encoder_name: "resnet34"
encoder_weights: "imagenet"
encoder_output_stride: 16
optimizer: "adamw"
learning_rate: 1e-3
learning_rate_schedule_patience: 2
datamodule:
batch_size: 32
num_workers: 4
24 changes: 14 additions & 10 deletions conf/task_defaults/naipchesapeake.yaml
@@ -1,10 +1,14 @@
task:
name: "naipchesapeake"
optimizer: "adamw"
learning_rate: 1e-3
learning_rate_schedule_patience: 2
loss: "ce"
segmentation_model: "deeplabv3+"
encoder_name: "resnet34"
encoder_weights: "imagenet"
encoder_output_stride: 16
experiment:
task: "naipchesapeake"
module:
loss: "ce"
segmentation_model: "deeplabv3+"
encoder_name: "resnet34"
encoder_weights: "imagenet"
encoder_output_stride: 16
optimizer: "adamw"
learning_rate: 1e-3
learning_rate_schedule_patience: 2
datamodule:
batch_size: 32
num_workers: 4
21 changes: 13 additions & 8 deletions conf/task_defaults/sen12ms.yaml
@@ -1,8 +1,13 @@
task:
name: "sen12ms"
learning_rate: 1e-3
learning_rate_schedule_patience: 2
loss: "ce"
segmentation_model: "unet"
encoder_name: "resnet18"
encoder_weights: "imagenet"
experiment:
task: "sen12ms"
module:
loss: "ce"
segmentation_model: "unet"
encoder_name: "resnet18"
encoder_weights: "imagenet"
encoder_output_stride: 16
learning_rate: 1e-3
learning_rate_schedule_patience: 2
datamodule:
batch_size: 32
num_workers: 4
30 changes: 15 additions & 15 deletions tests/test_train.py
@@ -25,9 +25,9 @@ def test_output_file(tmp_path: Path) -> None:
args = [
sys.executable,
"train.py",
"program.experiment_name=test",
"experiment.name=test",
"program.output_dir=" + str(output_file),
"task.name=test",
"experiment.task=test",
]
ps = subprocess.run(args, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
assert ps.returncode != 0
@@ -43,9 +43,9 @@ def test_experiment_dir_not_empty(tmp_path: Path) -> None:
args = [
sys.executable,
"train.py",
"program.experiment_name=test",
"experiment.name=test",
"program.output_dir=" + str(output_dir),
"task.name=test",
"experiment.task=test",
]
ps = subprocess.run(args, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
assert ps.returncode != 0
@@ -64,11 +64,11 @@ def test_overwrite_experiment_dir(tmp_path: Path) -> None:
args = [
sys.executable,
"train.py",
"program.experiment_name=test",
"experiment.name=test",
"program.output_dir=" + str(output_dir),
"program.data_dir=" + data_dir,
"program.log_dir=" + str(log_dir),
"task.name=cyclone",
"experiment.task=cyclone",
"program.overwrite=True",
"trainer.fast_dev_run=1",
]
@@ -87,9 +87,9 @@ def test_invalid_task(task: str, tmp_path: Path) -> None:
args = [
sys.executable,
"train.py",
"program.experiment_name=test",
"experiment.name=test",
"program.output_dir=" + str(output_dir),
"task.name=" + task,
"experiment.task=" + task,
]
ps = subprocess.run(args, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
assert ps.returncode != 0
@@ -102,9 +102,9 @@ def test_missing_config_file(tmp_path: Path) -> None:
args = [
sys.executable,
"train.py",
"program.experiment_name=test",
"experiment.name=test",
"program.output_dir=" + str(output_dir),
"task.name=test",
"experiment.task=test",
"config_file=" + str(config_file),
]
ps = subprocess.run(args, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
@@ -120,12 +120,12 @@ def test_config_file(tmp_path: Path) -> None:
config_file.write_text(
f"""
program:
experiment_name: test
output_dir: {output_dir}
data_dir: {data_dir}
log_dir: {log_dir}
task:
name: cyclone
experiment:
name: test
task: cyclone
trainer:
fast_dev_run: true
"""
@@ -146,12 +146,12 @@ def test_tasks(task: str, tmp_path: Path) -> None:
args = [
sys.executable,
"train.py",
"program.experiment_name=test",
"experiment.name=test",
"program.output_dir=" + str(output_dir),
"program.data_dir=" + data_dir,
"program.log_dir=" + str(log_dir),
"trainer.fast_dev_run=1",
"task.name=" + task,
"experiment.task=" + task,
"program.overwrite=True",
]
subprocess.run(args, check=True)
2 changes: 1 addition & 1 deletion tests/trainers/test_cyclone.py
@@ -14,7 +14,7 @@ class TestCycloneTrainer:
@pytest.fixture
def default_config(self) -> Dict[str, Any]:
task_conf = OmegaConf.load("conf/task_defaults/cyclone.yaml")
task_args = OmegaConf.to_object(task_conf.task)
task_args = OmegaConf.to_object(task_conf.experiment.module)
task_args = cast(Dict[str, Any], task_args)
return task_args

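The fixture updates in this and the following test files all follow the same pattern: the task kwargs now live under experiment.module rather than a top-level task key. A quick illustration against the new cyclone defaults (exact parsed values depend on the yaml loader):

```python
from omegaconf import OmegaConf

conf = OmegaConf.load("conf/task_defaults/cyclone.yaml")
module_args = OmegaConf.to_object(conf.experiment.module)
# A plain dict of LightningModule kwargs, e.g.
# {"model": "resnet18", "learning_rate": ..., "learning_rate_schedule_patience": 2}
```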
2 changes: 1 addition & 1 deletion tests/trainers/test_landcoverai.py
@@ -17,7 +17,7 @@ class TestLandCoverAITrainer:
@pytest.fixture
def default_config(self) -> Dict[str, Any]:
task_conf = OmegaConf.load("conf/task_defaults/landcoverai.yaml")
task_args = OmegaConf.to_object(task_conf.task)
task_args = OmegaConf.to_object(task_conf.experiment.module)
task_args = cast(Dict[str, Any], task_args)
return task_args

2 changes: 1 addition & 1 deletion tests/trainers/test_sen12ms.py
@@ -16,7 +16,7 @@ class TestSEN12MSTrainer:
@pytest.fixture
def default_config(self) -> Dict[str, Any]:
task_conf = OmegaConf.load("conf/task_defaults/sen12ms.yaml")
task_args = OmegaConf.to_object(task_conf.task)
task_args = OmegaConf.to_object(task_conf.experiment.module)
task_args = cast(Dict[str, Any], task_args)
return task_args

41 changes: 34 additions & 7 deletions torchgeo/datasets/chesapeake.py
@@ -9,11 +9,13 @@
from typing import Any, Callable, Dict, List, Optional

import fiona
import numpy as np
import pyproj
import rasterio
import rasterio.mask
import shapely.geometry
import shapely.ops
import torch
from rasterio.crs import CRS

from .geo import GeoDataset, RasterDataset
@@ -291,6 +293,9 @@ class ChesapeakeCVPR(GeoDataset):
filename = "cvpr_chesapeake_landcover.zip"
md5 = "0ea5e7cb861be3fb8a06fedaaaf91af9"

crs = CRS.from_epsg(3857)
res = 1

valid_layers = [
"naip-new",
"naip-old",
@@ -357,11 +362,12 @@ def __init__(
if download:
self._download()

if not self._check_integrity():
raise RuntimeError(
"Dataset not found or corrupted. "
+ "You can use download=True to download it"
)
if checksum:
if not self._check_integrity():
raise RuntimeError(
"Dataset not found or corrupted. "
+ "You can use download=True to download it"
)

# Add all tiles into the index in epsg:3857 based on the included geojson
mint: float = 0
@@ -402,6 +408,8 @@ def __getitem__(self, query: BoundingBox) -> Dict[str, Any]:
filepaths = [hit.object for hit in hits]

sample = {
"image": [],
"mask": [],
"crs": self.crs,
"bbox": query,
}
@@ -436,11 +444,30 @@ def __getitem__(self, query: BoundingBox) -> Dict[str, Any]:
f, [query_geom_transformed], crop=True, all_touched=True
)

sample[layer] = data.squeeze()

if layer in [
"naip-new",
"naip-old",
"landsat-leaf-on",
"landsat-leaf-off",
]:
sample["image"].append(data)
elif layer in ["lc", "nlcd", "buildings"]:
sample["mask"].append(data)
else:
raise IndexError(f"query: {query} spans multiple tiles which is not valid")

sample["image"] = np.concatenate( # type: ignore[no-untyped-call]
sample["image"], axis=0
)
sample["mask"] = np.concatenate( # type: ignore[no-untyped-call]
sample["mask"], axis=0
)

sample["image"] = torch.from_numpy( # type: ignore[attr-defined]
sample["image"]
)
sample["mask"] = torch.from_numpy(sample["mask"]) # type: ignore[attr-defined]

if self.transforms is not None:
sample = self.transforms(sample)

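With this change, a spatial query returns stacked image and mask tensors instead of one array per layer: "image" layers are concatenated channel-wise into sample["image"], and "lc", "nlcd", and "buildings" go into sample["mask"]. A hypothetical usage sketch (the constructor arguments and coordinates are illustrative, not taken from this diff):

```python
import torch
from torchgeo.datasets import BoundingBox, ChesapeakeCVPR

# Hypothetical setup: one imagery layer and one label layer.
ds = ChesapeakeCVPR(root="data/chesapeake", layers=["naip-new", "lc"])

# Query a spatial/temporal window; placeholder coordinates.
sample = ds[BoundingBox(minx=0, maxx=512, miny=0, maxy=512, mint=0, maxt=0)]
assert isinstance(sample["image"], torch.Tensor)
assert isinstance(sample["mask"], torch.Tensor)
```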