Fix South America Soybean Filename Glob and Regex (#2062)

* fix filename glob and regex
* remove unwanted file
* update test data
* update dataset test
* remove zip file
* remove additional directory
* fix ruff check and format
microsoft · May 17, 2024 · 96507bd · 96507bd
1 parent 1a0fae9
commit 96507bd
Show file tree

Hide file tree

Showing 6 changed files with 10 additions and 27 deletions.
diff --git a/tests/data/south_america_soybean/SouthAmericaSoybean.zip b/tests/data/south_america_soybean/SouthAmericaSoybean.zip
diff --git a/tests/data/south_america_soybean/SouthAmericaSoybean/South_America_Soybean_2002.tif b/tests/data/south_america_soybean/SouthAmerica_Soybean_2002.tif
diff --git a/tests/data/south_america_soybean/SouthAmericaSoybean/South_America_Soybean_2021.tif b/tests/data/south_america_soybean/SouthAmerica_Soybean_2021.tif
diff --git a/tests/data/south_america_soybean/data.py b/tests/data/south_america_soybean/data.py
@@ -4,7 +4,6 @@
 # Licensed under the MIT License.
 import hashlib
 import os
-import shutil
 
 import numpy as np
 import rasterio
@@ -15,7 +14,7 @@
 
 
 np.random.seed(0)
-files = ['South_America_Soybean_2002.tif', 'South_America_Soybean_2021.tif']
+files = ['SouthAmerica_Soybean_2002.tif', 'SouthAmerica_Soybean_2021.tif']
 
 
 def create_file(path: str, dtype: str):
@@ -41,19 +40,11 @@ def create_file(path: str, dtype: str):
 
 
 if __name__ == '__main__':
-    dir = os.path.join(os.getcwd(), 'SouthAmericaSoybean')
-    if os.path.exists(dir) and os.path.isdir(dir):
-        shutil.rmtree(dir)
-
-    os.makedirs(dir, exist_ok=True)
-
     for file in files:
-        create_file(os.path.join(dir, file), dtype='int8')
-
-    # Compress data
-    shutil.make_archive('SouthAmericaSoybean', 'zip', '.', dir)
+        create_file(os.path.join(os.getcwd(), file), dtype='int8')
 
     # Compute checksums
-    with open('SouthAmericaSoybean.zip', 'rb') as f:
-        md5 = hashlib.md5(f.read()).hexdigest()
-        print(f'SouthAmericaSoybean.zip: {md5}')
+    for file in files:
+        with open(file, 'rb') as f:
+            md5 = hashlib.md5(f.read()).hexdigest()
+            print(f'{file}: {md5}')
diff --git a/tests/datasets/test_south_america_soybean.py b/tests/datasets/test_south_america_soybean.py
@@ -33,11 +33,7 @@ def dataset(self, monkeypatch: MonkeyPatch, tmp_path: Path) -> SouthAmericaSoybe
         )
         transforms = nn.Identity()
         url = os.path.join(
-            'tests',
-            'data',
-            'south_america_soybean',
-            'SouthAmericaSoybean',
-            'South_America_Soybean_{}.tif',
+            'tests', 'data', 'south_america_soybean', 'SouthAmerica_Soybean_{}.tif'
         )
 
         monkeypatch.setattr(SouthAmericaSoybean, 'url', url)
@@ -69,11 +65,7 @@ def test_already_extracted(self, dataset: SouthAmericaSoybean) -> None:
 
     def test_already_downloaded(self, tmp_path: Path) -> None:
         pathname = os.path.join(
-            'tests',
-            'data',
-            'south_america_soybean',
-            'SouthAmericaSoybean',
-            'South_America_Soybean_2002.tif',
+            'tests', 'data', 'south_america_soybean', 'SouthAmerica_Soybean_2002.tif'
         )
         root = str(tmp_path)
         shutil.copy(pathname, root)

diff --git a/torchgeo/datasets/south_america_soybean.py b/torchgeo/datasets/south_america_soybean.py
@@ -39,8 +39,8 @@ class SouthAmericaSoybean(RasterDataset):
     .. versionadded:: 0.6
     """
 
-    filename_glob = 'South_America_Soybean_*.*'
-    filename_regex = r'South_America_Soybean_(?P<year>\d{4})'
+    filename_glob = 'SouthAmerica_Soybean_*.*'
+    filename_regex = r'SouthAmerica_Soybean_(?P<year>\d{4})'
 
     date_format = '%Y'
     is_image = False