Skip to content

Commit

Permalink
Merge pull request #403 from radis/develop
Browse files Browse the repository at this point in the history
0.11.1
  • Loading branch information
erwanp committed Nov 15, 2021
2 parents 20e0733 + dcf786e commit 8605d64
Show file tree
Hide file tree
Showing 16 changed files with 382 additions and 116 deletions.
1 change: 1 addition & 0 deletions environment.yml
Expand Up @@ -4,6 +4,7 @@ channels:
- conda-forge
- astropy
- cantera
- plotly
dependencies:
- python=3.8
- astropy # Unit aware calculations
Expand Down
2 changes: 1 addition & 1 deletion radis/__version__.txt
@@ -1 +1 @@
0.11.0
0.11.1
4 changes: 2 additions & 2 deletions radis/default_radis.json
Expand Up @@ -20,8 +20,8 @@
"USE_CYTHON": true, # use Cython module if available (else, default to Python)
"GRIDPOINTS_PER_LINEWIDTH_WARN_THRESHOLD": 3, # raise a warning if less than THIS number of grid points per lineshape
"GRIDPOINTS_PER_LINEWIDTH_ERROR_THRESHOLD": 1, # raise an error if less than THIS number of grid points per lineshape
"MEMORY_MAPPING_ENGINE": "vaex", # warning: on Spyder this may result in freezes. see https://github.com/spyder-ide/spyder/issues/16183.
"SPARSE_WAVERANGE": "auto", # sparse DLM algorithm. May be smaller on dense spectra. If "auto", a scarcity criterion is used (Nlines/Ngrids > 1)
"MEMORY_MAPPING_ENGINE": "auto", # "vaex"/"pytables"/"feather". "auto" uses "vaex" in most cases unless you're using the Spyder IDE (where it may result in freezes). see https://github.com/spyder-ide/spyder/issues/16183.
"SPARSE_WAVERANGE": "auto", # true/false. sparse DLM algorithm. May be smaller on dense spectra. If "auto", a scarcity criterion is used (Nlines/Ngrids > 1)

# molecular parameters
# --------------------
Expand Down
37 changes: 31 additions & 6 deletions radis/io/dbmanager.py
Expand Up @@ -7,7 +7,7 @@
import os
import shutil
from io import BytesIO
from os.path import abspath, exists, splitext
from os.path import abspath, dirname, exists, expanduser, join, split, splitext
from zipfile import ZipFile

from radis.misc.config import addDatabankEntries, getDatabankEntries, getDatabankList
Expand All @@ -22,7 +22,6 @@
from radis.io.cache_files import check_not_deprecated

from datetime import date
from os.path import join, split

import numpy as np
import pandas as pd
Expand Down Expand Up @@ -97,26 +96,43 @@ def __init__(
nJobs=-2,
batch_size="auto",
):
from os import environ

if engine == "default":
from radis import config

engine = config["MEMORY_MAPPING_ENGINE"] # 'pytables', 'vaex', 'feather'
# Quick fix for #401
if engine == "auto":
# "auto" uses "vaex" in most cases unless you're using the Spyder IDE (where it may result in freezes).
# see https://github.com/spyder-ide/spyder/issues/16183.
# and https://github.com/radis/radis/issues/401
if any("SPYDER" in name for name in environ):
if verbose >= 3:
print(
                        "Spyder IDE detected. Memory-mapping-engine set to 'pytables' (less powerful than 'vaex', but avoids Spyder freezes). See https://github.com/spyder-ide/spyder/issues/16183. Change this behavior by setting the radis.config['MEMORY_MAPPING_ENGINE'] key"
)
engine = "pytables" # for HITRAN and HITEMP databases
else:
engine = "vaex"

# vaex processes are stuck if ran from Spyder. See https://github.com/spyder-ide/spyder/issues/16183
if engine == "vaex" and any("SPYDER" in name for name in os.environ):
if engine == "vaex" and any("SPYDER" in name for name in environ):
from radis.misc.log import printwarn

printwarn(
"Spyder IDE detected while using memory_mapping_engine='vaex'.\nVaex is the fastest way to read database files in RADIS, but Vaex processes may be stuck if ran from Spyder. See https://github.com/spyder-ide/spyder/issues/16183. You may consider using another IDE, or using a different `memory_mapping_engine` such as 'pytables' or 'feather'. You can change the engine in Spectrum.fetch_databank() calls, or globally by setting the 'MEMORY_MAPPING_ENGINE' key in your ~/radis.json (note: starting another iPython console somehow releases the freeze in Spyder) \n"
"Spyder IDE detected while using memory_mapping_engine='vaex'.\nVaex is the fastest way to read database files in RADIS, but Vaex processes may be stuck if ran from Spyder. See https://github.com/spyder-ide/spyder/issues/16183. You may consider using another IDE, or using a different `memory_mapping_engine` such as 'pytables' or 'feather'. You can change the engine in Spectrum.fetch_databank() calls, or globally by setting the 'MEMORY_MAPPING_ENGINE' key in your ~/radis.json \n"
)

self.name = name
self.molecule = molecule
self.local_databases = local_databases
# create folder if needed
from radis.misc.basics import make_folders
if not exists(local_databases):
from radis.misc.basics import make_folders

make_folders(*split(abspath(local_databases)))
make_folders(*split(abspath(dirname(local_databases))))
make_folders(*split(abspath(local_databases)))

self.downloadable = False # by default
self.format = ""
Expand Down Expand Up @@ -161,6 +177,15 @@ def get_filenames(self):
local_files = entries["path"]
urlnames = None

# Check that local files are the one we expect :
for f in local_files:
if not abspath(expanduser(f)).startswith(
abspath(expanduser(local_databases))
):
raise ValueError(
                    f"Database {self.name} is inconsistent: it should be stored in {local_databases} but files registered in ~/radis.json contain {f}. Please fix or delete the ~/radis.json entry."
)

elif self.is_downloadable():
# local_files = self.fetch_filenames()
urlnames = self.fetch_urlnames()
Expand Down
69 changes: 51 additions & 18 deletions radis/io/exomol.py
Expand Up @@ -436,6 +436,7 @@ def __init__(
bkgdatm="H2",
broadf=True,
engine="vaex",
verbose=True,
):
"""Molecular database for Exomol form
Expand All @@ -451,10 +452,25 @@ def __init__(
The trans/states files can be very large. For the first time to read it, we convert it to the feather-format. After the second-time, we use the feather format instead.
"""
if engine == "default":
import radis
from os import environ

engine = radis.config["MEMORY_MAPPING_ENGINE"]
if engine == "default":
from radis import config

engine = config["MEMORY_MAPPING_ENGINE"]
# Quick fix for #401
if engine == "auto":
# "auto" uses "vaex" in most cases unless you're using the Spyder IDE (where it may result in freezes).
# see https://github.com/spyder-ide/spyder/issues/16183.
# and https://github.com/radis/radis/issues/401
if any("SPYDER" in name for name in environ):
if verbose >= 3:
print(
                            "Spyder IDE detected. Memory-mapping-engine set to 'feather' (less powerful than 'vaex', but avoids Spyder freezes). See https://github.com/spyder-ide/spyder/issues/16183. Change this behavior by setting the radis.config['MEMORY_MAPPING_ENGINE'] key"
)
engine = "feather" # for ExoMol database
else:
engine = "vaex"

if engine == "vaex":
import vaex
Expand Down Expand Up @@ -530,13 +546,16 @@ def __init__(
# load states
if engine == "feather":
if self.states_file.with_suffix(".feather").exists():
ndstates = pd.read_feather(self.states_file.with_suffix(".feather"))
states = pd.read_feather(self.states_file.with_suffix(".feather"))
else:
print(
"Note: Caching states data to the feather format. After the second time, it will become much faster."
)
ndstates = exomolapi.read_states(self.states_file, dic_def)
ndstates.to_feather(self.states_file.with_suffix(".feather"))
states = exomolapi.read_states(self.states_file, dic_def, engine="csv")
states.to_feather(self.states_file.with_suffix(".feather"))
ndstates = states.to_numpy()[
:, :4
] # the i, E, g, J are in the 4 first columns
elif engine == "vaex":
if self.states_file.with_suffix(".bz2.hdf5").exists():
states = vaex.open(self.states_file.with_suffix(".bz2.hdf5"))
Expand All @@ -545,7 +564,7 @@ def __init__(
print(
"Note: Caching states data to the hdf5 format with vaex. After the second time, it will become much faster."
)
states = exomolapi.read_states(self.states_file, dic_def)
states = exomolapi.read_states(self.states_file, dic_def, engine="vaex")
ndstates = vaex.array_types.to_numpy(states)

# load pf
Expand Down Expand Up @@ -580,20 +599,23 @@ def __init__(
print(
"Note: Caching line transition data to the HDF5 format with vaex. After the second time, it will become much faster."
)
trans = exomolapi.read_trans(self.trans_file)
trans = exomolapi.read_trans(self.trans_file, engine="vaex")
ndtrans = vaex.array_types.to_numpy(trans)

# mask needs to be applied
mask_needed = True
elif engine == "feather":
if self.trans_file.with_suffix(".feather").exists():
ndtrans = pd.read_feather(self.trans_file.with_suffix(".feather"))
trans = pd.read_feather(self.trans_file.with_suffix(".feather"))
else:
print(
"Note: Caching line transition data to the feather format. After the second time, it will become much faster."
)
ndtrans = exomolapi.read_trans(self.trans_file)
ndtrans.to_feather(self.trans_file.with_suffix(".feather"))
trans = exomolapi.read_trans(self.trans_file, engine="csv")
trans.to_feather(self.trans_file.with_suffix(".feather"))
ndtrans = trans.to_numpy()
                # mask needs to be applied (in feather mode we don't select wavenumbers)
mask_needed = True

# compute gup and elower
(
Expand Down Expand Up @@ -631,16 +653,24 @@ def __init__(
T=self.Tref,
)

# exclude the lines whose nu_lines evaluated inside exomolapi.pickup_gE (thus sometimes different from the "nu_lines" column in trans) is not positive
trans["nu_positive"] = mask_zeronu
trans = trans[trans.nu_positive].extract()
trans.drop("nu_positive", inplace=True)
if engine == "vaex":
# exclude the lines whose nu_lines evaluated inside exomolapi.pickup_gE (thus sometimes different from the "nu_lines" column in trans) is not positive

trans = trans[trans.nu_positive].extract()
trans.drop("nu_positive", inplace=True)
else:
if False in mask_zeronu:
raise NotImplementedError(
                    "some wavenumbers are not defined; masking not implemented so far in the 'feather' engine"
)

trans["nu_lines"] = self.nu_lines
trans["Sij0"] = self.Sij0

if engine == "vaex":
trans.export(self.trans_file.with_suffix(".hdf5"))
# TODO : implement masking in 'feather' mode

else: # dic_def["numinf"] is not None
imin = (
Expand All @@ -665,10 +695,13 @@ def __init__(
print(
"Note: Caching line transition data to the feather format. After the second time, it will become much faster."
)
ndtrans = exomolapi.read_trans(trans_file)
ndtrans.to_feather(trans_file.with_suffix(".feather"))
trans = exomolapi.read_trans(trans_file, engine="csv")
trans.to_feather(trans_file.with_suffix(".feather"))
#!!TODO:restrict NOW the trans size to avoid useless overload of memory and CPU
# trans = trans[(trans['nu'] > self.nurange[0] - self.margin) & (trans['nu'] < self.nurange[1] + self.margin)]
ndtrans = trans.to_numpy()
                    # mask needs to be applied (in feather mode we don't select wavenumbers)
mask_needed = True
elif engine == "vaex":
if trans_file.with_suffix(".hdf5").exists():
trans = vaex.open(trans_file.with_suffix(".hdf5"))
Expand All @@ -689,7 +722,7 @@ def __init__(
print(
"Note: Caching line transition data to the HDF5 format with vaex. After the second time, it will become much faster."
)
trans = exomolapi.read_trans(trans_file)
trans = exomolapi.read_trans(trans_file, engine="vaex")
ndtrans = vaex.array_types.to_numpy(trans)

# mask needs to be applied
Expand Down Expand Up @@ -750,7 +783,7 @@ def __init__(
jupperx,
mask_zeronu,
quantumNumbersx,
) = exomolapi.pickup_gE(ndstates, ndtrans, trans_file)
) = exomolapi.pickup_gE(ndstates, ndtrans, trans_file, dic_def)
if engine == "vaex" and trans_file.with_suffix(".hdf5").exists():
Sij0x = ndtrans[:, 4]
else:
Expand Down
20 changes: 10 additions & 10 deletions radis/io/exomolapi.py
Expand Up @@ -107,7 +107,7 @@ def read_pf(pff):
partition data in pandas DataFrame
"""
dat = pd.read_csv(pff, sep="\s+", names=("T", "QT"))
dat = pd.read_csv(pff, sep=r"\s+", names=("T", "QT"))
return dat


Expand All @@ -134,14 +134,14 @@ def read_trans(transf, engine="vaex"):
dat = vaex.from_csv(
transf,
compression="bz2",
sep="\s+",
sep=r"\s+",
names=("i_upper", "i_lower", "A", "nu_lines"),
convert=True,
)
except:
dat = vaex.read_csv(
transf,
sep="\s+",
sep=r"\s+",
names=("i_upper", "i_lower", "A", "nu_lines"),
convert=True,
)
Expand All @@ -150,12 +150,12 @@ def read_trans(transf, engine="vaex"):
dat = pd.read_csv(
transf,
compression="bz2",
sep="\s+",
sep=r"\s+",
names=("i_upper", "i_lower", "A", "nu_lines"),
)
except:
dat = pd.read_csv(
transf, sep="\s+", names=("i_upper", "i_lower", "A", "nu_lines")
transf, sep=r"\s+", names=("i_upper", "i_lower", "A", "nu_lines")
)

return dat
Expand Down Expand Up @@ -206,22 +206,22 @@ def read_states(statesf, dic_def, engine="vaex"):
dat = vaex.from_csv(
statesf,
compression="bz2",
sep="\s+",
sep=r"\s+",
usecols=usecol,
names=names,
convert=True,
)
except:
dat = vaex.read_csv(
statesf, sep="\s+", usecols=usecol, names=names, convert=True
statesf, sep=r"\s+", usecols=usecol, names=names, convert=True
)
elif engine == "csv":
try:
dat = pd.read_csv(
statesf, compression="bz2", sep="\s+", usecols=usecol, names=names
statesf, compression="bz2", sep=r"\s+", usecols=usecol, names=names
)
except: #!!!TODO What was the expected error?
dat = pd.read_csv(statesf, sep="\s+", usecols=usecol, names=names)
dat = pd.read_csv(statesf, sep=r"\s+", usecols=usecol, names=names)
else:
raise NotImplementedError(engine)

Expand Down Expand Up @@ -380,7 +380,7 @@ def read_broad(broadf):
"""
bdat = pd.read_csv(
broadf,
sep="\s+",
sep=r"\s+",
names=(
"code",
"alpha_ref",
Expand Down
6 changes: 3 additions & 3 deletions radis/io/hdf5.py
Expand Up @@ -262,7 +262,6 @@ def load(
if key == "default":
key = r"/table"

import h5py
import vaex

# Open file
Expand Down Expand Up @@ -300,7 +299,6 @@ def load(
# TODO: define default key ?
if key == "default":
key = None
import h5py

with h5py.File(fname, "r") as f:
if key is None: # load from root level
Expand Down Expand Up @@ -446,7 +444,9 @@ def read_metadata(self, fname: str, key="default") -> dict:
raise err

else:
raise NotImplementedError(self.engine)
raise NotImplementedError(
f"'{self.engine}' is not implemented. Use 'pytables' or 'vaex' ?"
)

return metadata

Expand Down

0 comments on commit 8605d64

Please sign in to comment.