Skip to content

Commit

Permalink
Merge pull request #403 from radis/develop
Browse files Browse the repository at this point in the history
0.11.1
  • Loading branch information
erwanp committed Nov 15, 2021
2 parents 20e0733 + dcf786e commit 8605d64
Show file tree
Hide file tree
Showing 16 changed files with 382 additions and 116 deletions.
1 change: 1 addition & 0 deletions environment.yml
Expand Up @@ -4,6 +4,7 @@ channels:
- conda-forge
- astropy
- cantera
- plotly
dependencies:
- python=3.8
- astropy # Unit aware calculations
Expand Down
2 changes: 1 addition & 1 deletion radis/__version__.txt
@@ -1 +1 @@
0.11.0
0.11.1
4 changes: 2 additions & 2 deletions radis/default_radis.json
Expand Up @@ -20,8 +20,8 @@
"USE_CYTHON": true, # use Cython module if available (else, default to Python)
"GRIDPOINTS_PER_LINEWIDTH_WARN_THRESHOLD": 3, # raise a warning if less than THIS number of grid points per lineshape
"GRIDPOINTS_PER_LINEWIDTH_ERROR_THRESHOLD": 1, # raise an error if less than THIS number of grid points per lineshape
"MEMORY_MAPPING_ENGINE": "vaex", # warning: on Spyder this may result in freezes. see https://github.com/spyder-ide/spyder/issues/16183.
"SPARSE_WAVERANGE": "auto", # sparse DLM algorithm. May be smaller on dense spectra. If "auto", a scarcity criterion is used (Nlines/Ngrids > 1)
"MEMORY_MAPPING_ENGINE": "auto", # "vaex"/"pytables"/"feather". "auto" uses "vaex" in most cases unless you're using the Spyder IDE (where it may result in freezes). see https://github.com/spyder-ide/spyder/issues/16183.
"SPARSE_WAVERANGE": "auto", # true/false. sparse DLM algorithm. May be smaller on dense spectra. If "auto", a scarcity criterion is used (Nlines/Ngrids > 1)

# molecular parameters
# --------------------
Expand Down
37 changes: 31 additions & 6 deletions radis/io/dbmanager.py
Expand Up @@ -7,7 +7,7 @@
import os
import shutil
from io import BytesIO
from os.path import abspath, exists, splitext
from os.path import abspath, dirname, exists, expanduser, join, split, splitext
from zipfile import ZipFile

from radis.misc.config import addDatabankEntries, getDatabankEntries, getDatabankList
Expand All @@ -22,7 +22,6 @@
from radis.io.cache_files import check_not_deprecated

from datetime import date
from os.path import join, split

import numpy as np
import pandas as pd
Expand Down Expand Up @@ -97,26 +96,43 @@ def __init__(
nJobs=-2,
batch_size="auto",
):
from os import environ

if engine == "default":
from radis import config

engine = config["MEMORY_MAPPING_ENGINE"] # 'pytables', 'vaex', 'feather'
# Quick fix for #401
if engine == "auto":
# "auto" uses "vaex" in most cases unless you're using the Spyder IDE (where it may result in freezes).
# see https://github.com/spyder-ide/spyder/issues/16183.
# and https://github.com/radis/radis/issues/401
if any("SPYDER" in name for name in environ):
if verbose >= 3:
print(
                        "Spyder IDE detected. Memory-mapping-engine set to 'pytables' (less powerful than 'vaex', but avoids Spyder freezes). See https://github.com/spyder-ide/spyder/issues/16183. Change this behavior by setting the radis.config['MEMORY_MAPPING_ENGINE'] key"
)
engine = "pytables" # for HITRAN and HITEMP databases
else:
engine = "vaex"

# vaex processes are stuck if ran from Spyder. See https://github.com/spyder-ide/spyder/issues/16183
if engine == "vaex" and any("SPYDER" in name for name in os.environ):
if engine == "vaex" and any("SPYDER" in name for name in environ):
from radis.misc.log import printwarn

printwarn(
"Spyder IDE detected while using memory_mapping_engine='vaex'.\nVaex is the fastest way to read database files in RADIS, but Vaex processes may be stuck if ran from Spyder. See https://github.com/spyder-ide/spyder/issues/16183. You may consider using another IDE, or using a different `memory_mapping_engine` such as 'pytables' or 'feather'. You can change the engine in Spectrum.fetch_databank() calls, or globally by setting the 'MEMORY_MAPPING_ENGINE' key in your ~/radis.json (note: starting another iPython console somehow releases the freeze in Spyder) \n"
"Spyder IDE detected while using memory_mapping_engine='vaex'.\nVaex is the fastest way to read database files in RADIS, but Vaex processes may be stuck if ran from Spyder. See https://github.com/spyder-ide/spyder/issues/16183. You may consider using another IDE, or using a different `memory_mapping_engine` such as 'pytables' or 'feather'. You can change the engine in Spectrum.fetch_databank() calls, or globally by setting the 'MEMORY_MAPPING_ENGINE' key in your ~/radis.json \n"
)

self.name = name
self.molecule = molecule
self.local_databases = local_databases
# create folder if needed
from radis.misc.basics import make_folders
if not exists(local_databases):
from radis.misc.basics import make_folders

make_folders(*split(abspath(local_databases)))
make_folders(*split(abspath(dirname(local_databases))))
make_folders(*split(abspath(local_databases)))

self.downloadable = False # by default
self.format = ""
Expand Down Expand Up @@ -161,6 +177,15 @@ def get_filenames(self):
local_files = entries["path"]
urlnames = None

# Check that local files are the one we expect :
for f in local_files:
if not abspath(expanduser(f)).startswith(
abspath(expanduser(local_databases))
):
raise ValueError(
                    f"Database {self.name} is inconsistent: it should be stored in {local_databases} but files registered in ~/radis.json contain {f}. Please fix or delete the ~/radis.json entry."
)

elif self.is_downloadable():
# local_files = self.fetch_filenames()
urlnames = self.fetch_urlnames()
Expand Down
69 changes: 51 additions & 18 deletions radis/io/exomol.py
Expand Up @@ -436,6 +436,7 @@ def __init__(
bkgdatm="H2",
broadf=True,
engine="vaex",
verbose=True,
):
"""Molecular database for Exomol form
Expand All @@ -451,10 +452,25 @@ def __init__(
The trans/states files can be very large. For the first time to read it, we convert it to the feather-format. After the second-time, we use the feather format instead.
"""
if engine == "default":
import radis
from os import environ

engine = radis.config["MEMORY_MAPPING_ENGINE"]
if engine == "default":
from radis import config

engine = config["MEMORY_MAPPING_ENGINE"]
# Quick fix for #401
if engine == "auto":
# "auto" uses "vaex" in most cases unless you're using the Spyder IDE (where it may result in freezes).
# see https://github.com/spyder-ide/spyder/issues/16183.
# and https://github.com/radis/radis/issues/401
if any("SPYDER" in name for name in environ):
if verbose >= 3:
print(
                            "Spyder IDE detected. Memory-mapping-engine set to 'feather' (less powerful than 'vaex', but avoids Spyder freezes). See https://github.com/spyder-ide/spyder/issues/16183. Change this behavior by setting the radis.config['MEMORY_MAPPING_ENGINE'] key"
)
engine = "feather" # for ExoMol database
else:
engine = "vaex"

if engine == "vaex":
import vaex
Expand Down Expand Up @@ -530,13 +546,16 @@ def __init__(
# load states
if engine == "feather":
if self.states_file.with_suffix(".feather").exists():
ndstates = pd.read_feather(self.states_file.with_suffix(".feather"))
states = pd.read_feather(self.states_file.with_suffix(".feather"))
else:
print(
"Note: Caching states data to the feather format. After the second time, it will become much faster."
)
ndstates = exomolapi.read_states(self.states_file, dic_def)
ndstates.to_feather(self.states_file.with_suffix(".feather"))
states = exomolapi.read_states(self.states_file, dic_def, engine="csv")
states.to_feather(self.states_file.with_suffix(".feather"))
ndstates = states.to_numpy()[
:, :4
] # the i, E, g, J are in the 4 first columns
elif engine == "vaex":
if self.states_file.with_suffix(".bz2.hdf5").exists():
states = vaex.open(self.states_file.with_suffix(".bz2.hdf5"))
Expand All @@ -545,7 +564,7 @@ def __init__(
print(
"Note: Caching states data to the hdf5 format with vaex. After the second time, it will become much faster."
)
states = exomolapi.read_states(self.states_file, dic_def)
states = exomolapi.read_states(self.states_file, dic_def, engine="vaex")
ndstates = vaex.array_types.to_numpy(states)

# load pf
Expand Down Expand Up @@ -580,20 +599,23 @@ def __init__(
print(
"Note: Caching line transition data to the HDF5 format with vaex. After the second time, it will become much faster."
)
trans = exomolapi.read_trans(self.trans_file)
trans = exomolapi.read_trans(self.trans_file, engine="vaex")
ndtrans = vaex.array_types.to_numpy(trans)

# mask needs to be applied
mask_needed = True
elif engine == "feather":
if self.trans_file.with_suffix(".feather").exists():
ndtrans = pd.read_feather(self.trans_file.with_suffix(".feather"))
trans = pd.read_feather(self.trans_file.with_suffix(".feather"))
else:
print(
"Note: Caching line transition data to the feather format. After the second time, it will become much faster."
)
ndtrans = exomolapi.read_trans(self.trans_file)
ndtrans.to_feather(self.trans_file.with_suffix(".feather"))
trans = exomolapi.read_trans(self.trans_file, engine="csv")
trans.to_feather(self.trans_file.with_suffix(".feather"))
ndtrans = trans.to_numpy()
                # mask needs to be applied (in feather mode we don't select wavenumbers)
mask_needed = True

# compute gup and elower
(
Expand Down Expand Up @@ -631,16 +653,24 @@ def __init__(
T=self.Tref,
)

# exclude the lines whose nu_lines evaluated inside exomolapi.pickup_gE (thus sometimes different from the "nu_lines" column in trans) is not positive
trans["nu_positive"] = mask_zeronu
trans = trans[trans.nu_positive].extract()
trans.drop("nu_positive", inplace=True)
if engine == "vaex":
# exclude the lines whose nu_lines evaluated inside exomolapi.pickup_gE (thus sometimes different from the "nu_lines" column in trans) is not positive

trans = trans[trans.nu_positive].extract()
trans.drop("nu_positive", inplace=True)
else:
if False in mask_zeronu:
raise NotImplementedError(
                    "some wavenumbers are not defined; masking not implemented so far in the 'feather' engine"
)

trans["nu_lines"] = self.nu_lines
trans["Sij0"] = self.Sij0

if engine == "vaex":
trans.export(self.trans_file.with_suffix(".hdf5"))
# TODO : implement masking in 'feather' mode

else: # dic_def["numinf"] is not None
imin = (
Expand All @@ -665,10 +695,13 @@ def __init__(
print(
"Note: Caching line transition data to the feather format. After the second time, it will become much faster."
)
ndtrans = exomolapi.read_trans(trans_file)
ndtrans.to_feather(trans_file.with_suffix(".feather"))
trans = exomolapi.read_trans(trans_file, engine="csv")
trans.to_feather(trans_file.with_suffix(".feather"))
#!!TODO:restrict NOW the trans size to avoid useless overload of memory and CPU
# trans = trans[(trans['nu'] > self.nurange[0] - self.margin) & (trans['nu'] < self.nurange[1] + self.margin)]
ndtrans = trans.to_numpy()
                    # mask needs to be applied (in feather mode we don't select wavenumbers)
mask_needed = True
elif engine == "vaex":
if trans_file.with_suffix(".hdf5").exists():
trans = vaex.open(trans_file.with_suffix(".hdf5"))
Expand All @@ -689,7 +722,7 @@ def __init__(
print(
"Note: Caching line transition data to the HDF5 format with vaex. After the second time, it will become much faster."
)
trans = exomolapi.read_trans(trans_file)
trans = exomolapi.read_trans(trans_file, engine="vaex")
ndtrans = vaex.array_types.to_numpy(trans)

# mask needs to be applied
Expand Down Expand Up @@ -750,7 +783,7 @@ def __init__(
jupperx,
mask_zeronu,
quantumNumbersx,
) = exomolapi.pickup_gE(ndstates, ndtrans, trans_file)
) = exomolapi.pickup_gE(ndstates, ndtrans, trans_file, dic_def)
if engine == "vaex" and trans_file.with_suffix(".hdf5").exists():
Sij0x = ndtrans[:, 4]
else:
Expand Down
20 changes: 10 additions & 10 deletions radis/io/exomolapi.py
Expand Up @@ -107,7 +107,7 @@ def read_pf(pff):
partition data in pandas DataFrame
"""
dat = pd.read_csv(pff, sep="\s+", names=("T", "QT"))
dat = pd.read_csv(pff, sep=r"\s+", names=("T", "QT"))
return dat


Expand All @@ -134,14 +134,14 @@ def read_trans(transf, engine="vaex"):
dat = vaex.from_csv(
transf,
compression="bz2",
sep="\s+",
sep=r"\s+",
names=("i_upper", "i_lower", "A", "nu_lines"),
convert=True,
)
except:
dat = vaex.read_csv(
transf,
sep="\s+",
sep=r"\s+",
names=("i_upper", "i_lower", "A", "nu_lines"),
convert=True,
)
Expand All @@ -150,12 +150,12 @@ def read_trans(transf, engine="vaex"):
dat = pd.read_csv(
transf,
compression="bz2",
sep="\s+",
sep=r"\s+",
names=("i_upper", "i_lower", "A", "nu_lines"),
)
except:
dat = pd.read_csv(
transf, sep="\s+", names=("i_upper", "i_lower", "A", "nu_lines")
transf, sep=r"\s+", names=("i_upper", "i_lower", "A", "nu_lines")
)

return dat
Expand Down Expand Up @@ -206,22 +206,22 @@ def read_states(statesf, dic_def, engine="vaex"):
dat = vaex.from_csv(
statesf,
compression="bz2",
sep="\s+",
sep=r"\s+",
usecols=usecol,
names=names,
convert=True,
)
except:
dat = vaex.read_csv(
statesf, sep="\s+", usecols=usecol, names=names, convert=True
statesf, sep=r"\s+", usecols=usecol, names=names, convert=True
)
elif engine == "csv":
try:
dat = pd.read_csv(
statesf, compression="bz2", sep="\s+", usecols=usecol, names=names
statesf, compression="bz2", sep=r"\s+", usecols=usecol, names=names
)
except: #!!!TODO What was the expected error?
dat = pd.read_csv(statesf, sep="\s+", usecols=usecol, names=names)
dat = pd.read_csv(statesf, sep=r"\s+", usecols=usecol, names=names)
else:
raise NotImplementedError(engine)

Expand Down Expand Up @@ -380,7 +380,7 @@ def read_broad(broadf):
"""
bdat = pd.read_csv(
broadf,
sep="\s+",
sep=r"\s+",
names=(
"code",
"alpha_ref",
Expand Down
6 changes: 3 additions & 3 deletions radis/io/hdf5.py
Expand Up @@ -262,7 +262,6 @@ def load(
if key == "default":
key = r"/table"

import h5py
import vaex

# Open file
Expand Down Expand Up @@ -300,7 +299,6 @@ def load(
# TODO: define default key ?
if key == "default":
key = None
import h5py

with h5py.File(fname, "r") as f:
if key is None: # load from root level
Expand Down Expand Up @@ -446,7 +444,9 @@ def read_metadata(self, fname: str, key="default") -> dict:
raise err

else:
raise NotImplementedError(self.engine)
raise NotImplementedError(
f"'{self.engine}' is not implemented. Use 'pytables' or 'vaex' ?"
)

return metadata

Expand Down

0 comments on commit 8605d64

Please sign in to comment.