Skip to content

Commit

Permalink
Scientific notation (#110)
Browse files Browse the repository at this point in the history
* Working on rules for sn identification

* Refine regex

Adding tests

* Build update

Adding the latest Python versions and removing macOS because the runner
complains: `Version 3.7 with arch x64 not found`

* Make '3.10' explicit

Build treated this as 3.1

* Add Number to ALL import

So this field gets brought in with the others on a `*` import.

* Add readthedocs config

Required to build.

* Remove comments

* Add sphinx requirements

* Ignore cleanup errors for temp directories

I don't think we really care about the details of why this isn't working. Our runner is ephemeral, so who cares if something goes weird with the test cleanup?

* Revert "Ignore cleanup errors for temp directories"

This reverts commit d749102.

* Conditionally ignore cleanup errors in newer versions of python tests

* Mark extra function as a fixture....

* Revert "Mark extra function as a fixture...."

This reverts commit 29361b4.

* Revert "Conditionally ignore cleanup errors in newer versions of python tests"

This reverts commit 2a9d47a.

* Revert "Revert "Ignore cleanup errors for temp directories""

This reverts commit 134997d.

* Revert "Ignore cleanup errors for temp directories"

This reverts commit d749102.

* Add exception handling to usage of tempfile.TemporaryDirectory() (#111)

I couldn't replicate this locally at all (something specific to the runner?).

I eventually found this [pytest issue](pytest-dev/pytest#7491) that discusses
similar(ish) behaviors. This led me to attempt to catch the PermissionError exception and essentially ignore it.

I'm not sure if this is a good approach, but it appears to resolve the issue.

Co-authored-by: Jacob Campbell <jacob.campbell@hca.wa.gov>

* whitespace

* bump

---------

Co-authored-by: Jacob Campbell <jacob.campbell@hca.wa.gov>
Co-authored-by: Snotpus <jacobl.campbell@gmail.com>
  • Loading branch information
3 people committed May 11, 2024
1 parent 8febb78 commit a66f210
Show file tree
Hide file tree
Showing 9 changed files with 121 additions and 34 deletions.
4 changes: 2 additions & 2 deletions .github/workflows/python_versions_test.yml
Original file line number Diff line number Diff line change
Expand Up @@ -9,8 +9,8 @@ jobs:
strategy:
max-parallel: 4
matrix:
python-version: [3.7, 3.8]
os: [ ubuntu-latest, macos-latest, windows-latest ]
python-version: [3.7, 3.8, 3.9, '3.10', 3.11]
os: [ ubuntu-latest, windows-latest ]

steps:
- uses: actions/checkout@v1
Expand Down
15 changes: 15 additions & 0 deletions .readthedocs.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
# Read the Docs configuration file for Sphinx projects
# See https://docs.readthedocs.io/en/stable/config-file/v2.html for details
version: 2

build:
os: ubuntu-22.04
tools:
python: "3.11"

sphinx:
configuration: docs/source/conf.py

python:
install:
- requirements: docs/requirements.txt
1 change: 1 addition & 0 deletions docs/requirements.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
sphinx-rtd-theme
2 changes: 1 addition & 1 deletion rumydata/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,4 +35,4 @@
from rumydata.menu import menu
from rumydata.table import *

__version__ = '1.4.3'
__version__ = '1.5.0'
32 changes: 31 additions & 1 deletion rumydata/field.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,8 @@
from rumydata._base import _BaseSubject
from rumydata.rules import cell as clr, column as cr

__all__ = ['Text', 'Date', 'Currency', 'Digit', 'Integer', 'Choice', 'Ignore', 'Empty']
__all__ = ['Text', 'Date', 'Currency', 'Digit', 'Integer', 'Choice', 'Ignore',
'Empty', 'Number']


class Field(_BaseSubject):
Expand Down Expand Up @@ -317,6 +318,35 @@ def __init__(self, max_length, min_length=None, **kwargs):
self.rules.append(clr.MinDigit(min_length))


class Number(Field):
    """
    Number field

    A numeric value. The cell must satisfy the CanBeFloat rule, so decimal
    values are accepted; scientific notation is rejected unless explicitly
    allowed.

    :param max_length: the maximum number of digits
    :param min_length: (optional) the minimum number of digits
    :param allow_scientific: (optional) whether to allow scientific notation. Defaults to False.
    """
    _default_args = (1,)

    def __init__(self, max_length, min_length=None, allow_scientific=False, **kwargs):
        super().__init__(**kwargs)

        self.descriptors['Type'] = 'Numeric'
        self.descriptors['Format'] = f'{"9" * max_length}.{"0"}'
        self.descriptors['Max Length'] = f'{max_length} digits'

        self.rules.append(clr.CanBeFloat())
        # Only an explicit False adds the rule, matching the original check.
        if allow_scientific is False:
            self.rules.append(clr.NoScientific())
        self.rules.append(clr.MaxDigit(max_length))

        if min_length:
            # Report the minimum here; the descriptor previously echoed
            # max_length, which contradicted the MinDigit rule below.
            self.descriptors['Min Length'] = f'{min_length} digits'
            self.rules.append(clr.MinDigit(min_length))


class Choice(Field):
"""
Choice field
Expand Down
16 changes: 15 additions & 1 deletion rumydata/rules/cell.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@
'DateRule', 'CanBeDateIso', 'DateGT', 'DateGTE', 'DateET', 'DateLTE',
'DateLT', 'GreaterThanColumn', 'NotNullIfCompare', 'GreaterThanOrEqualColumn',
'OtherMustExist', 'OtherCantExist', 'LessThanColumn', 'LessThanOrEqualColumn',
'NotNullIfOtherEquals',
'NotNullIfOtherEquals', 'NoScientific', 'CanBeFloat',
'make_static_cell_rule'
]

Expand Down Expand Up @@ -255,6 +255,20 @@ def _explain(self) -> str:
return 'cannot have a leading zero digit'


class NoScientific(Rule):
    """
    Cell no scientific notation.

    Ensure that the cell is not a number written in scientific notation,
    i.e. a mantissa followed by an ``e``/``E`` exponent marker such as
    ``3.2e23``, ``-.2E-4`` or ``37.e88``.
    """

    def _evaluator(self):
        # Optional leading whitespace and sign, then a mantissa that contains
        # at least one digit (a leading decimal point is allowed), then the
        # exponent marker. Only `e`/`E` counts as the marker: a bare `+` or
        # `-` inside the value (e.g. `2024-01-01`) is not scientific notation.
        scientific = re.compile(r'\s*[+\-]?\.?\d[\d._]*[eE].*')
        return lambda x: scientific.fullmatch(x) is None

    def _explain(self) -> str:
        return 'cannot have scientific notation'


class CanBeFloat(Rule):
""" Cell can be float Rule """

Expand Down
8 changes: 5 additions & 3 deletions tests/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,9 +8,11 @@

@pytest.fixture()
def tmpdir():
with tempfile.TemporaryDirectory() as d:
yield Path(d)

try:
with tempfile.TemporaryDirectory() as d:
yield Path(d)
except PermissionError:
pass

@pytest.fixture()
def basic() -> dict:
Expand Down
22 changes: 22 additions & 0 deletions tests/test_rules_cell.py
Original file line number Diff line number Diff line change
Expand Up @@ -148,7 +148,15 @@ def test_no_leading_zero(value: str, expected: bool):
@pytest.mark.parametrize('value,expected', [
('1', True),
('0', True),
('0.1', True),
('a', False),
('+3', True),
('3.2e23', True),
('-4.70e+9', True),
('-.2E-4', True),
('-7.6603', True),
('+0003 ', True),
('37.e88', True)
])
def test_can_be_float(value: str, expected: bool):
r = CanBeFloat()
Expand Down Expand Up @@ -475,3 +483,17 @@ def test_other_must_exist_if_equals(row, other, values, expected):
def test_non_trim(value, expected):
r = NonTrim()
assert r._evaluator()(*r._prepare(value)) is expected


@pytest.mark.parametrize('value, expected', [
    ('+3', True),
    ('3.2e23', False),
    ('-4.70e+9', False),
    ('-.2E-4', False),
    ('-7.6603', True),
    ('+0003 ', True),
    ('37.e88', False)
])
def test_non_scientific(value, expected):
    """ NoScientific accepts plain numbers and rejects exponent notation """
    rule = NoScientific()
    evaluate = rule._evaluator()
    prepared = rule._prepare(value)
    assert evaluate(*prepared) is expected
55 changes: 29 additions & 26 deletions tests/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
from typing import List, Union, Tuple
from unittest.mock import DEFAULT


import openpyxl

from rumydata import Layout, CsvFile, ExcelFile
Expand All @@ -25,32 +26,34 @@ def func(*args):
def file_row_harness(row: List[Union[str, int]], layout: dict):
""" Write row to file for testing in ingest """
lay = Layout(layout, no_header=True)

with tempfile.TemporaryDirectory() as d:
csv_p = Path(d, 'file_test.csv')

with csv_p.open('w', newline='') as f:
writer = csv.writer(f)
writer.writerow(row)

xl_p = Path(d, 'excel_test.xlsx')
wb = openpyxl.Workbook()
sheet = wb['Sheet']
for ix, value in enumerate(row, start=1):
sheet.cell(row=1, column=ix).value = value
wb.save(filename=xl_p)

to_check = [
('CsvFile', CsvFile(lay), csv_p),
('ExcelFile', ExcelFile(lay), xl_p)
]
aes = {}
for nm, obj, p in to_check:
try:
assert not obj.check(p)
except AssertionError as e:
aes[nm] = e
new_line = '\n'
try:
with tempfile.TemporaryDirectory() as d:
csv_p = Path(d, 'file_test.csv')

with csv_p.open('w', newline='') as f:
writer = csv.writer(f)
writer.writerow(row)

xl_p = Path(d, 'excel_test.xlsx')
wb = openpyxl.Workbook()
sheet = wb['Sheet']
for ix, value in enumerate(row, start=1):
sheet.cell(row=1, column=ix).value = value
wb.save(filename=xl_p)

to_check = [
('CsvFile', CsvFile(lay), csv_p),
('ExcelFile', ExcelFile(lay), xl_p)
]
aes = {}
for nm, obj, p in to_check:
try:
assert not obj.check(p)
except AssertionError as e:
aes[nm] = e
new_line = '\n'
assert not aes, f'Write test failed for:\n {new_line.join([f"{k}:{v}" for k, v in aes.items()])}'
except PermissionError:
assert not aes, f'Write test failed for:\n {new_line.join([f"{k}:{v}" for k, v in aes.items()])}'


Expand Down

0 comments on commit a66f210

Please sign in to comment.