Merge pull request #336 from evaline-ju/int-threshold
🥅 Allow int thresholds
evaline-ju committed Mar 13, 2024
2 parents 01526aa + f2f8380 commit f55b082
Showing 2 changed files with 20 additions and 7 deletions.
@@ -17,7 +17,7 @@
 At this time this module is only designed for inference"""
 
 # Standard
-from typing import Iterable, List, Optional
+from typing import Iterable, List, Optional, Union
 import os
 
 # First Party
@@ -122,22 +122,24 @@ def __init__(
 
     @TokenClassificationTask.taskmethod()
     def run(
-        self, text: str, threshold: Optional[float] = None
+        self, text: str, threshold: Optional[Union[float, int]] = None
     ) -> TokenClassificationResults:
         """Run classification on text split into spans. Returns results
         based on score threshold for labels that are to be outputted
 
         Args:
             text: str
                 Document to run classification on
-            threshold: float
+            threshold: float | int
                 (Optional) Threshold based on which to return score results
 
         Returns:
             TokenClassificationResults
         """
         error.type_check("<NLP82129006E>", str, text=text)
-        error.type_check("<NLP01414077E>", float, allow_none=True, threshold=threshold)
+        error.type_check(
+            "<NLP01414077E>", float, int, allow_none=True, threshold=threshold
+        )
 
         if threshold is None:
             threshold = self.default_threshold
@@ -189,7 +191,7 @@ def run(
 
     @TokenClassificationTask.taskmethod(input_streaming=True, output_streaming=True)
     def run_bidi_stream(
-        self, text_stream: Iterable[str], threshold: Optional[float] = None
+        self, text_stream: Iterable[str], threshold: Optional[Union[float, int]] = None
     ) -> Iterable[TokenClassificationStreamResult]:
         """Run bi-directional streaming inferencing for this module.
         Run classification on text split into spans. Returns results
@@ -198,13 +200,15 @@ def run_bidi_stream(
         Args:
             text_stream: Iterable[str]
                 Text stream to run classification on
-            threshold: float
+            threshold: float | int
                 (Optional) Threshold based on which to return score results
 
         Returns:
             Iterable[TokenClassificationStreamResult]
         """
-        error.type_check("<NLP96166348E>", float, allow_none=True, threshold=threshold)
+        error.type_check(
+            "<NLP96166348E>", float, int, allow_none=True, threshold=threshold
+        )
         # TODO: For optimization implement window based approach.
         if threshold is None:
             threshold = self.default_threshold
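For context, a minimal usage sketch of the widened API. This is hypothetical and not part of the diff: model stands in for any bootstrapped FilteredSpanClassification instance, and document / chunks are placeholder string inputs.

# Both calls now pass the type check: int and float thresholds are accepted.
results = model.run(document, threshold=0)    # int threshold
results = model.run(document, threshold=0.5)  # float threshold, behavior unchanged

# The bi-directional streaming method accepts the same widened threshold type.
for partial_result in model.run_bidi_stream(iter(chunks), threshold=1):
    print(partial_result)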
@@ -135,6 +135,15 @@ def test_bootstrap_run_with_threshold():
     )  # 4 (all) results over 0.0 expected
 
 
+def test_bootstrap_run_with_int_threshold():
+    """Check if we can bootstrap span classification models with overridden int threshold"""
+    token_classification_result = BOOTSTRAPPED_MODEL.run(DOCUMENT, threshold=0)
+    assert isinstance(token_classification_result, TokenClassificationResults)
+    assert (
+        len(token_classification_result.results) == 4
+    )  # 4 (all) results over 0 expected
+
+
 def test_bootstrap_run_with_optional_labels_to_output():
     """Check if we can run span classification models with labels_to_output"""
     model = FilteredSpanClassification.bootstrap(
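A similar check could be sketched for the streaming path. This is not part of the commit; it assumes TokenClassificationStreamResult is importable in the test module alongside the names already used above (BOOTSTRAPPED_MODEL, DOCUMENT).

def test_bootstrap_run_bidi_stream_with_int_threshold():
    """Sketch only: the streaming path should accept an int threshold as well"""
    stream_results = BOOTSTRAPPED_MODEL.run_bidi_stream(iter([DOCUMENT]), threshold=0)
    for result in stream_results:
        assert isinstance(result, TokenClassificationStreamResult)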
