jawah · Ousret · May 3, 2022 · Apr 30, 2022 · Apr 30, 2022 · Apr 30, 2022
diff --git a/charset_normalizer/cd.py b/charset_normalizer/cd.py
@@ -175,9 +175,10 @@ def characters_popularity_compare(
         raise ValueError("{} not available".format(language))
 
     character_approved_count = 0  # type: int
+    language_characters = set(FREQUENCIES[language])  # set for fast membership tests
 
     for character in ordered_characters:
-        if character not in FREQUENCIES[language]:
+        if character not in language_characters:
             continue
 
         characters_before_source = FREQUENCIES[language][
@@ -186,24 +187,20 @@ def characters_popularity_compare(
         characters_after_source = FREQUENCIES[language][
             FREQUENCIES[language].index(character) :
         ]  # type: List[str]
-
         characters_before = ordered_characters[
             0 : ordered_characters.index(character)
         ]  # type: List[str]
         characters_after = ordered_characters[
             ordered_characters.index(character) :
         ]  # type: List[str]
-
-        before_match_count = [
-            e in characters_before for e in characters_before_source
-        ].count(
-            True
-        )  # type: int
-        after_match_count = [
-            e in characters_after for e in characters_after_source
-        ].count(
-            True
-        )  # type: int
+
+        before_match_count = len(
+            set(characters_before) & set(characters_before_source)
+        )  # type: int
+
+        after_match_count = len(
+            set(characters_after) & set(characters_after_source)
+        )  # type: int
 
         if len(characters_before_source) == 0 and before_match_count <= 4:
             character_approved_count += 1

diff --git a/charset_normalizer/md.py b/charset_normalizer/md.py
@@ -16,6 +16,7 @@
     is_separator,
     is_symbol,
     is_thai,
+    is_unprintable,
     remove_accent,
     unicode_range,
 )
@@ -137,13 +138,9 @@ def __init__(self) -> None:
 
     def eligible(self, character: str) -> bool:
         return True
 
     def feed(self, character: str) -> None:
-        if (
-            character.isspace() is False  # includes \n \t \r \v
-            and character.isprintable() is False
-            and character != "\x1A"  # Why? Its the ASCII substitute character.
-        ):
+        if is_unprintable(character):
             self._unprintable_count += 1
         self._character_count += 1
 
@@ -167,7 +164,7 @@ def __init__(self) -> None:
 
     def eligible(self, character: str) -> bool:
         return character.isalpha() and is_latin(character)
-
+
     def feed(self, character: str) -> None:
         self._character_count += 1
         if (
@@ -445,7 +442,8 @@ def ratio(self) -> float:
 
         return self._successive_upper_lower_count_final / self._character_count
 
 
+@lru_cache(maxsize=1024)
 def is_suspiciously_successive_range(
     unicode_range_a: Optional[str], unicode_range_b: Optional[str]
 ) -> bool:

diff --git a/charset_normalizer/utils.py b/charset_normalizer/utils.py
@@ -73,6 +73,7 @@ def is_latin(character: str) -> bool:
     return "LATIN" in description
 
 
+@lru_cache(maxsize=UTF8_MAXIMAL_ALLOCATION)
 def is_ascii(character: str) -> bool:
     try:
         character.encode("ascii")
@@ -197,6 +198,16 @@ def is_unicode_range_secondary(range_name: str) -> bool:
     return any(keyword in range_name for keyword in UNICODE_SECONDARY_RANGE_KEYWORD)
 
 
+@lru_cache(maxsize=UTF8_MAXIMAL_ALLOCATION)
+def is_unprintable(character: str) -> bool:
+    """Return True for a character that is neither whitespace, printable, nor SUB."""
+    return (
+        character.isspace() is False  # includes \n \t \r \v
+        and character.isprintable() is False
+        and character != "\x1A"  # Why? Its the ASCII substitute character.
+    )
+
+
 def any_specified_encoding(sequence: bytes, search_zone: int = 4096) -> Optional[str]:
     """
     Extract using ASCII-only decoder any specified encoding in the first n-bytes.

diff --git a/setup.py b/setup.py
@@ -7,6 +7,11 @@
 
 from setuptools import find_packages, setup
 
+try:
+    from mypyc.build import mypycify
+except ImportError:  # mypyc is optional; without it the build stays pure Python
+    mypycify = None
+
 
 def get_version():
     with open('charset_normalizer/version.py') as version_file:
@@ -51,6 +56,18 @@ def get_version():
     packages=find_packages(exclude=["tests", "*.tests", "*.tests.*", "tests.*"]),
     install_requires=REQUIRED,
     extras_require=EXTRAS,
+    # Compile the listed modules only when mypyc could be imported above.
+    ext_modules=mypycify([
+        'charset_normalizer/__init__.py',
+        'charset_normalizer/api.py',
+        'charset_normalizer/constant.py',
+        'charset_normalizer/cd.py',
+        'charset_normalizer/md.py',
+        'charset_normalizer/models.py',
+        'charset_normalizer/utils.py',
+        'charset_normalizer/assets/__init__.py',
+        'charset_normalizer/cli/normalizer.py',
+    ]) if mypycify is not None else [],
     include_package_data=True,
     package_data={"charset_normalizer": ["py.typed"]},
     license='MIT',