Skip to content

Commit

Permalink
Merge pull request #291 from lovetox/typing
Browse files Browse the repository at this point in the history
Tests: Add type hints
  • Loading branch information
TahirJalilov committed May 15, 2024
2 parents cf2494c + ff78b93 commit a1332a6
Show file tree
Hide file tree
Showing 10 changed files with 90 additions and 77 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/pythonTests.yml
Expand Up @@ -42,7 +42,7 @@ jobs:

- name: Install Dependencies
run: |
python -m pip install .
python -m pip install .[dev]
- name: Pyright
uses: jakebailey/pyright-action@v2
3 changes: 0 additions & 3 deletions pyproject.toml
Expand Up @@ -67,8 +67,5 @@ exclude = [
".git",
".venv",
"build",
"docs",
"example",
"tests",
"utils",
]
30 changes: 13 additions & 17 deletions tests/test_analyze.py
Expand Up @@ -15,44 +15,57 @@ def test_analyze():

result = list(emoji.analyze('abc\U0001F472'))
assert len(result) == 1
assert not isinstance(result[0].value, str)
assert result[0].value.emoji == '\U0001F472'

result = list(emoji.analyze('abc\U0001F472', non_emoji=True))
assert result[0].value == 'a'
assert not isinstance(result[3].value, str)
assert result[3].value.emoji == '\U0001F472'

result = list(emoji.analyze('\U0001F477\U0001F3FB\U0000200D\U00002640'))
assert len(result) == 1
assert not isinstance(result[0].value, str)
assert result[0].value.emoji == '\U0001F477\U0001F3FB\U0000200D\U00002640'

result = list(emoji.analyze('\U0001F477\U0001F3FC\U0001F477\U0001F3FB\U0000200D\U00002640'))
assert len(result) == 2
assert not isinstance(result[0].value, str)
assert result[0].value.emoji == '\U0001F477\U0001F3FC'
assert not isinstance(result[1].value, str)
assert result[1].value.emoji == '\U0001F477\U0001F3FB\U0000200D\U00002640'


def test_analyze_non_rgi_zwj():
result = list(emoji.analyze('\U0001F468\U0001F3FF\U0000200D\U0001F469\U0001F3FB\U0000200D\U0001F467\U0001F3FD'))
assert len(result) == 1
assert not isinstance(result[0].value, str)
assert result[0].value.emoji == '\U0001F468\U0001F3FF\U0000200D\U0001F469\U0001F3FB\U0000200D\U0001F467\U0001F3FD'

result = list(emoji.analyze('\U0001F468\U0001F3FF\U0000200D\U0001F469\U0001F3FB\U0000200D\U0001F467\U0001F3FD', join_emoji=False))
assert len(result) == 3
assert not isinstance(result[0].value, str)
assert result[0].value.emoji == '\U0001F468\U0001F3FF'
assert not isinstance(result[1].value, str)
assert result[1].value.emoji == '\U0001F469\U0001F3FB'
assert not isinstance(result[2].value, str)
assert result[2].value.emoji == '\U0001F467\U0001F3FD'

result = list(emoji.analyze('\U0001F468\U0001F3FF\U0000200D\U0001F469\U0001F3FB\U0000200D\U0001F467\U0001F3FDx', join_emoji=False, non_emoji=True))
assert len(result) == 6
assert not isinstance(result[0].value, str)
assert result[0].value.emoji == '\U0001F468\U0001F3FF'
assert result[1].value == '\U0000200D'
assert not isinstance(result[2].value, str)
assert result[2].value.emoji == '\U0001F469\U0001F3FB'
assert result[3].value == '\U0000200D'
assert not isinstance(result[4].value, str)
assert result[4].value.emoji == '\U0001F467\U0001F3FD'
assert result[5].value == 'x'

result = list(emoji.analyze('\U0001F468\U0001F3FF\U0000200D\U0001F469\U0001F3FB\U0000200D\U0001F467\U0001F3FDx', join_emoji=True, non_emoji=True))
assert len(result) == 2
assert not isinstance(result[0].value, str)
assert result[0].value.emoji == '\U0001F468\U0001F3FF\U0000200D\U0001F469\U0001F3FB\U0000200D\U0001F467\U0001F3FD'
assert result[1].value == 'x'

Expand All @@ -72,23 +85,6 @@ def test_analyze_non_rgi_zwj():
assert isinstance(result[3].value, emoji.EmojiMatch)


def test_emoji_match():
s = 'a\U0001F309b'
token = next(emoji.analyze(s))
assert isinstance(token, emoji.Token)

assert token.chars == s[1:-1]

match = token.value

assert isinstance(match, emoji.EmojiMatch)
assert match.emoji == s[1:-1]
assert match.start == 1
assert match.end == 2
assert match.is_zwj() == False
assert str(match).startswith('EmojiMatch(')


def test_emoji_match():
s = 'a\U0001F309b'
token = next(emoji.analyze(s))
Expand Down
78 changes: 47 additions & 31 deletions tests/test_core.py
Expand Up @@ -2,17 +2,21 @@

import random
import re
import emoji
from typing import Any, Callable, Dict, List, Tuple, Union
from typing_extensions import Literal
import emoji.unicode_codes
import pytest
import unicodedata

_NormalizationForm = Literal['NFC', 'NFD', 'NFKC', 'NFKD']

# Build all language packs (i.e. fill the cache):
emoji.emojize("", language="alias")
for lang_code in emoji.LANGUAGES:
emoji.emojize("", language=lang_code)


def ascii(s):
def ascii(s: str) -> str:
# return escaped Code points \U000AB123
return s.encode("unicode-escape").decode()

Expand All @@ -24,13 +28,13 @@ def all_language_and_alias_packs():
yield (lang_code, emoji.unicode_codes.get_emoji_unicode_dict(lang_code))


def normalize(form, s):
def normalize(form: _NormalizationForm, s: str) -> str:
return unicodedata.normalize(form, s)


def test_emojize_name_only():
    # Check that the regular expression emoji.core._EMOJI_NAME_PATTERN contains all the necessary characters
from emoji.core import _EMOJI_NAME_PATTERN
from emoji.core import _EMOJI_NAME_PATTERN # pyright: ignore [reportPrivateUsage]

pattern = re.compile('[^%s]' % (_EMOJI_NAME_PATTERN, ))

Expand Down Expand Up @@ -59,7 +63,7 @@ def test_emojize_name_only():

def test_regular_expression_minimal():
    # Check that the regular expression emoji.core._EMOJI_NAME_PATTERN only contains the necessary characters
from emoji.core import _EMOJI_NAME_PATTERN
from emoji.core import _EMOJI_NAME_PATTERN # pyright: ignore [reportPrivateUsage]

pattern_str = '[^%s]' % (_EMOJI_NAME_PATTERN, )
i = 2
Expand All @@ -70,7 +74,7 @@ def test_regular_expression_minimal():
continue
pattern = re.compile(pattern_str.replace(c, ''))
failed = False
for lang_code, emoji_pack in all_language_and_alias_packs():
for _, emoji_pack in all_language_and_alias_packs():
for name_in_db in emoji_pack.keys():
name_in_db = name_in_db[1:-1]
names = [
Expand Down Expand Up @@ -112,45 +116,46 @@ def test_emojize_complicated_string():


def test_emojize_languages():
for lang_code, emoji_pack in emoji.unicode_codes._EMOJI_UNICODE.items():
for lang_code, emoji_pack in emoji.unicode_codes._EMOJI_UNICODE.items(): # pyright: ignore [reportPrivateUsage]
for name, emj in emoji_pack.items():
assert emoji.emojize(name, language=lang_code) == emj


def test_demojize_languages():
for lang_code, emoji_pack in emoji.unicode_codes._EMOJI_UNICODE.items():
for lang_code, emoji_pack in emoji.unicode_codes._EMOJI_UNICODE.items(): # pyright: ignore [reportPrivateUsage]
for name, emj in emoji_pack.items():
assert emoji.demojize(emj, language=lang_code) == name


def test_emojize_variant():
def remove_variant(s): return re.sub('[\ufe0e\ufe0f]$', '', s)
def remove_variant(s: str) -> str:
return re.sub('[\ufe0e\ufe0f]$', '', s)

assert emoji.emojize(
':Taurus:', variant=None) == emoji.unicode_codes._EMOJI_UNICODE['en'][':Taurus:']
':Taurus:', variant=None) == emoji.unicode_codes._EMOJI_UNICODE['en'][':Taurus:'] # pyright: ignore [reportPrivateUsage]
assert emoji.emojize(':Taurus:', variant=None) == emoji.emojize(':Taurus:')
assert emoji.emojize(':Taurus:', variant='text_type') == remove_variant(
emoji.unicode_codes._EMOJI_UNICODE['en'][':Taurus:']) + '\ufe0e'
emoji.unicode_codes._EMOJI_UNICODE['en'][':Taurus:']) + '\ufe0e' # pyright: ignore [reportPrivateUsage]
assert emoji.emojize(':Taurus:', variant='emoji_type') == remove_variant(
emoji.unicode_codes._EMOJI_UNICODE['en'][':Taurus:']) + '\ufe0f'
emoji.unicode_codes._EMOJI_UNICODE['en'][':Taurus:']) + '\ufe0f' # pyright: ignore [reportPrivateUsage]

assert emoji.emojize(
':admission_tickets:', variant=None) == emoji.unicode_codes._EMOJI_UNICODE['en'][':admission_tickets:']
':admission_tickets:', variant=None) == emoji.unicode_codes._EMOJI_UNICODE['en'][':admission_tickets:'] # pyright: ignore [reportPrivateUsage]
assert emoji.emojize(':admission_tickets:', variant=None) == emoji.emojize(
':admission_tickets:')
assert emoji.emojize(':admission_tickets:', variant='text_type') == remove_variant(
emoji.unicode_codes._EMOJI_UNICODE['en'][':admission_tickets:']) + '\ufe0e'
emoji.unicode_codes._EMOJI_UNICODE['en'][':admission_tickets:']) + '\ufe0e' # pyright: ignore [reportPrivateUsage]
assert emoji.emojize(':admission_tickets:', variant='emoji_type') == remove_variant(
emoji.unicode_codes._EMOJI_UNICODE['en'][':admission_tickets:']) + '\ufe0f'
emoji.unicode_codes._EMOJI_UNICODE['en'][':admission_tickets:']) + '\ufe0f' # pyright: ignore [reportPrivateUsage]

with pytest.raises(ValueError):
emoji.emojize(':admission_tickets:', variant=False)
emoji.emojize(':admission_tickets:', variant=False) # pyright: ignore [reportArgumentType]

with pytest.raises(ValueError):
emoji.emojize(':admission_tickets:', variant=True)
emoji.emojize(':admission_tickets:', variant=True) # pyright: ignore [reportArgumentType]

with pytest.raises(ValueError):
emoji.emojize(':admission_tickets:', variant='wrong')
emoji.emojize(':admission_tickets:', variant='wrong') # pyright: ignore [reportArgumentType]

assert emoji.emojize(":football:") == ':football:'
assert emoji.emojize(":football:", variant="text_type") == ':football:'
Expand Down Expand Up @@ -198,11 +203,11 @@ def test_emojize_version():
assert emoji.emojize("Biking :man_biking: is in 4.0", version=3.0, handle_version=lambda e, data: '<emoji>') == "Biking <emoji> is in 4.0"
assert emoji.emojize("Biking :man_biking: is in 4.0", version=3.0, handle_version=lambda e, data: data["fr"]) == "Biking :cycliste_homme: is in 4.0"

def f(emj, data):
def f(emj: str, data: Dict[str, str]) -> str:
assert data['E'] == 5
return ''

assert emoji.emojize(':bowl_with_spoon:', version=-
1, handle_version=f) == ''
assert emoji.emojize(':bowl_with_spoon:', version=-1, handle_version=f) == ''
assert emoji.emojize(':bowl_with_spoon:') == '\U0001F963'
assert emoji.emojize(':bowl_with_spoon:', version=4) == ''
assert emoji.emojize(':bowl_with_spoon:', version=4.9) == ''
Expand Down Expand Up @@ -348,7 +353,7 @@ def test_replace_emoji():
assert emoji.replace_emoji('Hello 🇫🇷👌') == 'Hello '
assert emoji.replace_emoji('Hello 🇫🇷👌', 'x') == 'Hello xx'

def replace(emj, data):
def replace(emj: str, data: Dict[str, str]) -> str:
assert emj in ["🇫🇷", "👌"]
return 'x'
assert emoji.replace_emoji('Hello 🇫🇷👌', replace) == 'Hello xx'
Expand All @@ -373,7 +378,7 @@ def test_long_emoji():


def test_untranslated():
for emj, item in emoji.EMOJI_DATA.items():
for item in emoji.EMOJI_DATA.values():
if item['status'] != emoji.STATUS['fully_qualified']:
continue
if 'es' not in item:
Expand Down Expand Up @@ -408,9 +413,16 @@ def test_text():
Excepteur sint occaecat cupidatat non proident, sunt in culpa qui officia deserunt mollit anim id est laborum.
"""

def add_random_emoji(text, lst, select=lambda emj_data: emj_data['en']):
def default_select(emj_data: Dict[str, Any]) -> str:
return emj_data['en']

def add_random_emoji(
text: str,
lst: List[Tuple[str, Dict[str, Any]]],
select: Callable[[Dict[str, Any]], Union[str, Literal[False]]] = default_select
) -> Tuple[str, str, List[str]]:

emoji_list = []
emoji_list: List[str] = []
text_with_unicode = ""
text_with_placeholder = ""
for i in range(0, len(text), 10):
Expand Down Expand Up @@ -439,7 +451,7 @@ def add_random_emoji(text, lst, select=lambda emj_data: emj_data['en']):

return text_with_unicode, text_with_placeholder, emoji_list

def clean(s):
def clean(s: str) -> str:
return s.replace('\u200d', '').replace('\ufe0f', '')

all_emoji_list = list(emoji.EMOJI_DATA.items())
Expand All @@ -456,8 +468,10 @@ def clean(s):
assert lis['emoji'] == emoji_list[i]

# qualified emoji from "es"
selector = lambda emoji_data: emoji_data["es"] if "es" in emoji_data else False
text_with_unicode, text_with_placeholder, emoji_list = add_random_emoji(text, qualified_emoji_list, selector)
def select_es(emj_data: Dict[str, Any]) -> Union[str, Literal[False]]:
return emj_data["es"] if "es" in emj_data else False

text_with_unicode, text_with_placeholder, emoji_list = add_random_emoji(text, qualified_emoji_list, select=select_es)
assert emoji.demojize(text_with_unicode, language="es") == text_with_placeholder
assert emoji.emojize(text_with_placeholder, language="es") == text_with_unicode
if not UCS2:
Expand All @@ -467,8 +481,10 @@ def clean(s):
assert lis['emoji'] == emoji_list[i]

# qualified emoji from "alias"
selector = lambda emoji_data: emoji_data["alias"][0] if "alias" in emoji_data else False
text_with_unicode, text_with_placeholder, emoji_list = add_random_emoji(text, qualified_emoji_list, selector)
def select_alias(emj_data: Dict[str, Any]) -> Union[str, Literal[False]]:
return emj_data["alias"][0] if "alias" in emj_data else False

text_with_unicode, text_with_placeholder, emoji_list = add_random_emoji(text, qualified_emoji_list, select=select_alias)
assert emoji.demojize(text_with_unicode, language="alias") == text_with_placeholder
assert emoji.emojize(text_with_placeholder, language="alias") == text_with_unicode
if not UCS2:
Expand All @@ -490,7 +506,7 @@ def clean(s):

def test_text_multiple_times():
# Run test_text() multiple times because it relies on a random text
for i in range(100):
for _ in range(100):
test_text()


Expand Down
6 changes: 3 additions & 3 deletions tests/test_dict.py
Expand Up @@ -8,7 +8,7 @@
def test_all_languages_list():
"""Compare all language keys in EMOJI_DATA with the emoji.LANGUAGES list"""

langs = set()
langs: set[str] = set()
for item in emoji.EMOJI_DATA.values():
langs.update(item.keys())
all_languages = {lang for lang in langs if len(lang) == 2 and lang.lower() == lang}
Expand All @@ -25,10 +25,10 @@ def test_emoji_versions():
assert v >= 0.6


def check_duplicate_names(lang):
def check_duplicate_names(lang: str):
"""Check that there are no duplicate names in the fully_qualified except for different variants"""
seen = {}
for emj, item in emoji.EMOJI_DATA.items():
for item in emoji.EMOJI_DATA.values():
if item["status"] > emoji.STATUS["fully_qualified"]:
continue

Expand Down
7 changes: 5 additions & 2 deletions tests/test_nfkc.py
@@ -1,11 +1,14 @@
"""Unittests for canonically equivalent Unicode sequences"""

import sys
import emoji
import unicodedata
import emoji
from typing_extensions import Literal


_NormalizationForm = Literal['NFC', 'NFD', 'NFKC', 'NFKD']

def is_normalized(form, s):
def is_normalized(form: _NormalizationForm, s: str) -> bool:
if sys.version_info >= (3, 8):
return unicodedata.is_normalized(form, s)
else:
Expand Down
14 changes: 6 additions & 8 deletions tests/test_unicode_codes.py
@@ -1,8 +1,6 @@
"""Unittests for emoji.unicode_codes."""


import emoji

import emoji.unicode_codes

# Build all language packs (i.e. fill the cache):
emoji.emojize("", language="alias")
Expand All @@ -13,8 +11,8 @@
def test_emoji_english_names():

for language, group in (
('en', emoji.unicode_codes._EMOJI_UNICODE['en']),
('alias', emoji.unicode_codes._ALIASES_UNICODE)
('en', emoji.unicode_codes._EMOJI_UNICODE['en']), # pyright: ignore [reportPrivateUsage]
('alias', emoji.unicode_codes._ALIASES_UNICODE) # pyright: ignore [reportPrivateUsage]
):
for name, ucode in group.items():
assert name.startswith(':') and name.endswith(':') and len(name) >= 3
Expand All @@ -26,14 +24,14 @@ def test_compare_normal_and_aliases():
# There should always be more aliases than normal codes
# since the aliases contain the normal codes

assert len(emoji.unicode_codes._EMOJI_UNICODE['en']) < len(
emoji.unicode_codes._ALIASES_UNICODE)
assert len(emoji.unicode_codes._EMOJI_UNICODE['en']) < len( # pyright: ignore [reportPrivateUsage]
emoji.unicode_codes._ALIASES_UNICODE) # pyright: ignore [reportPrivateUsage]


def test_no_alias_duplicates():
# There should not be two emoji with the same alias
# (aliases still can be the same as another 'en'-name)
all_aliases = set()
all_aliases: set[str] = set()
for data in emoji.EMOJI_DATA.values():
if data['status'] <= emoji.STATUS['fully_qualified'] and 'alias' in data:
for alias in data['alias']:
Expand Down

0 comments on commit a1332a6

Please sign in to comment.