Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fail_on_tie feature for process.extractOne #210

Open
wants to merge 1 commit into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
35 changes: 33 additions & 2 deletions fuzzywuzzy/process.py
Expand Up @@ -194,7 +194,31 @@ def extractBests(query, choices, processor=default_processor, scorer=default_sco
sorted(best_list, key=lambda i: i[1], reverse=True)


def extractOne(query, choices, processor=default_processor, scorer=default_scorer, score_cutoff=0):
def getUniqueMax(best_list):
    """Return the top-scoring item of ``best_list`` if its score is unique.

    Args:
        best_list: Iterable (e.g. the generator computed by
            extractWithoutOrder) of tuples whose element at index 1 is the
            match score. It is consumed in a single pass.

    Returns:
        The item that holds the single highest score.

    Raises:
        ValueError: If ``best_list`` is empty (mirroring ``max()`` on an
            empty iterable), or if two or more items share the highest score.
    """
    best_item = None
    best_score = None
    tie_count = 0  # number of items seen so far that hold best_score

    for item in best_list:
        score = item[1]

        # Seed the running maximum from the first item rather than from 0,
        # so that scores of 0 (or below) are still recorded as candidates.
        if tie_count == 0 or score > best_score:
            best_item, best_score, tie_count = item, score, 1
        elif score == best_score:
            tie_count += 1

    if tie_count == 0:
        raise ValueError('Best match is undefined for an empty sequence.')
    if tie_count > 1:
        raise ValueError('Best match is non-unique.')

    return best_item


def extractOne(query, choices, processor=default_processor, scorer=default_scorer, score_cutoff=0, fail_on_tie=False):
"""Find the single best match above a score in a list of choices.

This is a convenience method which returns the single best choice.
Expand All @@ -210,14 +234,21 @@ def extractOne(query, choices, processor=default_processor, scorer=default_score
score_cutoff: Optional argument for score threshold. If the best
match is found, but it is not greater than this number, then
return None anyway ("not a good enough match"). Defaults to 0.
fail_on_tie: Optional argument. If True, return None in the case of a
scoring tie.

Returns:
A tuple containing a single match and its score, if a match
was found that was above score_cutoff. Otherwise, returns None.
If fail_on_tie is True, return None in the case of a tie.
"""
best_list = extractWithoutOrder(query, choices, processor, scorer, score_cutoff)

try:
return max(best_list, key=lambda i: i[1])
if fail_on_tie:
return getUniqueMax(best_list)
else:
return max(best_list, key=lambda i: i[1])
except ValueError:
return None

Expand Down
31 changes: 31 additions & 0 deletions test_fuzzywuzzy.py
Expand Up @@ -362,6 +362,26 @@ def testGetBestChoice4(self):
best = process.extractOne(query, self.baseball_strings)
self.assertEqual(best[0], self.baseball_strings[0])

def testGetBestChoiceFailOnTie1(self):
    """With fail_on_tie enabled, the match "braves vs mets" is expected."""
    match = process.extractOne(
        "new york mets at atlanta braves",
        self.baseball_strings,
        fail_on_tie=True,
    )
    self.assertEqual(match[0], "braves vs mets")

def testGetBestChoiceFailOnTie2(self):
    """With fail_on_tie enabled, the third baseball string is expected."""
    candidates = self.baseball_strings
    match = process.extractOne(
        "philadelphia phillies at atlanta braves",
        candidates,
        fail_on_tie=True,
    )
    self.assertEqual(match[0], candidates[2])

def testGetBestChoiceFailOnTie3(self):
    """With fail_on_tie enabled, this query is expected to yield None."""
    match = process.extractOne(
        "atlanta braves at philadelphia phillies",
        self.baseball_strings,
        fail_on_tie=True,
    )
    self.assertIsNone(match)

def testGetBestChoiceFailOnTie4(self):
    """With fail_on_tie enabled, the first baseball string is expected."""
    candidates = self.baseball_strings
    match = process.extractOne(
        "chicago cubs vs new york mets",
        candidates,
        fail_on_tie=True,
    )
    self.assertEqual(match[0], candidates[0])

def testWithProcessor(self):
events = [
["chicago cubs vs new york mets", "CitiField", "2011-05-11", "8pm"],
Expand All @@ -373,6 +393,17 @@ def testWithProcessor(self):
best = process.extractOne(query, events, processor=lambda event: event[0])
self.assertEqual(best[0], events[0])

def testFailOnTieWithProcessor(self):
    """extractOne with a custom processor and fail_on_tie=True expects None."""
    def first_field(event):
        # Use only the first field of each record for matching.
        return event[0]

    events = [
        ["chicago cubs vs new york mets", "CitiField", "2011-05-11", "8pm"],
        ["new york yankees vs boston red sox", "Fenway Park", "2011-05-11", "8pm"],
        ["atlanta braves vs pittsburgh pirates", "PNC Park", "2011-05-11", "8pm"],
    ]
    # NOTE(review): query is deliberately kept as a 1-tuple wrapping the
    # record (not the record itself); the expected None result may depend on
    # this shape -- confirm the intent before "simplifying" it.
    query = (["new york mets vs chicago cubs", "CitiField", "2017-03-19", "8pm"],)

    match = process.extractOne(query, events, processor=first_field, fail_on_tie=True)
    self.assertIsNone(match)

def testWithScorer(self):
choices = [
"new york mets vs chicago cubs",
Expand Down