-
-
Notifications
You must be signed in to change notification settings - Fork 559
/
test_feedparser_data.py
103 lines (92 loc) · 3.14 KB
/
test_feedparser_data.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
import os
import re
try:
from rfc822 import Message
except ImportError:
# Python 3
from email import message_from_file as Message
import unittest
from lxml.tests.common_imports import doctest
from lxml.doctestcompare import LHTMLOutputChecker
try:
from lxml.html.clean import clean, Cleaner
html_clean_available = True
except ImportError:
html_clean_available = False
# Fixture directories, resolved relative to the directory of this module.
feed_dirs = [
    os.path.join(os.path.dirname(__file__), subdir)
    for subdir in ('feedparser-data', 'hackers-org-data')
]

# A run of five or more dashes; used to split a fixture body into its parts.
bar_re = re.compile(r"-----+")
class DummyInput:
    """Plain attribute holder: every keyword argument becomes an attribute.

    In this module it is created with a single ``want`` attribute and handed
    to the output checker's ``output_difference`` call.
    """

    def __init__(self, **kw):
        for attr_name in kw:
            setattr(self, attr_name, kw[attr_name])
class FeedTestCase(unittest.TestCase):
def __init__(self, filename):
self.filename = filename
unittest.TestCase.__init__(self)
def parse(self):
with open(self.filename) as f:
headers = Message(f)
c = f.read()
if not c.strip():
c = headers.get_payload()
if not headers.keys():
raise Exception(
"File %s has no headers" % self.filename)
self.description = headers['Description']
self.expect = headers.get('Expect', '')
self.ignore = headers.get('Ignore')
self.options = [
o.strip() for o in headers.get('Options', '').split(',')
if o.strip()]
parts = bar_re.split(c)
self.input = parts[0].rstrip() + '\n'
if parts[1:]:
self.expect = parts[1].rstrip() + '\n'
else:
self.expect = None
def runTest(self):
self.parse()
if self.ignore:
# We've marked this test to be ignored.
return
kw = {}
for name in self.options:
if name.startswith('-'):
kw[name[1:]] = False
else:
kw[name] = True
if kw.get('clean', True):
transformed = Cleaner(**kw).clean_html(self.input)
else:
transformed = self.input
assert self.expect is not None, (
"No expected output in %s" % self.filename)
checker = LHTMLOutputChecker()
if not checker.check_output(self.expect, transformed, 0):
result = checker.output_difference(
DummyInput(want=self.expect), transformed, 0)
#result += '\noptions: %s %r' % (', '.join(self.options), kw)
#result += repr(transformed)
raise Exception("\n"+result)
def shortDescription(self):
return self.filename
def test_suite():
    """Build the suite of feed-cleaning tests from the ``.data`` fixtures.

    Returns an empty suite (after printing a notice) when the HTML cleaner
    could not be imported.  Otherwise every ``*.data`` file found in each of
    ``feed_dirs`` becomes a ``FeedTestCase``.  Each case is also executed
    once while the suite is being built; the first case that raises ends the
    scan of its directory, so later files in that directory are not added.
    """
    suite = unittest.TestSuite()
    if not html_clean_available:
        print("Skipping tests in feedparser_data - external lxml_html_clean package is not installed")
        return suite
    for data_dir in feed_dirs:
        # Sorted so that the order of the cases, and therefore the point at
        # which the scan stops, does not depend on the file system.
        for entry in sorted(os.listdir(data_dir)):
            path = os.path.join(data_dir, entry)
            if not path.endswith('.data'):
                continue
            case = FeedTestCase(path)
            suite.addTests([case])
            # This is my lazy way of stopping on first error:
            try:
                case.runTest()
            except Exception:
                # Deliberately not a bare ``except``: KeyboardInterrupt and
                # SystemExit must still propagate.
                break
    return suite