Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

XPath with namespace and position #353

Open
wants to merge 3 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
2 changes: 1 addition & 1 deletion src/lxml/_elementpath.py
Expand Up @@ -86,7 +86,7 @@ def xpath_tokenizer(pattern, namespaces=None):
yield ttype, "{%s}%s" % (namespaces[prefix], uri)
except KeyError:
raise SyntaxError("prefix %r not found in prefix map" % prefix)
elif default_namespace and not parsing_attribute:
elif default_namespace and not parsing_attribute and not tag.isdecimal():
yield ttype, "{%s}%s" % (default_namespace, tag)
else:
yield token
Expand Down
267 changes: 267 additions & 0 deletions src/lxml/tests/test_elementpath.py
Expand Up @@ -85,6 +85,25 @@ def test_tokenizer_predicates(self):
[('', 'a'), ('[', ''), ('.', ''), ('', ''), ('=', ''), ('', ''), ('"abc"', ''), (']', '')],
'a[. = "abc"]',
)
assert_tokens(
[('/', ''), ('', 'a'), ('/', ''), ('', 'b'), ('/', ''), ('', 'c'), ('[', ''), ('', '1'), (']', '')],
'/a/b/c[1]',
)
assert_tokens(
[('/', ''), ('', '{nsnone}a'), ('/', ''), ('', '{nsnone}b'), ('/', ''), ('', '{nsnone}c'), ('[', ''), ('', '1'), (']', '')],
'/a/b/c[1]',
{None:'nsnone'},
)
assert_tokens(
[('/', ''), ('', '{nsnone}a'), ('/', ''), ('', '{nsnone}b'), ('[', ''), ('', '2'), (']', ''), ('/', ''), ('', '{nsnone}c'), ('[', ''), ('', '1'), (']', '')],
'/a/b[2]/c[1]',
{None:'nsnone'},
)
assert_tokens(
[('/', ''), ('', '{nsnone}a'), ('/', ''), ('', '{nsnone}b'), ('[', ''), ('', '100'), (']', '')],
'/a/b[100]',
{None:'nsnone'}
)

def test_xpath_tokenizer(self):
# Test the XPath tokenizer. Copied from CPython's "test_xml_etree.py"
Expand Down Expand Up @@ -144,6 +163,18 @@ def check(p, expected, namespaces=None):
check("@{ns}attr", ['@', '{ns}attr'],
{'': 'http://www.w3.org/2001/XMLSchema',
'ns': 'http://www.w3.org/2001/XMLSchema'})
check("/doc/section[2]",
['/', '{http://www.w3.org/2001/XMLSchema}doc', '/', '{http://www.w3.org/2001/XMLSchema}section', '[', '2', ']'],
{"":"http://www.w3.org/2001/XMLSchema"}
)
check("/doc/section[2]",
['/', '{http://www.w3.org/2001/XMLSchema}doc', '/', '{http://www.w3.org/2001/XMLSchema}section', '[', '2', ']'],
{None:"http://www.w3.org/2001/XMLSchema"}
)
check("/ns:doc/ns:section[2]",
['/', '{http://www.w3.org/2001/XMLSchema}doc', '/', '{http://www.w3.org/2001/XMLSchema}section', '[', '2', ']'],
{"ns":"http://www.w3.org/2001/XMLSchema"}
)

def test_find(self):
"""
Expand Down Expand Up @@ -271,6 +302,15 @@ def test_find(self):
self.assertEqual(summarize_list(etree.ElementTree(elem).findall("./tag")),
['tag', 'tag'])

# use find with path with position index
self.assertEqual(elem.find("tag[1]").tag, "tag")
self.assertEqual(etree.ElementTree(elem).find("tag[1]").tag, "tag")
self.assertEqual(etree.ElementTree(elem).findtext("tag[1]"), "text")
self.assertEqual(summarize_list(elem.findall(".//tag[1]")), ["tag", "tag"])
self.assertEqual(
summarize_list(etree.ElementTree(elem).findall(".//tag[1]")), ["tag", "tag"]
)

# FIXME: ET's Path module handles this case incorrectly; this gives
# a warning in 1.3, and the behaviour will be modified in 1.4.
self.assertEqual(summarize_list(etree.ElementTree(elem).findall("/tag")),
Expand All @@ -285,6 +325,233 @@ def test_find(self):
self.assertEqual(summarize_list(elem.findall(".//tag[@class][@id]")),
['tag', 'tag'])

def test_find_with_namespaces(self):
elem = etree.XML(
"""
<body xmlns="nsnone">
<tag class='a'>text</tag>
<tag class='b'/>
<section>
<tag class='b' id='inner'>subtext</tag>
</section>
</body>
"""
)
namespaces = {None: "nsnone"}

self.assertEqual(elem.find("tag", namespaces=namespaces).tag, "{nsnone}tag")


self.assertEqual(
etree.ElementTree(elem).find("tag", namespaces=namespaces).tag, "{nsnone}tag"
)
self.assertEqual(elem.find("section/tag", namespaces=namespaces).tag, "{nsnone}tag")
self.assertEqual(
etree.ElementTree(elem).find("section/tag", namespaces=namespaces).tag,
"{nsnone}tag",
)
self.assertEqual(elem.findtext("tag", namespaces=namespaces), "text")
self.assertEqual(elem.findtext("tog", namespaces=namespaces), None)
self.assertEqual(elem.findtext("tog", "default", namespaces=namespaces), "default")
self.assertEqual(
etree.ElementTree(elem).findtext("tag", namespaces=namespaces), "text"
)
self.assertEqual(
etree.ElementTree(elem).findtext(
"tog", default="default", namespaces=namespaces
),
"default",
)
self.assertEqual(elem.findtext("section/tag", namespaces=namespaces), "subtext")
self.assertEqual(
etree.ElementTree(elem).findtext("section/tag", namespaces=namespaces),
"subtext",
)
self.assertEqual(elem.find("tag[1]", namespaces=namespaces).tag, "{nsnone}tag")
self.assertEqual(
etree.ElementTree(elem).find("tag[1]", namespaces=namespaces).tag, "{nsnone}tag"
)

self.assertEqual(elem.findtext("tag[1]", namespaces=namespaces), "text")
self.assertTrue(elem.findtext("tag[2]", namespaces=namespaces) in {None, ''})
self.assertEqual(
summarize_list(elem.findall("tag", namespaces=namespaces)),
["{nsnone}tag", "{nsnone}tag"],
)
Comment on lines +377 to +380
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Given how many examples you add here, this seems worth its own custom assert method: .assertFindallEqual(element, path, expected, namespaces=None).

Also, do we actually need to add these tests? It seems to me that we could get away with running the existing tests three times: once without a namespaces dict, once with an empty one, and once with a non-empty one.

self.assertEqual(
summarize_list(elem.findall("*", namespaces=namespaces)),
["{nsnone}tag", "{nsnone}tag", "{nsnone}section"],
)
self.assertEqual(
summarize_list(elem.findall(".//tag", namespaces=namespaces)),
["{nsnone}tag", "{nsnone}tag", "{nsnone}tag"],
)
self.assertEqual(
summarize_list(elem.findall("section/tag", namespaces=namespaces)),
["{nsnone}tag"],
)
self.assertEqual(
summarize_list(elem.findall("section//tag", namespaces=namespaces)),
["{nsnone}tag"],
)

self.assertEqual(
summarize_list(elem.findall("section/*", namespaces=namespaces)),
["{nsnone}tag"],
)
self.assertEqual(
summarize_list(elem.findall("section//*", namespaces=namespaces)),
["{nsnone}tag"],
)
self.assertEqual(
summarize_list(elem.findall("section/.//*", namespaces=namespaces)),
["{nsnone}tag"],
)

self.assertEqual(
summarize_list(elem.findall("*/tag", namespaces=namespaces)), ["{nsnone}tag"]
)
self.assertEqual(
summarize_list(elem.findall("*/./tag", namespaces=namespaces)), ["{nsnone}tag"]
)
self.assertEqual(
summarize_list(elem.findall("./tag", namespaces=namespaces)),
["{nsnone}tag", "{nsnone}tag"],
)
self.assertEqual(
summarize_list(elem.findall(".//tag", namespaces=namespaces)),
["{nsnone}tag", "{nsnone}tag", "{nsnone}tag"],
)
self.assertEqual(
summarize_list(elem.findall("././tag", namespaces=namespaces)),
["{nsnone}tag", "{nsnone}tag"],
)

self.assertEqual(
summarize_list(elem.findall(".//tag[@class]", namespaces=namespaces)),
["{nsnone}tag", "{nsnone}tag", "{nsnone}tag"],
)
self.assertEqual(
summarize_list(elem.findall(".//tag[ @class]", namespaces=namespaces)),
["{nsnone}tag", "{nsnone}tag", "{nsnone}tag"],
)
self.assertEqual(
summarize_list(elem.findall(".//tag[@class ]", namespaces=namespaces)),
["{nsnone}tag", "{nsnone}tag", "{nsnone}tag"],
)
self.assertEqual(
summarize_list(elem.findall(".//tag[ @class ]", namespaces=namespaces)),
["{nsnone}tag", "{nsnone}tag", "{nsnone}tag"],
)
self.assertEqual(
summarize_list(elem.findall(".//tag[@class='a']", namespaces=namespaces)),
["{nsnone}tag"],
)
self.assertEqual(
summarize_list(elem.findall('.//tag[@class="a"]', namespaces=namespaces)),
["{nsnone}tag"],
)
self.assertEqual(
summarize_list(elem.findall(".//tag[@class='b']", namespaces=namespaces)),
["{nsnone}tag", "{nsnone}tag"],
)
self.assertEqual(
summarize_list(elem.findall('.//tag[@class="b"]', namespaces=namespaces)),
["{nsnone}tag", "{nsnone}tag"],
)
self.assertEqual(
summarize_list(elem.findall('.//tag[@class = "b"]', namespaces=namespaces)),
["{nsnone}tag", "{nsnone}tag"],
)
self.assertEqual(
summarize_list(elem.findall(".//tag[@id]", namespaces=namespaces)),
["{nsnone}tag"],
)
self.assertEqual(
summarize_list(elem.findall(".//tag[@class][@id]", namespaces=namespaces)),
["{nsnone}tag"],
)
self.assertEqual(
summarize_list(elem.findall(".//section[tag]", namespaces=namespaces)),
["{nsnone}section"],
)
self.assertEqual(
summarize_list(elem.findall(".//section[element]", namespaces=namespaces)), []
)

self.assertEqual(
summarize_list(
elem.findall(".//section[tag='subtext']", namespaces=namespaces)
),
["{nsnone}section"],
)
self.assertEqual(
summarize_list(
elem.findall(".//section[tag ='subtext']", namespaces=namespaces)
),
["{nsnone}section"],
)
self.assertEqual(
summarize_list(
elem.findall(".//section[tag= 'subtext']", namespaces=namespaces)
),
["{nsnone}section"],
)
self.assertEqual(
summarize_list(
elem.findall(".//section[tag = 'subtext']", namespaces=namespaces)
),
["{nsnone}section"],
)
self.assertEqual(
summarize_list(
elem.findall(".//section[ tag = 'subtext' ]", namespaces=namespaces)
),
["{nsnone}section"],
)
self.assertEqual(
summarize_list(elem.findall(".//tag[.='subtext']", namespaces=namespaces)),
["{nsnone}tag"],
)
self.assertEqual(
summarize_list(elem.findall(".//tag[. ='subtext']", namespaces=namespaces)),
["{nsnone}tag"],
)
self.assertEqual(
summarize_list(elem.findall('.//tag[.= "subtext"]', namespaces=namespaces)),
["{nsnone}tag"],
)
self.assertEqual(
summarize_list(elem.findall(".//tag[. = 'subtext']", namespaces=namespaces)),
["{nsnone}tag"],
)
self.assertEqual(
summarize_list(elem.findall(".//tag[. = 'subtext ']", namespaces=namespaces)),
[],
)
self.assertEqual(
summarize_list(elem.findall(".//tag[.= ' subtext']", namespaces=namespaces)), []
)

self.assertEqual(summarize_list(elem.findall("../tag", namespaces=namespaces)), [])
self.assertEqual(
summarize_list(elem.findall("section/../tag", namespaces=namespaces)),
["{nsnone}tag", "{nsnone}tag"],
)
self.assertEqual(
summarize_list(etree.ElementTree(elem).findall("./tag", namespaces=namespaces)),
["{nsnone}tag", "{nsnone}tag"],
)
self.assertEqual(
summarize_list(elem.findall(".//tag[1]", namespaces=namespaces)),
["{nsnone}tag", "{nsnone}tag"],
)
self.assertEqual(
summarize_list(
etree.ElementTree(elem).findall(".//tag[1]", namespaces=namespaces)
),
["{nsnone}tag", "{nsnone}tag"],
)

#class ElementTreeElementPathTestCase(EtreeElementPathTestCase):
# import xml.etree.ElementTree as etree
Expand Down