Skip to content

Commit

Permalink
Move XML Name pattern to epub3
Browse files Browse the repository at this point in the history
  • Loading branch information
AA-Turner committed Jan 3, 2023
1 parent 5eb79c1 commit f4ab9ad
Show file tree
Hide file tree
Showing 4 changed files with 31 additions and 35 deletions.
18 changes: 16 additions & 2 deletions sphinx/builders/epub3.py
Expand Up @@ -6,6 +6,7 @@
from __future__ import annotations

import html
import re
from os import path
from typing import Any, NamedTuple

Expand All @@ -14,7 +15,7 @@
from sphinx.builders import _epub_base
from sphinx.config import ENUM, Config
from sphinx.locale import __
from sphinx.util import logging, xmlname_checker
from sphinx.util import logging
from sphinx.util.fileutil import copy_asset_file
from sphinx.util.i18n import format_date
from sphinx.util.osutil import make_filename
Expand Down Expand Up @@ -50,6 +51,19 @@ class NavPoint(NamedTuple):
'xmlns:epub="http://www.idpf.org/2007/ops">'
)

# Regular expression for the XML ``Name`` production:
# https://www.w3.org/TR/REC-xml/#NT-Name
#
# NameStartChar: alternation of every character (range) allowed as the first
# character of a Name.
_xml_name_start_char = (
    ':|[A-Z]|_|[a-z]|[\u00C0-\u00D6]'
    '|[\u00D8-\u00F6]|[\u00F8-\u02FF]|[\u0370-\u037D]'
    '|[\u037F-\u1FFF]|[\u200C-\u200D]|[\u2070-\u218F]'
    '|[\u2C00-\u2FEF]|[\u3001-\uD7FF]|[\uF900-\uFDCF]'
    '|[\uFDF0-\uFFFD]|[\U00010000-\U000EFFFF]'
)
# NameChar: every NameStartChar, plus '-', '.', ASCII digits, U+00B7,
# U+0300..U+036F and U+203F..U+2040.
# The leading '|' before '\-' is required: without it the hyphen is glued onto
# the last NameStartChar alternative ('[\U00010000-\U000EFFFF]\-') and a plain
# '-' is never accepted as a NameChar.
_xml_name_char = (
    _xml_name_start_char
    + r'|\-|\.'
    '|[0-9]|\u00B7|[\u0300-\u036F]|[\u203F-\u2040]'
)
# Name ::= NameStartChar (NameChar)*
# The pattern is not anchored at the end; with ``.match()`` it tests a prefix.
_XML_NAME_PATTERN = re.compile(f'({_xml_name_start_char})({_xml_name_char})*')


class Epub3Builder(_epub_base.EpubBuilder):
"""
Expand Down Expand Up @@ -187,7 +201,7 @@ def validate_config_values(app: Sphinx) -> None:
logger.warning(__('conf value "epub_language" (or "language") '
'should not be empty for EPUB3'))
# <package> unique-identifier attribute
if not xmlname_checker().match(app.config.epub_uid):
if not _XML_NAME_PATTERN.match(app.config.epub_uid):
logger.warning(__('conf value "epub_uid" should be XML NAME for EPUB3'))
# dc:title
if not app.config.epub_title:
Expand Down
31 changes: 6 additions & 25 deletions sphinx/util/__init__.py
Expand Up @@ -371,32 +371,11 @@ def isurl(url: str) -> bool:
return bool(url) and '://' in url


# NOTE(review): this hunk shows two implementations interleaved (diff markers
# and indentation are not preserved here). ``xmlname_checker`` builds the XML
# Name regex from nested lists via the ``convert`` helper and ends at the
# ``return re.compile(...)`` line; ``_xml_name_checker`` only imports and
# returns the precompiled ``_XML_NAME_PATTERN`` from sphinx.builders.epub3.
def xmlname_checker() -> re.Pattern:
# https://www.w3.org/TR/REC-xml/#NT-Name
# Each entry is either a single character or a two-item [first, last] range.
name_start_chars = [
':', ['A', 'Z'], '_', ['a', 'z'], ['\u00C0', '\u00D6'],
['\u00D8', '\u00F6'], ['\u00F8', '\u02FF'], ['\u0370', '\u037D'],
['\u037F', '\u1FFF'], ['\u200C', '\u200D'], ['\u2070', '\u218F'],
['\u2C00', '\u2FEF'], ['\u3001', '\uD7FF'], ['\uF900', '\uFDCF'],
['\uFDF0', '\uFFFD'], ['\U00010000', '\U000EFFFF']]

# Extra entries allowed after the first character; '-' and '.' are
# pre-escaped for use in a regex.
name_chars = [
"\\-", "\\.", ['0', '9'], '\u00B7', ['\u0300', '\u036F'],
['\u203F', '\u2040']
]

# Join entries with ``splitter``; a nested list is joined with '-' and wrapped
# in brackets, giving a regex character range such as ``[A-Z]``.
def convert(entries: Any, splitter: str = '|') -> str:
results = []
for entry in entries:
if isinstance(entry, list):
results.append('[%s]' % convert(entry, '-'))
else:
results.append(entry)
return splitter.join(results)
def _xml_name_checker():
# to prevent import cycles
from sphinx.builders.epub3 import _XML_NAME_PATTERN

start_chars_regex = convert(name_start_chars)
name_chars_regex = convert(name_chars)
# One start character, then any number of start-or-name characters; note the
# explicit '|' joining the two alternations.
return re.compile(f'({start_chars_regex})({start_chars_regex}|{name_chars_regex})*')
return _XML_NAME_PATTERN


deprecated_alias('sphinx.util',
Expand All @@ -410,6 +389,7 @@ def convert(entries: Any, splitter: str = '|') -> str:
'rfc1123_to_epoch': _http_date.rfc1123_to_epoch,
'save_traceback': _exceptions.save_traceback,
'format_exception_cut_frames': _exceptions.format_exception_cut_frames,
'xmlname_checker': _xml_name_checker,
},
RemovedInSphinx70Warning,
{
Expand All @@ -422,4 +402,5 @@ def convert(entries: Any, splitter: str = '|') -> str:
'rfc1123_to_epoch': 'sphinx.http_date.rfc1123_to_epoch',
'save_traceback': 'sphinx.exceptions.save_traceback',
'format_exception_cut_frames': 'sphinx.exceptions.format_exception_cut_frames', # NoQA: E501
'xmlname_checker': 'sphinx.builders.epub3._XML_NAME_PATTERN',
})
8 changes: 8 additions & 0 deletions tests/test_build_epub.py
Expand Up @@ -7,6 +7,8 @@

import pytest

from sphinx.builders.epub3 import _XML_NAME_PATTERN


# check given command is runnable
def runnable(command):
Expand Down Expand Up @@ -382,3 +384,9 @@ def test_run_epubcheck(app):
print(exc.stdout.decode('utf-8'))
print(exc.stderr.decode('utf-8'))
raise AssertionError('epubcheck exited with return code %s' % exc.returncode)


def test_xml_name_pattern_check():
    """Names starting with a letter match the pattern; a leading digit does not."""
    assert _XML_NAME_PATTERN.match('id-pub') is not None
    assert _XML_NAME_PATTERN.match('webpage') is not None
    assert _XML_NAME_PATTERN.match('1bfda21') is None
9 changes: 1 addition & 8 deletions tests/test_util.py
Expand Up @@ -6,7 +6,7 @@
import pytest

from sphinx.errors import ExtensionError
from sphinx.util import encode_uri, ensuredir, import_object, parselinenos, xmlname_checker
from sphinx.util import encode_uri, ensuredir, import_object, parselinenos


def test_encode_uri():
Expand Down Expand Up @@ -75,10 +75,3 @@ def test_parselinenos():
parselinenos('-', 10)
with pytest.raises(ValueError):
parselinenos('3-1', 10)


def test_xmlname_check():
    """The pattern from ``xmlname_checker`` matches letter-initial names only."""
    pattern = xmlname_checker()
    assert pattern.match('id-pub') is not None
    assert pattern.match('webpage') is not None
    assert pattern.match('1bfda21') is None

0 comments on commit f4ab9ad

Please sign in to comment.