Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Cache Publisher objects to speed up Sphinx #10337

Merged
merged 6 commits into from May 7, 2022
Merged
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
4 changes: 3 additions & 1 deletion sphinx/builders/__init__.py
Expand Up @@ -464,8 +464,10 @@ def read_doc(self, docname: str) -> None:
if path.isfile(docutilsconf):
self.env.note_dependency(docutilsconf)

filename = self.env.doc2path(docname)
publisher = self.app.registry.create_publisher(self.app, filename)
with sphinx_domains(self.env), rst.default_role(docname, self.config.default_role):
doctree = read_doc(self.app, self.env, self.env.doc2path(docname))
doctree = read_doc(publisher, docname, filename)

# store time of reading, for outdated files detection
# (Some filesystems have coarse timestamp resolution;
Expand Down
27 changes: 19 additions & 8 deletions sphinx/builders/html/__init__.py
Expand Up @@ -10,8 +10,9 @@
from typing import IO, Any, Dict, Iterable, Iterator, List, Optional, Set, Tuple, Type
from urllib.parse import quote

import docutils.readers.doctree
from docutils import nodes
from docutils.core import publish_parts
from docutils.core import Publisher
from docutils.frontend import OptionParser
from docutils.io import DocTreeInput, StringOutput
from docutils.nodes import Node
Expand Down Expand Up @@ -211,6 +212,19 @@ def __init__(self, app: Sphinx) -> None:
# JS files
self.script_files: List[JavaScript] = []

# Cached Publisher for writing doctrees to HTML
reader = docutils.readers.doctree.Reader(parser_name='restructuredtext')
pub = Publisher(
reader=reader,
parser=reader.parser,
writer=HTMLWriter(self),
source_class=DocTreeInput,
destination=StringOutput(encoding='unicode'),
)
op = pub.setup_option_parser(output_encoding='unicode', traceback=True)
pub.settings = op.get_default_values()
self._publisher = pub

def init(self) -> None:
self.build_info = self.create_build_info()
# basename of images directory
Expand Down Expand Up @@ -421,15 +435,12 @@ def render_partial(self, node: Node) -> Dict[str, str]:
"""Utility: Render a lone doctree node."""
if node is None:
return {'fragment': ''}

doc = new_document('<partial node>')
doc.append(node)

writer = HTMLWriter(self)
return publish_parts(reader_name='doctree',
writer=writer,
source_class=DocTreeInput,
settings_overrides={'output_encoding': 'unicode'},
source=doc)
self._publisher.set_source(doc)
self._publisher.publish()
return self._publisher.writer.parts

def prepare_writing(self, docnames: Set[str]) -> None:
# create the search indexer
Expand Down
37 changes: 11 additions & 26 deletions sphinx/io.py
Expand Up @@ -5,9 +5,8 @@
from docutils import nodes
from docutils.core import Publisher
from docutils.frontend import Values
from docutils.io import FileInput, Input, NullOutput
from docutils.io import FileInput, Input
from docutils.parsers import Parser
from docutils.parsers.rst import Parser as RSTParser
from docutils.readers import standalone
from docutils.transforms import Transform
from docutils.transforms.references import DanglingReferences
Expand All @@ -20,7 +19,7 @@
from sphinx.transforms.i18n import (Locale, PreserveTranslatableMessages,
RemoveTranslatableInline)
from sphinx.transforms.references import SphinxDomains
from sphinx.util import UnicodeDecodeErrorHandler, get_filetype, logging
from sphinx.util import UnicodeDecodeErrorHandler, logging
from sphinx.util.docutils import LoggingReporter
from sphinx.versioning import UIDTransform

Expand Down Expand Up @@ -153,30 +152,16 @@ def __init__(self, *args: Any, **kwargs: Any) -> None:
super().__init__(*args, **kwargs)


def read_doc(app: "Sphinx", env: BuildEnvironment, filename: str) -> nodes.document:
tk0miya marked this conversation as resolved.
Show resolved Hide resolved
def read_doc(publisher: Publisher, docname: str, filename: str) -> nodes.document:
AA-Turner marked this conversation as resolved.
Show resolved Hide resolved
"""Parse a document and convert to doctree."""
# set up error_handler for the target document
error_handler = UnicodeDecodeErrorHandler(env.docname)
error_handler = UnicodeDecodeErrorHandler(docname)
codecs.register_error('sphinx', error_handler) # type: ignore

reader = SphinxStandaloneReader()
reader.setup(app)
filetype = get_filetype(app.config.source_suffix, filename)
parser = app.registry.create_source_parser(app, filetype)
if parser.__class__.__name__ == 'CommonMarkParser' and parser.settings_spec == ():
# a workaround for recommonmark
# If recommonmark.AutoStrictify is enabled, the parser invokes reST parser
# internally. But recommonmark-0.4.0 does not provide settings_spec for reST
# parser. As a workaround, this copies settings_spec for RSTParser to the
# CommonMarkParser.
parser.settings_spec = RSTParser.settings_spec

pub = Publisher(reader=reader,
parser=parser,
writer=SphinxDummyWriter(),
source_class=SphinxFileInput,
destination=NullOutput())
pub.process_programmatic_settings(None, env.settings, None)
pub.set_source(source_path=filename)
pub.publish()
return pub.document
publisher.set_source(source_path=filename)
publisher.publish()

doctree = publisher.document
# settings get modified in ``write_doctree``; get a local copy
AA-Turner marked this conversation as resolved.
Show resolved Hide resolved
doctree.settings = doctree.settings.copy()
return doctree
44 changes: 42 additions & 2 deletions sphinx/registry.py
Expand Up @@ -8,7 +8,8 @@
Union)

from docutils import nodes
from docutils.io import Input
from docutils.core import Publisher
from docutils.io import Input, NullOutput
from docutils.nodes import Element, Node, TextElement
from docutils.parsers import Parser
from docutils.parsers.rst import Directive
Expand All @@ -27,10 +28,11 @@
from sphinx.environment import BuildEnvironment
from sphinx.errors import ExtensionError, SphinxError, VersionRequirementError
from sphinx.extension import Extension
from sphinx.io import SphinxDummyWriter, SphinxFileInput, SphinxStandaloneReader
from sphinx.locale import __
from sphinx.parsers import Parser as SphinxParser
from sphinx.roles import XRefRole
from sphinx.util import logging
from sphinx.util import get_filetype, logging
from sphinx.util.logging import prefixed_warnings
from sphinx.util.typing import RoleFunction, TitleGetter

Expand Down Expand Up @@ -125,6 +127,9 @@ def __init__(self) -> None:
#: additional transforms; list of transforms
self.transforms: List[Type[Transform]] = []

# private cache of Docutils Publishers (file type -> publisher object)
AA-Turner marked this conversation as resolved.
Show resolved Hide resolved
self._publishers: Dict[str, Publisher] = {}

def add_builder(self, builder: Type[Builder], override: bool = False) -> None:
logger.debug('[app] adding builder: %r', builder)
if not hasattr(builder, 'name'):
Expand Down Expand Up @@ -461,6 +466,41 @@ def get_envversion(self, app: "Sphinx") -> Dict[str, str]:
envversion['sphinx'] = ENV_VERSION
return envversion

def create_publisher(self, app: "Sphinx", filename: str) -> Publisher:
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I consider the registry to be mere storage for components, so it should have as few "features" as possible. I'd like to move this back to sphinx.io again.

Copy link
Member Author

@AA-Turner AA-Turner May 2, 2022

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

How's the current version?

A

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Thank you for your update. I prefer the idea of create_publisher(). +1 for this.

filetype = get_filetype(app.config.source_suffix, filename)
try:
return self._publishers[filetype]
except KeyError:
pass

reader = SphinxStandaloneReader()
reader.setup(app)

parser = app.registry.create_source_parser(app, filetype)
if parser.__class__.__name__ == 'CommonMarkParser' and parser.settings_spec == ():
# a workaround for recommonmark
# If recommonmark.AutoStrictify is enabled, the parser invokes reST parser
# internally. But recommonmark-0.4.0 does not provide settings_spec for reST
# parser. As a workaround, this copies settings_spec for RSTParser to the
# CommonMarkParser.
from docutils.parsers.rst import Parser as RSTParser

parser.settings_spec = RSTParser.settings_spec

pub = Publisher(
reader=reader,
parser=parser,
writer=SphinxDummyWriter(),
source_class=SphinxFileInput,
destination=NullOutput()
)
# Propagate exceptions by default when used programmatically:
defaults = {"traceback": True, **app.env.settings}
# Set default settings
pub.settings = pub.setup_option_parser(**defaults).get_default_values() # type: ignore
self._publishers[filetype] = pub
return pub


def merge_source_suffix(app: "Sphinx", config: Config) -> None:
"""Merge any user-specified source_suffix with any added by extensions."""
Expand Down