Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Cache Publisher objects to speed up Sphinx #10337

Merged
merged 6 commits into from May 7, 2022
Merged
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
8 changes: 6 additions & 2 deletions sphinx/builders/__init__.py
Expand Up @@ -16,7 +16,8 @@
from sphinx.events import EventManager
from sphinx.io import read_doc
from sphinx.locale import __
from sphinx.util import import_object, logging, progress_message, rst, status_iterator
from sphinx.util import (get_filetype, import_object, logging, progress_message, rst,
status_iterator)
from sphinx.util.build_phase import BuildPhase
from sphinx.util.console import bold # type: ignore
from sphinx.util.docutils import sphinx_domains
Expand Down Expand Up @@ -464,8 +465,11 @@ def read_doc(self, docname: str) -> None:
if path.isfile(docutilsconf):
self.env.note_dependency(docutilsconf)

filename = self.env.doc2path(docname)
filetype = get_filetype(self.app.config.source_suffix, filename)
publisher = self.app.registry.get_publisher(self.app, filetype)
with sphinx_domains(self.env), rst.default_role(docname, self.config.default_role):
doctree = read_doc(self.app, self.env, self.env.doc2path(docname))
doctree = read_doc(publisher, docname, filename)

# store time of reading, for outdated files detection
# (Some filesystems have coarse timestamp resolution;
Expand Down
27 changes: 19 additions & 8 deletions sphinx/builders/html/__init__.py
Expand Up @@ -10,8 +10,9 @@
from typing import IO, Any, Dict, Iterable, Iterator, List, Optional, Set, Tuple, Type
from urllib.parse import quote

import docutils.readers.doctree
from docutils import nodes
from docutils.core import publish_parts
from docutils.core import Publisher
from docutils.frontend import OptionParser
from docutils.io import DocTreeInput, StringOutput
from docutils.nodes import Node
Expand Down Expand Up @@ -211,6 +212,19 @@ def __init__(self, app: Sphinx) -> None:
# JS files
self.script_files: List[JavaScript] = []

# Cached Publisher for writing doctrees to HTML
reader = docutils.readers.doctree.Reader(parser_name='restructuredtext')
pub = Publisher(
reader=reader,
parser=reader.parser,
writer=HTMLWriter(self),
source_class=DocTreeInput,
destination=StringOutput(encoding='unicode'),
)
op = pub.setup_option_parser(output_encoding='unicode', traceback=True)
pub.settings = op.get_default_values()
self._publisher = pub

def init(self) -> None:
self.build_info = self.create_build_info()
# basename of images directory
Expand Down Expand Up @@ -421,15 +435,12 @@ def render_partial(self, node: Node) -> Dict[str, str]:
"""Utility: Render a lone doctree node."""
if node is None:
return {'fragment': ''}

doc = new_document('<partial node>')
doc.append(node)

writer = HTMLWriter(self)
return publish_parts(reader_name='doctree',
writer=writer,
source_class=DocTreeInput,
settings_overrides={'output_encoding': 'unicode'},
source=doc)
self._publisher.set_source(doc)
self._publisher.publish()
return self._publisher.writer.parts

def prepare_writing(self, docnames: Set[str]) -> None:
# create the search indexer
Expand Down
42 changes: 28 additions & 14 deletions sphinx/io.py
Expand Up @@ -7,7 +7,6 @@
from docutils.frontend import Values
from docutils.io import FileInput, Input, NullOutput
from docutils.parsers import Parser
from docutils.parsers.rst import Parser as RSTParser
from docutils.readers import standalone
from docutils.transforms import Transform
from docutils.transforms.references import DanglingReferences
Expand All @@ -20,7 +19,7 @@
from sphinx.transforms.i18n import (Locale, PreserveTranslatableMessages,
RemoveTranslatableInline)
from sphinx.transforms.references import SphinxDomains
from sphinx.util import UnicodeDecodeErrorHandler, get_filetype, logging
from sphinx.util import UnicodeDecodeErrorHandler, logging
from sphinx.util.docutils import LoggingReporter
from sphinx.versioning import UIDTransform

Expand Down Expand Up @@ -153,30 +152,45 @@ def __init__(self, *args: Any, **kwargs: Any) -> None:
super().__init__(*args, **kwargs)


def read_doc(app: "Sphinx", env: BuildEnvironment, filename: str) -> nodes.document:
tk0miya marked this conversation as resolved.
Show resolved Hide resolved
def read_doc(publisher: Publisher, docname: str, filename: str) -> nodes.document:
AA-Turner marked this conversation as resolved.
Show resolved Hide resolved
"""Parse a document and convert to doctree."""
# set up error_handler for the target document
error_handler = UnicodeDecodeErrorHandler(env.docname)
error_handler = UnicodeDecodeErrorHandler(docname)
codecs.register_error('sphinx', error_handler) # type: ignore

publisher.set_source(source_path=filename)
publisher.publish()

doctree = publisher.document
# settings get modified in ``write_doctree``; get a local copy
AA-Turner marked this conversation as resolved.
Show resolved Hide resolved
doctree.settings = doctree.settings.copy()
return doctree


def create_publisher(app: "Sphinx", filetype: str) -> Publisher:
reader = SphinxStandaloneReader()
reader.setup(app)
filetype = get_filetype(app.config.source_suffix, filename)

parser = app.registry.create_source_parser(app, filetype)
if parser.__class__.__name__ == 'CommonMarkParser' and parser.settings_spec == ():
# a workaround for recommonmark
# If recommonmark.AutoStrictify is enabled, the parser invokes reST parser
# internally. But recommonmark-0.4.0 does not provide settings_spec for reST
# parser. As a workaround, this copies settings_spec for RSTParser to the
# CommonMarkParser.
from docutils.parsers.rst import Parser as RSTParser

parser.settings_spec = RSTParser.settings_spec

pub = Publisher(reader=reader,
parser=parser,
writer=SphinxDummyWriter(),
source_class=SphinxFileInput,
destination=NullOutput())
pub.process_programmatic_settings(None, env.settings, None)
pub.set_source(source_path=filename)
pub.publish()
return pub.document
pub = Publisher(
reader=reader,
parser=parser,
writer=SphinxDummyWriter(),
source_class=SphinxFileInput,
destination=NullOutput()
)
# Propagate exceptions by default when used programmatically:
defaults = {"traceback": True, **app.env.settings}
# Set default settings
pub.settings = pub.setup_option_parser(**defaults).get_default_values() # type: ignore
return pub
14 changes: 14 additions & 0 deletions sphinx/registry.py
Expand Up @@ -8,6 +8,7 @@
Union)

from docutils import nodes
from docutils.core import Publisher
from docutils.io import Input
from docutils.nodes import Element, Node, TextElement
from docutils.parsers import Parser
Expand All @@ -27,6 +28,7 @@
from sphinx.environment import BuildEnvironment
from sphinx.errors import ExtensionError, SphinxError, VersionRequirementError
from sphinx.extension import Extension
from sphinx.io import create_publisher
from sphinx.locale import __
from sphinx.parsers import Parser as SphinxParser
from sphinx.roles import XRefRole
Expand Down Expand Up @@ -125,6 +127,9 @@ def __init__(self) -> None:
#: additional transforms; list of transforms
self.transforms: List[Type[Transform]] = []

# private cache of Docutils Publishers (file type -> publisher object)
AA-Turner marked this conversation as resolved.
Show resolved Hide resolved
self.publishers: Dict[str, Publisher] = {}

def add_builder(self, builder: Type[Builder], override: bool = False) -> None:
logger.debug('[app] adding builder: %r', builder)
if not hasattr(builder, 'name'):
Expand Down Expand Up @@ -461,6 +466,15 @@ def get_envversion(self, app: "Sphinx") -> Dict[str, str]:
envversion['sphinx'] = ENV_VERSION
return envversion

def get_publisher(self, app: "Sphinx", filetype: str) -> Publisher:
    """Return the Publisher registered for *filetype*, building it on first use.

    Publishers are kept in ``self.publishers`` keyed by file type, so
    ``create_publisher`` is called at most once per file type.
    """
    cache = self.publishers
    if filetype not in cache:
        # First request for this file type: create the publisher and remember it.
        cache[filetype] = create_publisher(app, filetype)
    return cache[filetype]


def merge_source_suffix(app: "Sphinx", config: Config) -> None:
"""Merge any user-specified source_suffix with any added by extensions."""
Expand Down