Skip to content

Commit

Permalink
Merge pull request #10337 from AA-Turner/reuse-publisher
Browse files Browse the repository at this point in the history
Cache `Publisher` objects to speed up Sphinx
  • Loading branch information
tk0miya committed May 7, 2022
2 parents 19c347e + f9dce57 commit 431caac
Show file tree
Hide file tree
Showing 6 changed files with 90 additions and 11 deletions.
1 change: 1 addition & 0 deletions CHANGES
Expand Up @@ -54,6 +54,7 @@ Deprecated
* The ``language`` argument of ``sphinx.util.i18n:format_date()`` becomes
required
* ``sphinx.builders.html.html5_ready``
* ``sphinx.io.read_doc()``
* ``sphinx.util.docutils.__version_info__``
* ``sphinx.util.docutils.is_html5_writer_available()``
* ``sphinx.writers.latex.LaTeXWriter.docclasses``
Expand Down
5 changes: 5 additions & 0 deletions doc/extdev/deprecated.rst
Expand Up @@ -47,6 +47,11 @@ The following is a list of deprecated interfaces.
- 7.0
- N/A

* - ``sphinx.io.read_doc()``
- 5.0
- 7.0
- ``sphinx.builders.Builder.read_doc()``

* - ``sphinx.util.docutils.__version_info__``
- 5.0
- 7.0
Expand Down
20 changes: 17 additions & 3 deletions sphinx/builders/__init__.py
@@ -1,5 +1,6 @@
"""Builder superclass for all builders."""

import codecs
import pickle
import time
from os import path
Expand All @@ -14,9 +15,9 @@
from sphinx.environment.adapters.asset import ImageAdapter
from sphinx.errors import SphinxError
from sphinx.events import EventManager
from sphinx.io import read_doc
from sphinx.locale import __
from sphinx.util import import_object, logging, progress_message, rst, status_iterator
from sphinx.util import (UnicodeDecodeErrorHandler, get_filetype, import_object, logging,
progress_message, rst, status_iterator)
from sphinx.util.build_phase import BuildPhase
from sphinx.util.console import bold # type: ignore
from sphinx.util.docutils import sphinx_domains
Expand Down Expand Up @@ -464,8 +465,21 @@ def read_doc(self, docname: str) -> None:
if path.isfile(docutilsconf):
self.env.note_dependency(docutilsconf)

filename = self.env.doc2path(docname)
filetype = get_filetype(self.app.config.source_suffix, filename)
publisher = self.app.registry.get_publisher(self.app, filetype)
with sphinx_domains(self.env), rst.default_role(docname, self.config.default_role):
doctree = read_doc(self.app, self.env, self.env.doc2path(docname))
# set up error_handler for the target document
codecs.register_error('sphinx', UnicodeDecodeErrorHandler(docname)) # type: ignore

publisher.set_source(source_path=filename)
publisher.publish()
doctree = publisher.document

# The settings object is reused by the Publisher for each document.
# Because we modify the settings object in ``write_doctree``, we
# need to ensure that each doctree has an independent copy.
doctree.settings = doctree.settings.copy()

# store time of reading, for outdated files detection
# (Some filesystems have coarse timestamp resolution;
Expand Down
27 changes: 19 additions & 8 deletions sphinx/builders/html/__init__.py
Expand Up @@ -11,8 +11,9 @@
from typing import IO, Any, Dict, Iterable, Iterator, List, Optional, Set, Tuple, Type
from urllib.parse import quote

import docutils.readers.doctree
from docutils import nodes
from docutils.core import publish_parts
from docutils.core import Publisher
from docutils.frontend import OptionParser
from docutils.io import DocTreeInput, StringOutput
from docutils.nodes import Node
Expand Down Expand Up @@ -207,6 +208,19 @@ def __init__(self, app: Sphinx) -> None:
# JS files
self.script_files: List[JavaScript] = []

# Cached Publisher for writing doctrees to HTML
reader = docutils.readers.doctree.Reader(parser_name='restructuredtext')
pub = Publisher(
reader=reader,
parser=reader.parser,
writer=HTMLWriter(self),
source_class=DocTreeInput,
destination=StringOutput(encoding='unicode'),
)
op = pub.setup_option_parser(output_encoding='unicode', traceback=True)
pub.settings = op.get_default_values()
self._publisher = pub

def init(self) -> None:
self.build_info = self.create_build_info()
# basename of images directory
Expand Down Expand Up @@ -417,15 +431,12 @@ def render_partial(self, node: Node) -> Dict[str, str]:
"""Utility: Render a lone doctree node."""
if node is None:
return {'fragment': ''}

doc = new_document('<partial node>')
doc.append(node)

writer = HTMLWriter(self)
return publish_parts(reader_name='doctree',
writer=writer,
source_class=DocTreeInput,
settings_overrides={'output_encoding': 'unicode'},
source=doc)
self._publisher.set_source(doc)
self._publisher.publish()
return self._publisher.writer.parts

def prepare_writing(self, docnames: Set[str]) -> None:
# create the search indexer
Expand Down
34 changes: 34 additions & 0 deletions sphinx/io.py
@@ -1,5 +1,6 @@
"""Input/Output files"""
import codecs
import warnings
from typing import TYPE_CHECKING, Any, List, Type

from docutils import nodes
Expand All @@ -14,6 +15,7 @@
from docutils.writers import UnfilteredWriter

from sphinx import addnodes
from sphinx.deprecation import RemovedInSphinx70Warning
from sphinx.environment import BuildEnvironment
from sphinx.transforms import (AutoIndexUpgrader, DoctreeReadEvent, FigureAligner,
SphinxTransformer)
Expand Down Expand Up @@ -155,6 +157,9 @@ def __init__(self, *args: Any, **kwargs: Any) -> None:

def read_doc(app: "Sphinx", env: BuildEnvironment, filename: str) -> nodes.document:
"""Parse a document and convert to doctree."""
warnings.warn('sphinx.io.read_doc() is deprecated.',
RemovedInSphinx70Warning, stacklevel=2)

# set up error_handler for the target document
error_handler = UnicodeDecodeErrorHandler(env.docname)
codecs.register_error('sphinx', error_handler) # type: ignore
Expand All @@ -180,3 +185,32 @@ def read_doc(app: "Sphinx", env: BuildEnvironment, filename: str) -> nodes.docum
pub.set_source(source_path=filename)
pub.publish()
return pub.document


def create_publisher(app: "Sphinx", filetype: str) -> Publisher:
    """Build a docutils ``Publisher`` for reading sources of *filetype*.

    The publisher combines a ``SphinxStandaloneReader`` set up for *app*,
    the source parser the registry creates for *filetype*, a
    ``SphinxDummyWriter``, ``SphinxFileInput`` as the source class and a
    ``NullOutput`` destination.  Its ``settings`` attribute is populated
    with option-parser defaults derived from ``app.env.settings``.
    """
    doc_reader = SphinxStandaloneReader()
    doc_reader.setup(app)

    source_parser = app.registry.create_source_parser(app, filetype)
    is_commonmark = source_parser.__class__.__name__ == 'CommonMarkParser'
    if is_commonmark and source_parser.settings_spec == ():
        # Workaround for recommonmark: when recommonmark.AutoStrictify is
        # enabled the parser invokes the reST parser internally, yet
        # recommonmark-0.4.0 ships no settings_spec for it.  Borrow the
        # settings_spec of the reST parser for the CommonMarkParser.
        from docutils.parsers.rst import Parser as RSTParser

        source_parser.settings_spec = RSTParser.settings_spec

    publisher = Publisher(
        reader=doc_reader,
        parser=source_parser,
        writer=SphinxDummyWriter(),
        source_class=SphinxFileInput,
        destination=NullOutput()
    )

    # "traceback" defaults to True so that exceptions propagate when the
    # publisher is driven programmatically; an entry in the environment
    # settings takes precedence over that default.
    defaults = {"traceback": True, **app.env.settings}
    option_parser = publisher.setup_option_parser(**defaults)
    publisher.settings = option_parser.get_default_values()  # type: ignore
    return publisher
14 changes: 14 additions & 0 deletions sphinx/registry.py
Expand Up @@ -8,6 +8,7 @@
Union)

from docutils import nodes
from docutils.core import Publisher
from docutils.io import Input
from docutils.nodes import Element, Node, TextElement
from docutils.parsers import Parser
Expand All @@ -27,6 +28,7 @@
from sphinx.environment import BuildEnvironment
from sphinx.errors import ExtensionError, SphinxError, VersionRequirementError
from sphinx.extension import Extension
from sphinx.io import create_publisher
from sphinx.locale import __
from sphinx.parsers import Parser as SphinxParser
from sphinx.roles import XRefRole
Expand Down Expand Up @@ -125,6 +127,9 @@ def __init__(self) -> None:
#: additional transforms; list of transforms
self.transforms: List[Type[Transform]] = []

# private cache of Docutils Publishers (file type -> publisher object)
self.publishers: Dict[str, Publisher] = {}

def add_builder(self, builder: Type[Builder], override: bool = False) -> None:
logger.debug('[app] adding builder: %r', builder)
if not hasattr(builder, 'name'):
Expand Down Expand Up @@ -461,6 +466,15 @@ def get_envversion(self, app: "Sphinx") -> Dict[str, str]:
envversion['sphinx'] = ENV_VERSION
return envversion

def get_publisher(self, app: "Sphinx", filetype: str) -> Publisher:
    """Return the ``Publisher`` for *filetype*, creating it on first use.

    Publishers are kept in ``self.publishers`` keyed by file type, so
    ``create_publisher`` runs only when no entry exists for *filetype*.
    """
    if filetype not in self.publishers:
        # First request for this file type: build the publisher and cache it.
        self.publishers[filetype] = create_publisher(app, filetype)
    return self.publishers[filetype]


def merge_source_suffix(app: "Sphinx", config: Config) -> None:
"""Merge any user-specified source_suffix with any added by extensions."""
Expand Down

0 comments on commit 431caac

Please sign in to comment.