# Source code for sphinx.transforms

"""Docutils transforms used by Sphinx when reading documents."""

from __future__ import annotations

import re
import unicodedata
from typing import TYPE_CHECKING, cast

from docutils import nodes
from docutils.transforms import Transform, Transformer
from docutils.transforms.parts import ContentsFilter
from docutils.transforms.references import Footnotes
from docutils.transforms.universal import SmartQuotes
from docutils.utils import normalize_language_tag
from docutils.utils.smartquotes import smartchars

from sphinx import addnodes
from sphinx.deprecation import _deprecation_warning
from sphinx.locale import _, __
from sphinx.util import logging
from sphinx.util.i18n import format_date
from sphinx.util.nodes import apply_source_workaround, is_smartquotable

if TYPE_CHECKING:
    from collections.abc import Iterable, Iterator
    from typing import Any, ClassVar, Literal

    from docutils.nodes import Node
    from typing_extensions import TypeIs

    from sphinx.application import Sphinx
    from sphinx.config import Config
    from sphinx.domains.std import StandardDomain
    from sphinx.environment import BuildEnvironment
    from sphinx.util.typing import ExtensionMetadata

    type _DEFAULT_SUBSTITUTION_NAMES = Literal[
        'version',
        'release',
        'today',
        'translation progress',
    ]


# Module-level logger, named after this module.
logger = logging.getLogger(__name__)

# Names of the substitutions that ``DefaultSubstitutions`` fills in when the
# document does not define them itself.
_DEFAULT_SUBSTITUTIONS = frozenset({
    'version',
    'release',
    'today',
    'translation progress',
})



# [docs]
class SphinxTransform(Transform):
    """Base class for the transforms defined in this module.

    On top of ``docutils.transforms.Transform`` it exposes the Sphinx objects
    reachable from the document settings as read-only properties.
    """

    @property
    def env(self) -> BuildEnvironment:
        """Reference to the :class:`.BuildEnvironment` object."""
        settings = self.document.settings
        return settings.env

    @property
    def config(self) -> Config:
        """Reference to the :class:`.Config` object."""
        environment = self.env
        return environment.config

    @property
    def app(self) -> Sphinx:
        """Reference to the :class:`.Sphinx` object.

        Every access reports a deprecation warning that names the concrete
        transform class before the application object is returned.
        """
        klass = self.__class__
        _deprecation_warning(
            klass.__module__, f'{klass.__qualname__}.app', remove=(11, 0)
        )
        return self.env._app



class SphinxTransformer(Transformer):
    """A transformer for Sphinx.

    It makes the build environment available on ``document.settings.env``
    while transforms run, and it can also be applied to a node that is not a
    ``nodes.document`` by temporarily wrapping that node in a fresh document.
    """

    document: nodes.document
    # Build environment to publish on the document settings; may stay unset.
    env: BuildEnvironment | None = None

    def set_environment(self, env: BuildEnvironment) -> None:
        """Remember *env* so that ``apply_transforms`` can publish it."""
        self.env = env

    def apply_transforms(self) -> None:
        """Apply the registered transforms to ``self.document``.

        When ``self.document`` is a real document, the environment is attached
        to its settings (unless one is already present) and the transforms
        run directly.  Otherwise the node is wrapped in a new document for the
        duration of the run and unwrapped again afterwards, even if a
        transform raises.
        """
        if isinstance(self.document, nodes.document):
            if not hasattr(self.document.settings, 'env') and self.env:
                self.document.settings.env = self.env

            super().apply_transforms()
            return

        # wrap the target node by document node during transforming
        from sphinx.util.docutils import new_document

        # Build the wrapper *before* entering ``try``: if any of these steps
        # fails, ``self.document`` still refers to the caller's node and there
        # is nothing to unwrap.
        wrapper = new_document('')
        if self.env:
            wrapper.settings.env = self.env
        wrapper += self.document
        self.document = wrapper
        try:
            super().apply_transforms()
        finally:
            # Always hand back the (possibly transformed) first child.
            self.document = wrapper[0]


class DefaultSubstitutions(SphinxTransform):
    """Fill in the built-in substitutions that the document leaves undefined."""

    # run before the default Substitutions
    default_priority = 210

    def apply(self, **kwargs: Any) -> None:
        # definitions made in the document itself take precedence
        defined_in_document = set(self.document.substitution_defs)
        pending = _DEFAULT_SUBSTITUTIONS - defined_in_document
        for reference in self.document.findall(nodes.substitution_reference):
            refname = reference['refname']
            if refname not in pending:
                continue
            replacement = self._handle_default_substitution(refname)
            reference.replace_self(replacement)

    def _handle_default_substitution(
        self, name: _DEFAULT_SUBSTITUTION_NAMES
    ) -> nodes.Text:
        """Return the text node that replaces the default substitution *name*."""
        if name == 'translation progress':
            # special handling: calculate translation progress
            text = _calculate_translation_progress(self.document)
        elif name == 'today':
            text = self.config.today
            if not text:
                # special handling: can also specify a strftime format
                date_format = self.config.today_fmt or _('%b %d, %Y')
                text = format_date(date_format, language=self.config.language)
        else:
            # config.version and config.release
            text = getattr(self.config, name)
        return nodes.Text(text)


def _calculate_translation_progress(document: nodes.document) -> str:
    try:
        translation_progress = document['translation_progress']
    except KeyError:
        return _('could not calculate translation progress!')

    total = translation_progress['total']
    translated = translation_progress['translated']
    if total <= 0:
        return _('no translated elements!')
    return f'{translated / total:.2%}'


class MoveModuleTargets(SphinxTransform):
    """Fold a module target that directly follows a section's title and index
    node into the section itself: its IDs are put at the front of the
    section's IDs and the target node is removed.

    XXX Python specific
    """

    default_priority = 210

    def apply(self, **kwargs: Any) -> None:
        # Work on a snapshot, since matching targets are removed from the tree.
        for target in list(self.document.findall(nodes.target)):
            if not target['ids']:
                continue
            if 'ismod' not in target:
                continue
            section = target.parent
            if type(section) is not nodes.section:
                continue
            # index 0: section title node
            # index 1: index node
            # index 2: target node
            if section.index(target) != 2:
                continue
            section['ids'][0:0] = target['ids']
            section.remove(target)


class HandleCodeBlocks(SphinxTransform):
    """Several code block related transformations.

    At present the only active step unwraps block quotes whose children are
    all doctest blocks: the block quote is replaced by those children.
    """

    default_priority = 210

    def apply(self, **kwargs: Any) -> None:
        # move doctest blocks out of blockquotes
        #
        # Collect the block quotes up front: ``replace_self`` with a list
        # rewrites the parent's child list, and doing that underneath the lazy
        # ``findall`` iterator can make it skip a following sibling.
        for node in list(self.document.findall(nodes.block_quote)):
            if all(isinstance(child, nodes.doctest_block) for child in node.children):
                node.replace_self(node.children)
        # combine successive doctest blocks (disabled)
        # for node in self.document.findall(nodes.doctest_block):
        #    if node not in node.parent.children:
        #        continue
        #    parindex = node.parent.index(node)
        #    while len(node.parent) > parindex+1 and \
        #            isinstance(node.parent[parindex+1], nodes.doctest_block):
        #        node[0] = nodes.Text(node[0] + '\n\n' +
        #                             node.parent[parindex+1][0])
        #        del node.parent[parindex+1]


class AutoNumbering(SphinxTransform):
    """Register IDs of tables, figures and literal_blocks to assign numbers."""

    default_priority = 210

    def apply(self, **kwargs: Any) -> None:
        std_domain: StandardDomain = self.env.domains.standard_domain

        for element in self.document.findall(nodes.Element):
            if not std_domain.is_enumerable_node(element):
                continue
            if std_domain.get_numfig_title(element) is None:
                continue
            # only nodes that have no ID yet are noted as implicit targets
            if element['ids'] == []:
                self.document.note_implicit_target(element)


class SortIds(SphinxTransform):
    """Sort section IDs so that the "id[0-9]+" one comes last.

    A section with more than one ID whose first ID starts with ``id`` gets
    that first ID moved to the end of the list.
    """

    default_priority = 261

    def apply(self, **kwargs: Any) -> None:
        for section in self.document.findall(nodes.section):
            ids = section['ids']
            if len(ids) <= 1 or not ids[0].startswith('id'):
                continue
            leading, *remaining = ids
            section['ids'] = [*remaining, leading]


# Maps the names looked up in the ``gettext_additional_targets`` setting to
# the node classes that ``ExtraTranslatableNodes`` flags as translatable.
TRANSLATABLE_NODES = {
    'literal-block': nodes.literal_block,
    'doctest-block': nodes.doctest_block,
    'raw': nodes.raw,
    'index': addnodes.index,
    'image': nodes.image,
}


class ApplySourceWorkaround(SphinxTransform):
    """Update source and rawsource attributes.

    Runs ``apply_source_workaround`` on every text element, image and topic
    node of the document.
    """

    default_priority = 10

    def apply(self, **kwargs: Any) -> None:
        patched_types = (nodes.TextElement, nodes.image, nodes.topic)
        for node in self.document.findall():
            if not isinstance(node, patched_types):
                continue
            apply_source_workaround(node)


class AutoIndexUpgrader(SphinxTransform):
    """Detect old style (4 column based indices) and automatically upgrade to new style.

    An index node holding at least one 4-item entry triggers a warning; each
    such entry is then extended in place with a trailing ``None``.
    """

    default_priority = 210

    def apply(self, **kwargs: Any) -> None:
        for index_node in self.document.findall(addnodes.index):
            if 'entries' not in index_node:
                continue
            entries = index_node['entries']
            if not any(len(entry) == 4 for entry in entries):
                continue

            msg = (
                __(
                    '4 column based index found. '
                    'It might be a bug of extensions you use: %r'
                )
                % entries
            )
            logger.warning(msg, location=index_node)
            for position, entry in enumerate(entries):
                if len(entry) == 4:
                    entries[position] = (*entry, None)


class ExtraTranslatableNodes(SphinxTransform):
    """Mark the node types selected by ``gettext_additional_targets`` as
    translatable.
    """

    default_priority = 10

    def apply(self, **kwargs: Any) -> None:
        requested = frozenset(self.config.gettext_additional_targets)
        node_classes = tuple(
            node_class
            for target_name, node_class in TRANSLATABLE_NODES.items()
            if target_name in requested
        )
        if not node_classes:
            # none of the known targets was requested
            return

        def is_translatable_node(node: Node) -> TypeIs[nodes.Element]:
            return isinstance(node, node_classes)

        for matched in self.document.findall(is_translatable_node):
            assert isinstance(matched, nodes.Element)
            matched['translatable'] = True


class UnreferencedFootnotesDetector(SphinxTransform):
    """Detect unreferenced footnotes and emit warnings.

    A footnote counts as unreferenced when its ``backrefs`` list is empty.
    Named and auto-numbered footnotes are only reported while they still have
    an entry in ``names``.
    """

    default_priority = Footnotes.default_priority + 2

    def apply(self, **kwargs: Any) -> None:
        for node in self.document.footnotes:
            # note we do not warn on duplicate footnotes here
            # (i.e. where the name has been moved to dupnames)
            # since this is already reported by docutils
            if not node['backrefs'] and node['names']:
                logger.warning(
                    __('Footnote [%s] is not referenced.'),
                    # 'names' is non-empty here, guaranteed by the condition above
                    node['names'][0],
                    type='ref',
                    subtype='footnote',
                    location=node,
                )
        for node in self.document.symbol_footnotes:
            if not node['backrefs']:
                logger.warning(
                    __('Footnote [*] is not referenced.'),
                    type='ref',
                    subtype='footnote',
                    location=node,
                )
        for node in self.document.autofootnotes:
            # as for named footnotes above: footnotes without an entry in
            # 'names' (e.g. duplicates, whose name has been moved to
            # dupnames) are not reported here
            if not node['backrefs'] and node['names']:
                logger.warning(
                    __('Footnote [#] is not referenced.'),
                    type='ref',
                    subtype='footnote',
                    location=node,
                )


class DoctestTransform(SphinxTransform):
    """Add the "doctest" class to every doctest_block node."""

    default_priority = 500

    def apply(self, **kwargs: Any) -> None:
        for block in self.document.findall(nodes.doctest_block):
            css_classes = block['classes']
            css_classes.append('doctest')


class FilterSystemMessages(SphinxTransform):
    """Remove system messages below a level threshold from a doctree.

    The threshold is 2 when ``keep_warnings`` is set and 5 otherwise.
    """

    default_priority = 999

    def apply(self, **kwargs: Any) -> None:
        if self.config.keep_warnings:
            threshold = 2
        else:
            threshold = 5

        # Iterate over a snapshot, because messages are removed from the tree.
        for message in list(self.document.findall(nodes.system_message)):
            if message['level'] >= threshold:
                continue
            logger.debug('%s [filtered system message]', message.astext())
            message.parent.remove(message)


class SphinxContentsFilter(ContentsFilter):
    """Used with BuildEnvironment.add_toc_from() to discard cross-file links
    within table-of-contents link nodes.
    """

    def visit_pending_xref(self, node: addnodes.pending_xref) -> None:
        """Delegate to the inherited ``ignore_node_but_process_children``."""
        self.ignore_node_but_process_children(node)

    def visit_image(self, node: nodes.image) -> None:
        """Skip image nodes."""
        raise nodes.SkipNode


class SphinxSmartQuotes(SmartQuotes, SphinxTransform):
    """SmartQuotes variant that follows the Sphinx configuration and leaves
    some extra node types untouched.

    refs: sphinx.parsers.RSTParser
    """

    default_priority = 750
    smartquotes_action: ClassVar[str] = SmartQuotes.smartquotes_action

    def apply(self, **kwargs: Any) -> None:
        if self.is_available():
            # override default settings with :confval:`smartquotes_action`
            # TODO: TYPING: Upstream docutils should be updated so that
            #       smartquotes_action accepts any iterable of characters
            #       and can be overridden per-instance.
            self.smartquotes_action = self.config.smartquotes_action  # type: ignore[misc]

            super().apply()

    def is_available(self) -> bool:
        """Tell whether smart quotes should be applied to this document."""
        excludes = self.config.smartquotes_excludes
        excluded_builders = excludes.get('builders', [])
        excluded_languages = excludes.get('languages', [])

        disabled = (
            # disabled by 3rd party extension (workaround)
            self.document.settings.smart_quotes is False
            # disabled by confval smartquotes
            or self.config.smartquotes is False
            # disabled by confval smartquotes_excludes['builders']
            or self.env._builder_cls.name in excluded_builders
            # disabled by confval smartquotes_excludes['languages']
            or self.config.language in excluded_languages
        )
        if disabled:
            return False

        # confirm selected language supports smart_quotes or not
        language_code = self.env.settings['language_code']
        for tag in normalize_language_tag(language_code):
            if tag in smartchars.quotes:
                return True
        return False

    def get_tokens(  # type: ignore[override]
        self, txtnodes: Iterable[Node]
    ) -> Iterator[tuple[Literal['literal', 'plain'], str]]:
        # A generator that yields ``(texttype, nodetext)`` tuples for a list
        # of "Text" nodes (interface to ``smartquotes.educate_tokens()``).
        for text_node in txtnodes:
            if not is_smartquotable(text_node):
                # skip smart quotes
                yield 'literal', text_node.astext()
                continue
            # SmartQuotes uses backslash escapes instead of null-escapes
            escaped = re.sub(r'(?<=\x00)([-\\\'".`])', r'\\\1', str(text_node))
            yield 'plain', escaped


class DoctreeReadEvent(SphinxTransform):
    """Emit :event:`doctree-read` event, passing the document along."""

    default_priority = 880

    def apply(self, **kwargs: Any) -> None:
        events = self.env.events
        events.emit('doctree-read', self.document)


class GlossarySorter(SphinxTransform):
    """Sort glossaries that have the ``sorted`` flag."""

    # This must be done after i18n, therefore not right
    # away in the glossary directive.
    default_priority = 500

    def apply(self, **kwargs: Any) -> None:
        def term_sort_key(item: Node) -> str:
            # lower-cased, NFD-normalised text of the item's first child
            first_child_text = cast('nodes.term', item)[0].astext()
            return unicodedata.normalize('NFD', first_child_text.lower())

        for glossary in self.document.findall(addnodes.glossary):
            if not glossary['sorted']:
                continue
            definition_list = cast('nodes.definition_list', glossary[0])
            definition_list[:] = sorted(definition_list, key=term_sort_key)


class ReorderConsecutiveTargetAndIndexNodes(SphinxTransform):
    """Move index nodes in front of the target nodes they are mixed with.

    Index nodes interspersed between target nodes prevent other
    Transformations, e.g. ``PropagateTargets``, from combining those
    target nodes.

    Given the following ``document`` as input::

        <document>
            <target ids="id1" ...>
            <index entries="...1...">
            <target ids="id2" ...>
            <target ids="id3" ...>
            <index entries="...2...">
            <target ids="id4" ...>

    The transformed result will be::

        <document>
            <index entries="...1...">
            <index entries="...2...">
            <target ids="id1" ...>
            <target ids="id2" ...>
            <target ids="id3" ...>
            <target ids="id4" ...>
    """

    # This transform MUST run before ``PropagateTargets``.
    default_priority = 220

    def apply(self, **kwargs: Any) -> None:
        all_targets = self.document.findall(nodes.target)
        for target_node in all_targets:
            _reorder_index_target_nodes(target_node)


def _reorder_index_target_nodes(start_node: nodes.target) -> None:
    """Put index nodes ahead of target nodes within one consecutive run.

    The run consists of ``start_node`` and the siblings following it, up to
    the first node that is neither a target nor an index node.  Inside the
    run, all index nodes are moved in front of the first target node; the
    relative order within each kind is kept.
    """
    run: list[nodes.target | addnodes.index] = []

    # A 'condition' filter cannot be used here: the walk has to stop at the
    # first node of another kind so that the run stays *consecutive*.
    for sibling in start_node.findall(descend=False, siblings=True):
        if not isinstance(sibling, (nodes.target, addnodes.index)):
            break
        run.append(sibling)

    if len(run) < 2:
        return  # Nothing to reorder

    head, tail = run[0], run[-1]
    parent = head.parent
    if parent == tail.parent:
        start = parent.index(head)
        stop = parent.index(tail)
        # only rewrite the slice when it covers exactly the collected run
        if start + len(run) - 1 == stop:
            parent[start : stop + 1] = sorted(run, key=_sort_key)


def _sort_key(node: nodes.Node) -> int:
    """Rank index nodes (0) before target nodes (1).

    Raises ``ValueError`` for any other node type.
    """
    # Must be a stable sort.
    ranked_types = (addnodes.index, nodes.target)
    for rank, node_type in enumerate(ranked_types):
        if isinstance(node, node_type):
            return rank
    msg = f'_sort_key called with unexpected node type {type(node)!r}'
    raise ValueError(msg)


def setup(app: Sphinx) -> ExtensionMetadata:
    """Register the transforms of this module with *app*.

    Returns the extension metadata, which declares the module a builtin that
    is safe for parallel reading and writing.
    """
    transforms = (
        ApplySourceWorkaround,
        ExtraTranslatableNodes,
        DefaultSubstitutions,
        MoveModuleTargets,
        HandleCodeBlocks,
        SortIds,
        DoctestTransform,
        AutoNumbering,
        AutoIndexUpgrader,
        FilterSystemMessages,
        UnreferencedFootnotesDetector,
        SphinxSmartQuotes,
        DoctreeReadEvent,
        GlossarySorter,
        ReorderConsecutiveTargetAndIndexNodes,
    )
    for transform in transforms:
        app.add_transform(transform)

    return {
        'version': 'builtin',
        'parallel_read_safe': True,
        'parallel_write_safe': True,
    }