"""The MessageCatalogBuilder class."""
from __future__ import annotations
import operator
import time
from codecs import open
from collections import defaultdict
from os import getenv, path, walk
from pathlib import Path
from typing import TYPE_CHECKING, Any, Literal
from uuid import uuid4
from docutils import nodes
from sphinx import addnodes, package_dir
from sphinx.builders import Builder
from sphinx.errors import ThemeError
from sphinx.locale import __
from sphinx.util import logging
from sphinx.util.console import bold
from sphinx.util.display import status_iterator
from sphinx.util.i18n import CatalogInfo, docname_to_domain
from sphinx.util.index_entries import split_index_msg
from sphinx.util.nodes import extract_messages, traverse_translatable_index
from sphinx.util.osutil import canon_path, ensuredir, relpath
from sphinx.util.tags import Tags
from sphinx.util.template import SphinxRenderer
if TYPE_CHECKING:
import os
from collections.abc import Iterable, Iterator, Sequence
from docutils.nodes import Element
from sphinx.application import Sphinx
from sphinx.config import Config
from sphinx.util.typing import ExtensionMetadata
DEFAULT_TEMPLATE_PATH = Path(package_dir, 'templates', 'gettext')
logger = logging.getLogger(__name__)
class Message:
"""An entry of translatable message."""
def __init__(
self, text: str, locations: list[tuple[str, int]], uuids: list[str]
) -> None:
self.text = text
self.locations = locations
self.uuids = uuids
class Catalog:
"""Catalog of translatable messages."""
def __init__(self) -> None:
self.messages: list[str] = [] # retain insertion order
# msgid -> file, line, uid
self.metadata: dict[str, list[tuple[str, int, str]]] = {}
def add(self, msg: str, origin: Element | MsgOrigin) -> None:
if not hasattr(origin, 'uid'):
# Nodes that are replicated like todo don't have a uid,
# however i18n is also unnecessary.
return
if msg not in self.metadata: # faster lookup in hash
self.messages.append(msg)
self.metadata[msg] = []
line = origin.line
if line is None:
line = -1
self.metadata[msg].append((origin.source, line, origin.uid)) # type: ignore[arg-type]
def __iter__(self) -> Iterator[Message]:
for message in self.messages:
positions = sorted({
(source, line) for source, line, uuid in self.metadata[message]
})
uuids = [uuid for source, line, uuid in self.metadata[message]]
yield Message(message, positions, uuids)
class MsgOrigin:
"""
Origin holder for Catalog message origin.
"""
def __init__(self, source: str, line: int) -> None:
self.source = source
self.line = line
self.uid = uuid4().hex
class GettextRenderer(SphinxRenderer):
def __init__(
self,
template_path: Sequence[str | os.PathLike[str]] | None = None,
outdir: str | os.PathLike[str] | None = None,
) -> None:
self.outdir = outdir
if template_path is None:
super().__init__([DEFAULT_TEMPLATE_PATH])
else:
super().__init__([*template_path, DEFAULT_TEMPLATE_PATH])
def escape(s: str) -> str:
s = s.replace('\\', r'\\')
s = s.replace('"', r'\"')
return s.replace('\n', '\\n"\n"')
# use texescape as escape filter
self.env.filters['e'] = escape
self.env.filters['escape'] = escape
def render(self, filename: str, context: dict[str, Any]) -> str:
def _relpath(s: str) -> str:
return canon_path(relpath(s, self.outdir))
context['relpath'] = _relpath
return super().render(filename, context)
class I18nTags(Tags):
"""Dummy tags module for I18nBuilder.
To ensure that all text inside ``only`` nodes is translated,
this class always returns ``True`` regardless the defined tags.
"""
def eval_condition(self, condition: Any) -> bool:
return True
class I18nBuilder(Builder):
"""
General i18n builder.
"""
name = 'i18n'
versioning_method = 'text'
use_message_catalog = False
def init(self) -> None:
super().init()
self.env.set_versioning_method(
self.versioning_method, self.env.config.gettext_uuid
)
self.tags = I18nTags()
self.catalogs: defaultdict[str, Catalog] = defaultdict(Catalog)
def get_target_uri(self, docname: str, typ: str | None = None) -> str:
return ''
def get_outdated_docs(self) -> set[str]:
return self.env.found_docs
def compile_catalogs(self, catalogs: set[CatalogInfo], message: str) -> None:
return
def write_doc(self, docname: str, doctree: nodes.document) -> None:
catalog = self.catalogs[docname_to_domain(docname, self.config.gettext_compact)]
for toctree in self.env.tocs[docname].findall(addnodes.toctree):
for node, msg in extract_messages(toctree):
node.uid = '' # type: ignore[attr-defined] # Hack UUID model
catalog.add(msg, node)
for node, msg in extract_messages(doctree):
# Do not extract messages from within substitution definitions.
if not _is_node_in_substitution_definition(node):
catalog.add(msg, node)
if 'index' in self.env.config.gettext_additional_targets:
# Extract translatable messages from index entries.
for node, entries in traverse_translatable_index(doctree):
for entry_type, value, _target_id, _main, _category_key in entries:
for m in split_index_msg(entry_type, value):
catalog.add(m, node)
# If set, use the timestamp from SOURCE_DATE_EPOCH
# https://reproducible-builds.org/specs/source-date-epoch/
if (source_date_epoch := getenv('SOURCE_DATE_EPOCH')) is not None:
timestamp = time.gmtime(float(source_date_epoch))
else:
# determine timestamp once to remain unaffected by DST changes during build
timestamp = time.localtime()
ctime = time.strftime('%Y-%m-%d %H:%M%z', timestamp)
def should_write(filepath: str, new_content: str) -> bool:
if not path.exists(filepath):
return True
try:
with open(filepath, encoding='utf-8') as oldpot:
old_content = oldpot.read()
old_header_index = old_content.index('"POT-Creation-Date:')
new_header_index = new_content.index('"POT-Creation-Date:')
old_body_index = old_content.index('"PO-Revision-Date:')
new_body_index = new_content.index('"PO-Revision-Date:')
return (
old_content[:old_header_index] != new_content[:new_header_index]
or new_content[new_body_index:] != old_content[old_body_index:]
)
except ValueError:
pass
return True
def _is_node_in_substitution_definition(node: nodes.Node) -> bool:
"""Check "node" to test if it is in a substitution definition."""
while node.parent:
if isinstance(node, nodes.substitution_definition):
return True
node = node.parent
return False
[docs]
class MessageCatalogBuilder(I18nBuilder):
"""
Builds gettext-style message catalogs (.pot files).
"""
name = 'gettext'
epilog = __('The message catalogs are in %(outdir)s.')
def init(self) -> None:
super().init()
self.create_template_bridge()
self.templates.init(self)
def _collect_templates(self) -> set[str]:
template_files = set()
for template_path in self.config.templates_path:
tmpl_abs_path = path.join(self.app.srcdir, template_path)
for dirpath, _dirs, files in walk(tmpl_abs_path):
for fn in files:
if fn.endswith('.html'):
filename = canon_path(path.join(dirpath, fn))
template_files.add(filename)
return template_files
def _extract_from_template(self) -> None:
files = list(self._collect_templates())
files.sort()
logger.info(bold(__('building [%s]: ')), self.name, nonl=True)
logger.info(__('targets for %d template files'), len(files))
extract_translations = self.templates.environment.extract_translations
for template in status_iterator(
files, __('reading templates... '), 'purple', len(files), self.app.verbosity
):
try:
with open(template, encoding='utf-8') as f:
context = f.read()
for line, _meth, msg in extract_translations(context):
origin = MsgOrigin(template, line)
self.catalogs['sphinx'].add(msg, origin)
except Exception as exc:
msg = f'{template}: {exc!r}'
raise ThemeError(msg) from exc
def build( # type: ignore[misc]
self,
docnames: Iterable[str] | None,
summary: str | None = None,
method: Literal['all', 'specific', 'update'] = 'update',
) -> None:
self._extract_from_template()
super().build(docnames, summary, method)
def finish(self) -> None:
super().finish()
context = {
'version': self.config.version,
'copyright': self.config.copyright,
'project': self.config.project,
'last_translator': self.config.gettext_last_translator,
'language_team': self.config.gettext_language_team,
'ctime': ctime,
'display_location': self.config.gettext_location,
'display_uuid': self.config.gettext_uuid,
}
for textdomain, catalog in status_iterator(
self.catalogs.items(),
__('writing message catalogs... '),
'darkgreen',
len(self.catalogs),
self.app.verbosity,
operator.itemgetter(0),
):
# noop if config.gettext_compact is set
ensuredir(path.join(self.outdir, path.dirname(textdomain)))
context['messages'] = list(catalog)
template_path = [
self.app.srcdir / rel_path for rel_path in self.config.templates_path
]
renderer = GettextRenderer(template_path, outdir=self.outdir)
content = renderer.render('message.pot.jinja', context)
pofn = path.join(self.outdir, textdomain + '.pot')
if should_write(pofn, content):
with open(pofn, 'w', encoding='utf-8') as pofile:
pofile.write(content)
def _gettext_compact_validator(app: Sphinx, config: Config) -> None:
gettext_compact = config.gettext_compact
# Convert 0/1 from the command line to ``bool`` types
if gettext_compact == '0':
config.gettext_compact = False
elif gettext_compact == '1':
config.gettext_compact = True
def setup(app: Sphinx) -> ExtensionMetadata:
app.add_builder(MessageCatalogBuilder)
app.add_config_value('gettext_compact', True, 'gettext', {bool, str})
app.add_config_value('gettext_location', True, 'gettext')
app.add_config_value('gettext_uuid', False, 'gettext')
app.add_config_value('gettext_auto_build', True, 'env')
app.add_config_value('gettext_additional_targets', [], 'env', types={set, list})
app.add_config_value(
'gettext_last_translator', 'FULL NAME <EMAIL@ADDRESS>', 'gettext'
)
app.add_config_value('gettext_language_team', 'LANGUAGE <LL@li.org>', 'gettext')
app.connect('config-inited', _gettext_compact_validator, priority=800)
return {
'version': 'builtin',
'parallel_read_safe': True,
'parallel_write_safe': True,
}