"""
In the Sphinx build configuration file (``conf.py``), ``translate`` codelist CSV files and JSON Schema files.
.. code:: python
import os
from glob import glob
from pathlib import Path
from ocds_babel.translate import translate
def setup(app):
basedir = Path(__file__).resolve().parents[1]
localedir = basedir / 'locale'
language = app.config.overrides.get('language', 'en')
headers = ['Title', 'Description', 'Extension']
translate([
(glob(str(basedir / 'schema' / '*-schema.json')), basedir / 'build' / language, 'schema'),
(glob(str(basedir / 'schema' / 'codelists')), basedir / 'build' / language, 'codelists'),
], localedir, language, headers)
:code:`translate` automatically determines the translation method to use based on filenames.
The arguments to :code:`translate` are:
#. A list of tuples. Each tuple has three values:
#. Input files (a list of paths of files to translate)
#. Output directory (the path of the directory in which to write translated files)
#. Gettext domain (the filename without extension of the message catalog to use)
#. Locale directory (the path of the directory containing message catalog files)
#. Target language (the code of the language to translate to)
#. Optional keyword arguments to replace ``{{marker}}`` markers with values, e.g. :code:`version='1.1'`
Methods are also available for translating ``extension.json``, Markdown files and YAML files.
Install requirements for Markdown translation
---------------------------------------------
To translate Markdown files, you must install:
.. code-block:: bash
pip install ocds-babel[markdown]
Install requirements for YAML translation
-----------------------------------------
To translate YAML files, you must install:
.. code-block:: bash
pip install ocds-babel[yaml]
"""
import contextlib
import csv
import gettext
import json
import logging
import os
from copy import deepcopy
from io import StringIO
from ocds_babel import TRANSLATABLE_EXTENSION_METADATA_KEYWORDS, TRANSLATABLE_SCHEMA_KEYWORDS
from ocds_babel.util import text_to_translate
with contextlib.suppress(ImportError):
from ocds_babel.translate_markdown import translate_markdown, translate_markdown_data # noqa: F401
with contextlib.suppress(ImportError):
from ocds_babel.translate_yaml import translate_yaml, translate_yaml_data # noqa: F401
logger = logging.getLogger("ocds_babel")
[docs]
def translate(configuration, localedir, language, headers, keys=None, **kwargs):
    """
    Write files, translating any translatable strings.

    For translated strings in schema files, replace `{{lang}}` with the language code.
    Keyword arguments may specify additional replacements.

    The translation method is chosen from each source's filename: ``extension.json``, then the ``.csv``, ``.json``,
    ``.md`` and ``.yaml`` extensions. The ``.md`` and ``.yaml`` methods are imported only if their optional
    requirements are installed.

    :param configuration: an iterable of ``(sources, target, domain)`` tuples: the paths of the files to translate,
        the directory in which to write the translated files, and the gettext domain of the message catalog to use
    :param localedir: the directory containing the message catalog files
    :param language: the code of the language to translate to
    :param headers: passed as ``headers`` to the method for ``.csv`` files
    :param keys: passed as ``keys`` to the method for ``.yaml`` files
    :param kwargs: passed to every translation method
    :raises NotImplementedError: if a source's filename matches no translation method
    """
    translators = {}

    for sources, target, domain in configuration:
        logger.info('Translating to %s using "%s" domain, into %s', language, domain, target)

        # Load each domain's catalog only once. `dict.setdefault` would evaluate its default argument eagerly,
        # re-reading the catalog for every configuration entry that reuses a domain.
        if domain not in translators:
            translators[domain] = gettext.translation(
                domain, localedir, languages=[language], fallback=language == "en"
            )
        translator = translators[domain]

        os.makedirs(target, exist_ok=True)

        for source in sources:
            # Accept path-like objects (e.g. `pathlib.Path`) as well as strings.
            path = os.fspath(source)
            basename = os.path.basename(path)

            # `extension.json` must be tested before the generic `.json` case.
            if basename == "extension.json":
                method = translate_extension_metadata
                new_kwargs = {"lang": language}
            elif path.endswith(".csv"):
                method = translate_codelist
                new_kwargs = {"headers": headers}
            elif path.endswith(".json"):
                method = translate_schema
                new_kwargs = {"lang": language}
            elif path.endswith(".md"):
                method = translate_markdown
                new_kwargs = {}
            elif path.endswith(".yaml"):
                method = translate_yaml
                new_kwargs = {"keys": keys}
            else:
                raise NotImplementedError(basename)

            # Translate before opening the output file, so that a failure doesn't leave a truncated file behind.
            # Use an explicit encoding, so that the result doesn't depend on the platform's locale.
            with open(path, encoding="utf-8") as r:
                content = method(r, translator, **new_kwargs, **kwargs)
            with open(os.path.join(target, basename), "w", encoding="utf-8") as w:
                w.write(content)
# This should roughly match the logic of `extract_codelist`.
[docs]
def translate_codelist(io, translator, headers=(), **kwargs):
    """
    Read a codelist CSV file from an IO object, and return its translation as CSV text.

    The column names are translated with ``translator.gettext``, and the rows are translated by
    :func:`translate_codelist_data`. Lines in the returned text end with ``\\n``.
    """
    source_rows = csv.DictReader(io)
    # Read the header row before the data rows are consumed.
    translated_columns = list(map(translator.gettext, source_rows.fieldnames))
    translated_rows = translate_codelist_data(source_rows, translator, headers, **kwargs)

    buffer = StringIO()
    csv_writer = csv.DictWriter(buffer, translated_columns, lineterminator="\n")
    csv_writer.writeheader()
    csv_writer.writerows(translated_rows)

    return buffer.getvalue()
[docs]
def translate_codelist_data(source, translator, headers=(), **kwargs):
    """
    Translate CSV rows, given as an iterable of dictionaries, and return the translated rows as a list.

    Every column name is translated. A cell is translated only if ``text_to_translate`` returns a truthy text for
    it; otherwise, the cell is kept as-is.
    """

    def _translate_cell(column, cell):
        message = text_to_translate(cell, column in headers)
        translated_cell = translator.gettext(message) if message else cell
        return translator.gettext(column), translated_cell

    return [dict(_translate_cell(column, cell) for column, cell in record.items()) for record in source]
# This should roughly match the logic of `extract_schema`.
[docs]
def translate_schema(io, translator, **kwargs):
    """
    Read a JSON Schema file from an IO object, and return its translation as JSON text.

    The parsed data is translated by :func:`translate_schema_data`, to which the keyword arguments are passed.
    """
    translated = translate_schema_data(json.load(io), translator, **kwargs)
    return _json_dumps(translated)
[docs]
def translate_schema_data(source, translator, **kwargs):
    """
    Return a translated deep copy of JSON data. The input is not modified.

    Lists and dictionaries are traversed recursively. For each dictionary member for which ``text_to_translate``
    returns a truthy text, the value is replaced by the text's translation, in which each ``{{name}}`` marker is
    replaced by the value of the keyword argument of the same name.
    """

    def _replace_markers(text):
        for name, replacement in kwargs.items():
            text = text.replace("{{" + name + "}}", replacement)
        return text

    def _walk(node):
        if isinstance(node, list):
            for child in node:
                _walk(child)
            return
        if not isinstance(node, dict):
            return
        for keyword, child in node.items():
            # Descend first, then translate the member itself.
            _walk(child)
            message = text_to_translate(child, keyword in TRANSLATABLE_SCHEMA_KEYWORDS)
            if message:
                node[keyword] = _replace_markers(translator.gettext(message))

    result = deepcopy(source)
    _walk(result)
    return result
# This should roughly match the logic of `extract_extension_metadata`.
def _json_dumps(data):
return json.dumps(data, ensure_ascii=False, indent=2)