"""
In the Sphinx build configuration file (``conf.py``), you can use :code:`translate` to translate codelist CSV files and JSON Schema files:
.. code:: python
import os
from glob import glob
from pathlib import Path
from ocds_babel.translate import translate
def setup(app):
basedir = Path(__file__).resolve().parents[1]
localedir = basedir / 'locale'
language = app.config.overrides.get('language', 'en')
headers = ['Title', 'Description', 'Extension']
translate([
(glob(str(basedir / 'schema' / '*-schema.json')), basedir / 'build' / language, 'schema'),
(glob(str(basedir / 'schema' / 'codelists')), basedir / 'build' / language, 'codelists'),
], localedir, language, headers)
:code:`translate` automatically determines the translation method to use based on filenames. The arguments to :code:`translate` are:
#. A list of tuples. Each tuple has three values:
#. Input files (a list of paths of files to translate)
#. Output directory (the path of the directory in which to write translated files)
#. Gettext domain (the filename without extension of the message catalog to use)
#. Locale directory (the path of the directory containing message catalog files)
#. Target language (the code of the language to translate to)
#. Optional keyword arguments to replace ``{{marker}}`` markers with values, e.g. :code:`version='1.1'`
Methods are also available for translating ``extension.json`` and for translating Markdown files.
Install requirements for Markdown translation
---------------------------------------------
To translate Markdown files, you must install:
.. code-block:: bash
pip install ocds-babel[markdown]
""" # noqa: E501
import csv
import gettext
import json
import logging
import os
from copy import deepcopy
from io import StringIO
from ocds_babel import TRANSLATABLE_EXTENSION_METADATA_KEYWORDS, TRANSLATABLE_SCHEMA_KEYWORDS
from ocds_babel.util import text_to_translate
try:
from ocds_babel.translate_markdown import translate_markdown, translate_markdown_data # noqa: F401
except ImportError:
pass
logger = logging.getLogger('ocds_babel')
def translate(configuration, localedir, language, headers, **kwargs):
    """
    Writes files, translating any translatable strings.

    For translated strings in schema files, replaces `{{lang}}` with the language code. Keyword arguments may specify
    additional replacements.

    :param configuration: a list of (input files, output directory, gettext domain) tuples
    :param localedir: the path of the directory containing message catalog files
    :param language: the code of the language to translate to
    :param headers: the CSV header names whose column values are translatable
    """
    translators = {}

    for sources, target, domain in configuration:
        logger.info('Translating to %s using "%s" domain, into %s', language, domain, target)

        # Load one Translations object per domain, shared by all files in that domain.
        # With fallback=True for English, a missing catalog yields NullTranslations.
        translators.setdefault(domain, gettext.translation(
            domain, localedir, languages=[language], fallback=language == 'en'))

        os.makedirs(target, exist_ok=True)

        for source in sources:
            basename = os.path.basename(source)

            # Build per-file keyword arguments instead of mutating the shared `kwargs`.
            # Previously, `headers` added for a CSV file leaked into later files: a JSON
            # file would then receive headers=<list>, and translate_schema_data would
            # attempt str.replace('{{headers}}', <list>), raising TypeError.
            file_kwargs = dict(kwargs)
            if basename == 'extension.json':
                method = translate_extension_metadata
                file_kwargs['lang'] = language
            elif source.endswith('.csv'):
                method = translate_codelist
                file_kwargs['headers'] = headers
            elif source.endswith('.json'):
                method = translate_schema
                file_kwargs['lang'] = language
            elif source.endswith('.md'):
                method = translate_markdown
            else:
                # Determined before opening files, so an unsupported filename no longer
                # creates an empty output file as a side effect.
                raise NotImplementedError(basename)

            with open(source) as r, open(os.path.join(target, basename), 'w') as w:
                w.write(method(r, translators[domain], **file_kwargs))
# This should roughly match the logic of `extract_codelist`.
def translate_codelist(io, translator, headers=None, **kwargs):
    """
    Accepts a CSV file as an IO object, and returns its translated contents in CSV format.

    :param io: an IO object from which to read the CSV
    :param translator: a gettext Translations instance for the message catalog
    :param headers: the CSV header names whose column values are translatable
    """
    # Avoid a mutable default argument; None is normalized to an empty list.
    if headers is None:
        headers = []

    reader = csv.DictReader(io)
    # Column headers are themselves translatable.
    fieldnames = [translator.gettext(fieldname) for fieldname in reader.fieldnames]
    rows = translate_codelist_data(reader, translator, headers, **kwargs)

    # Write into a fresh buffer rather than rebinding (and shadowing) the input parameter.
    output = StringIO()
    writer = csv.DictWriter(output, fieldnames, lineterminator='\n')
    writer.writeheader()
    writer.writerows(rows)
    return output.getvalue()
def translate_codelist_data(source, translator, headers=None, **kwargs):
    """
    Accepts CSV rows as an iterable object (e.g. a list of dictionaries), and returns translated rows.

    :param source: an iterable of dicts mapping column names to cell values
    :param translator: a gettext Translations instance for the message catalog
    :param headers: column names whose cell values are translatable
    """
    # Avoid a mutable default argument; None is normalized to an empty list.
    if headers is None:
        headers = []

    rows = []
    for row in source:
        data = {}
        for key, value in row.items():
            # Only cells in translatable columns are looked up in the catalog.
            text = text_to_translate(value, key in headers)
            if text:
                value = translator.gettext(text)
            # Column names are translated as well.
            data[translator.gettext(key)] = value
        rows.append(data)
    return rows
# This should roughly match the logic of `extract_schema`.
def translate_schema(io, translator, **kwargs):
    """
    Accepts a JSON file as an IO object, and returns its translated contents in JSON format.
    """
    # Parse, translate, and re-serialize in one pass.
    return _json_dumps(translate_schema_data(json.load(io), translator, **kwargs))
def translate_schema_data(source, translator, **kwargs):
    """
    Accepts JSON data, and returns translated data.

    Values under translatable schema keywords are looked up in the message catalog, and
    any ``{{marker}}`` placeholders in the translations are replaced with the
    corresponding keyword argument values. The input is not modified.
    """
    def _visit(node):
        # Depth-first, mutating `node` in place (it is a private deep copy).
        if isinstance(node, list):
            for element in node:
                _visit(element)
        elif isinstance(node, dict):
            for key, value in node.items():
                # Recurse into the value before translating at this level,
                # matching the original traversal order.
                _visit(value)
                text = text_to_translate(value, key in TRANSLATABLE_SCHEMA_KEYWORDS)
                if text:
                    translated = translator.gettext(text)
                    for marker, replacement in kwargs.items():
                        translated = translated.replace('{{' + marker + '}}', replacement)
                    node[key] = translated

    copied = deepcopy(source)
    _visit(copied)
    return copied
# This should roughly match the logic of `extract_extension_metadata`.
def _json_dumps(data):
return json.dumps(data, ensure_ascii=False, indent=2)