# Source code for ocds_babel.extract
"""
Babel extractors can be specified in configuration files.

For OCDS, you can specify::

    [ocds_codelist: schema/*/codelists/*.csv]
    headers = Title,Description,Extension
    ignore = currency.csv

in ``babel_ocds_codelist.cfg``, and::

    [ocds_schema: schema/*/*-schema.json]

in ``babel_ocds_schema.cfg``.

For BODS, you can specify::

    [ocds_codelist: schema/codelists/*.csv]
    headers = title,description,technical note

in ``babel_bods_codelist.cfg``, and::

    [ocds_schema: schema/*.json]

in ``babel_bods_schema.cfg``.
"""
import csv
import json
import os
from io import StringIO
from ocds_babel import TRANSLATABLE_EXTENSION_METADATA_KEYWORDS, TRANSLATABLE_SCHEMA_KEYWORDS
from ocds_babel.util import text_to_translate
[docs]
def extract_codelist(fileobj, keywords, comment_tags, options):
    """
    Yield each header, and the specified field values, of a codelist CSV file.

    Every non-empty column name is yielded first, at line 0, with ``''`` in the comments position.
    Then, unless the file's basename is listed in the ``ignore`` option, each cell of each row is
    passed to ``text_to_translate`` along with whether its column is named in the ``headers``
    option. Each truthy result is yielded with the row's 1-based index among the data rows and
    the column name as its only comment.

    An empty file (no header row) yields nothing.

    :param fileobj: a binary file-like object with a ``name`` attribute
    :param keywords: unused
    :param comment_tags: unused
    :param options: a mapping of extractor options, of which ``headers`` and ``ignore`` are read as
        comma-separated lists; may be empty or ``None``
    :returns: an iterator of 4-tuples (line number, ``''``, text, comments)
    """
    headers = _get_option_as_list(options, 'headers')
    ignore = _get_option_as_list(options, 'ignore')

    # Pass newline='' so that line endings reach the csv module untranslated, to avoid parsing errors.
    reader = csv.DictReader(StringIO(fileobj.read().decode(), newline=''))

    # ``fieldnames`` is None when the file has no rows at all, so fall back to an empty sequence.
    for fieldname in reader.fieldnames or ():
        if fieldname:
            yield 0, '', fieldname, ''

    if os.path.basename(fileobj.name) not in ignore:
        for lineno, row in enumerate(reader, 1):
            for key, value in row.items():
                text = text_to_translate(value, key in headers)
                if text:
                    yield lineno, '', text, [key]
[docs]
def extract_schema(fileobj, keywords, comment_tags, options):
    """
    Yield the translatable values of a JSON Schema file.

    The decoded JSON document is walked depth-first. For every key/value pair of every object, the
    value is passed to ``text_to_translate`` along with whether the key is one of
    ``TRANSLATABLE_SCHEMA_KEYWORDS``. Each truthy result is yielded at line 1, with the
    slash-separated path to the value as its only comment. Texts found inside a value are yielded
    before the text for the value itself.

    :param fileobj: a binary file-like object containing JSON
    :param keywords: unused
    :param comment_tags: unused
    :param options: unused
    :returns: an iterator of 4-tuples (1, ``''``, text, [path])
    """
    def _walk(node, path=''):
        # Objects: descend into each member, then consider the member itself.
        if isinstance(node, dict):
            for name, child in node.items():
                child_path = f'{path}/{name}'
                yield from _walk(child, child_path)
                message = text_to_translate(child, name in TRANSLATABLE_SCHEMA_KEYWORDS)
                if message:
                    yield message, child_path
        # Arrays: descend into each element, using its index as the path segment.
        elif isinstance(node, list):
            for position, element in enumerate(node):
                yield from _walk(element, f'{path}/{position}')

    document = json.loads(fileobj.read().decode())
    yield from ((1, '', message, [location]) for message, location in _walk(document))
[docs]
def extract_extension_metadata(fileobj, keywords, comment_tags, options):
    """
    Yield the translatable metadata values of an extension.json file.

    For each key in ``TRANSLATABLE_EXTENSION_METADATA_KEYWORDS``, the value is read from the
    decoded JSON object. If the value is itself an object, its ``en`` member is used and the
    comment is ``/<key>/en``; otherwise (the old extension.json format) the value is used as-is
    and the comment is ``/<key>``. The value is passed to ``text_to_translate``, and a truthy
    result is yielded at line 1.

    :param fileobj: a binary file-like object containing a JSON object
    :param keywords: unused
    :param comment_tags: unused
    :param options: unused
    :returns: an iterator of 4-tuples (1, ``''``, text, [comment])
    """
    metadata = json.loads(fileobj.read().decode())

    for field in TRANSLATABLE_EXTENSION_METADATA_KEYWORDS:
        entry = metadata.get(field)
        # An object value maps language codes to texts; anything else is the old flat format.
        by_language = isinstance(entry, dict)
        pointer = f'/{field}/en' if by_language else f'/{field}'
        source = entry.get('en') if by_language else entry

        message = text_to_translate(source)
        if message:
            yield 1, '', message, [pointer]
def _get_option_as_list(options, key):
if options:
return options.get(key, '').split(',')
return []