# aelnor/magic-judge-telegram-bot
							import json
import io

# Accumulated result, written out as JSON at the end of the script:
#   data['glossary'][term] -> definition text
#   data['sections'][key]  -> {'en': text}; a 'ru' text may be added later
data = {'glossary': {}, 'sections': {}}

# `part` is the bucket of `data` currently being filled (None = skip lines).
# `parts` maps a whole stripped line to the bucket that starts right after it.
part = None
parts = {'Glossary': 'glossary', 'Credits': 'sections', 'X-Z        300': 'sections', '1. Словарь терминов': 'glossary'}

# Entry being collected: its key and the text gathered for it so far.
key = None
value = None


def _store_english_entry(part, key, value):
    """Save a finished English entry into ``data``.

    Glossary entries are stored as plain text, section entries as
    ``{'en': text}``. Entries that lack a key or a text are dropped, and
    nothing is stored while no part is active.
    """
    if not (key and value):
        return
    if part == 'glossary':
        data[part][key] = value.strip()
    elif part == 'sections':
        data[part][key] = {'en': value.strip()}


# Read as UTF-8 explicitly (the JSON output is written as UTF-8 as well)
# instead of relying on the platform's default encoding.
with open('cr.txt', encoding='utf-8') as file:
    for line in file:
        content = line.strip()
        if content in parts:
            # Close the entry in progress under the part it was read in,
            # so it cannot spill into the part that starts here.
            _store_english_entry(part, key, value)
            key, value = None, None
            part = parts[content]
            continue

        if part is None:
            continue

        if line == '\n':
            # A blank line ends the current entry.
            _store_english_entry(part, key, value)
            key, value = None, None

        elif key:
            # Continuation line of the current entry.
            if value:
                value += '\n' + content
            else:
                value = content

        elif part == 'glossary':
            # The first line of a glossary entry is its key.
            key = content

        elif content.startswith(tuple('0123456789')) and ' ' in content:
            # A section line is "<number> <text>". Digit-led lines without
            # a space are skipped rather than breaking the unpacking.
            key, value = content.split(' ', 1)

# The file may end without a trailing blank line: keep the last entry and
# leave no half-read state behind for the code that follows.
_store_english_entry(part, key, value)
key, value = None, None


# Second pass: read cr_ru.txt and merge its texts into `data`.
part = None
eng_key = None
# Start from a clean slate: a half-read entry left over from the previous
# pass must not be mistaken for the beginning of a Russian entry.
key, value = None, None

_DIGITS = tuple('0123456789')


def _store_russian_entry(part, eng_key, key, value):
    """Save a finished Russian entry into ``data``.

    Glossary: stored under ``key`` as ``eng_key``, a newline, then the
    collected text; skipped unless all three are present.
    Sections: the text becomes the ``'ru'`` field of an already known
    section; unknown keys and repeated translations are only reported.
    """
    if not (key and value):
        return
    if part == 'glossary':
        if eng_key:
            data[part][key] = eng_key + '\n' + value
    elif part == 'sections':
        if key not in data[part]:
            print('{:8} ! unknown'.format(key))
        elif 'ru' in data[part][key]:
            print('{:8} ! duplicate'.format(key))
        else:
            data[part][key]['ru'] = value


try:
    # Read as UTF-8 explicitly (the JSON output is written as UTF-8 as
    # well) instead of relying on the platform's default encoding.
    with open('cr_ru.txt', encoding='utf-8') as file:
        for line in file:
            content = line.strip()
            if content in parts:
                # Close the entry in progress under the part it was read
                # in, so it cannot spill into the part that starts here.
                _store_russian_entry(part, eng_key, key, value)
                key, eng_key, value = None, None, None
                part = parts[content]
                continue

            # Once section '1.' has its Russian text, a leading '1. ' is
            # dropped from later lines. `.get` keeps a missing section
            # '1.' from raising KeyError.
            if content.startswith('1. ') and 'ru' in data['sections'].get('1.', {}):
                content = content.split('1. ', 1)[1]

            if part is None:
                continue

            if line == '\n':
                # A blank line ends the current entry.
                _store_russian_entry(part, eng_key, key, value)
                key, eng_key, value = None, None, None

            elif key:
                # Continuation line of the current entry.
                if value:
                    value += '\n' + content
                else:
                    value = content

            elif part == 'glossary':
                # A glossary entry opens with two header lines: the first
                # goes to eng_key, the second becomes the key it is
                # stored under.
                if eng_key:
                    key = content
                else:
                    eng_key = content

            elif content.startswith(_DIGITS) and ' ' in content:
                key, value = content.split(' ', 1)
                # Normalise the section number so it can match the keys
                # collected earlier; every correction is reported.
                if key.endswith('.') and not key[:-1].endswith(_DIGITS):
                    print('{:8} extra dot'.format(key))
                    key = key[:-1]
                if key.endswith(_DIGITS):
                    print('{:8} no dot'.format(key))
                    key += '.'
                if key.endswith(tuple('ас')):
                    # Cyrillic 'а' / 'с' typed in place of Latin 'a' / 'c'.
                    print('{:8} russian letter'.format(key))
                    key = key.replace('а', 'a').replace('с', 'c')

        # The file may end without a trailing blank line: keep the last entry.
        _store_russian_entry(part, eng_key, key, value)
        key, eng_key, value = None, None, None
except (OSError, UnicodeError) as error:
    # The Russian text is best-effort: if it is missing or unreadable,
    # keep whatever was merged so far, but say so instead of hiding it.
    # Programming errors are no longer swallowed.
    print('{:8} ! not read: {}'.format('cr_ru.txt', error))

# Report every section that ended up without a Russian text.
for number, texts in data['sections'].items():
    if 'ru' in texts:
        continue
    print('{:8} ! no ru translation'.format(number))

# Write the collected data as UTF-8 JSON: non-ASCII characters are kept
# verbatim, keys are sorted and nesting is indented by four spaces.
with io.open('data/cr.json', mode='w', encoding='utf8') as output:
    json.dump(data, output, ensure_ascii=False, indent=4, sort_keys=True)