You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

update_cr.py 3.6KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113
import io
import json
import sys
  3. data = {'glossary': {}, 'sections': {}}
  4. part = None
  5. parts = {'Glossary': 'glossary', 'Credits': 'sections', 'X-Z 300': 'sections', '1. Словарь терминов': 'glossary'}
  6. key = None
  7. value = None
  8. with open('cr.txt') as file:
  9. for line in file:
  10. content = line.strip()
  11. if content in parts:
  12. part = parts[content]
  13. continue
  14. if part == 'glossary':
  15. if line == '\n':
  16. if key and value:
  17. data[part][key] = value.strip()
  18. key, value = None, None
  19. elif key:
  20. if value:
  21. value += '\n' + content
  22. else:
  23. value = content
  24. else:
  25. key = content
  26. if part == 'sections':
  27. if line == '\n':
  28. if key and value:
  29. data[part][key] = {'en': value.strip()}
  30. key, value = None, None
  31. elif key:
  32. value += '\n' + content
  33. else:
  34. if content.startswith(tuple('0123456789')):
  35. key, value = content.split(' ', 1)
  36. part = None
  37. eng_key = None
  38. try:
  39. with open('cr_ru.txt') as file:
  40. for line in file:
  41. content = line.strip()
  42. if content in parts:
  43. part = parts[content]
  44. continue
  45. if content.startswith('1. ') and 'ru' in data['sections']['1.']:
  46. content = content.split('1. ', 1)[1]
  47. if part == 'glossary':
  48. if line == '\n':
  49. if key and eng_key and value:
  50. data[part][key] = eng_key +'\n' + value
  51. key, eng_key, value = None, None, None
  52. elif key:
  53. if value:
  54. value += '\n' + content
  55. else:
  56. value = content
  57. elif eng_key:
  58. key = content
  59. else:
  60. eng_key = content
  61. if part == 'sections':
  62. if line == '\n':
  63. if key and value:
  64. if key in data[part]:
  65. if 'ru' in data[part][key]:
  66. print('{:8} ! duplicate'.format(key))
  67. else:
  68. data[part][key]['ru'] = value
  69. else:
  70. print('{:8} ! unknown'.format(key))
  71. key, value = None, None
  72. elif key:
  73. value += '\n' + content
  74. else:
  75. if content.startswith(tuple('0123456789')) and content.count(' ') > 0:
  76. key, value = content.split(' ', 1)
  77. if key.endswith('.') and not key[:-1].endswith(tuple('0123456789')):
  78. print('{:8} extra dot'.format(key))
  79. key = key[:-1]
  80. if key.endswith(tuple('0123456789')):
  81. print('{:8} no dot'.format(key))
  82. key += '.'
  83. if key.endswith(tuple('ас')):
  84. print('{:8} russian letter'.format(key))
  85. key = key.replace('а', 'a').replace('с', 'c')
  86. except:
  87. pass
  88. for section in data['sections']:
  89. if 'ru' not in data['sections'][section]:
  90. print('{:8} ! no ru translation'.format(section))
  91. with io.open('data/cr.json', 'w', encoding = 'utf8') as file:
  92. json.dump(data, file, ensure_ascii = False, indent = 4, sort_keys = True)