Browse Source

Data generation scripts

master
LeoTheHuman 7 years ago
parent
commit
e15192c317
6 changed files with 189 additions and 3 deletions
  1. 4
    2
      Dockerfile
  2. 71
    0
      scripts/update_cards.py
  3. 113
    0
      scripts/update_cr.py
  4. 1
    1
      src/documents.py
  5. 0
    0
      src/magic-judge-telegram-bot.py
  6. 0
    0
      src/oracle.py

+ 4
- 2
Dockerfile View File

@@ -7,6 +7,8 @@ RUN pip install --no-cache-dir -r requirements.txt

ENV NAME magic-judge-telegram-bot

COPY . .
COPY src/* ./src/
COPY data/* ./data/
COPY config.json ./

CMD ["python", "magic-judge-telegram-bot.py"]
CMD ["python", "src/magic-judge-telegram-bot.py"]

+ 71
- 0
scripts/update_cards.py View File

@@ -0,0 +1,71 @@
from urllib.request import Request, urlopen
import json

def save(name, data):
    """Write *data* to the file *name* as pretty-printed JSON.

    The file is encoded as UTF-8, non-ASCII characters are written
    verbatim (not escaped), nesting is indented by four spaces and
    object keys are emitted in sorted order.
    """
    with open(name, mode='w', encoding='utf8') as handle:
        handle.write(
            json.dumps(data, ensure_ascii=False, indent=4, sort_keys=True)
        )

def download(max_retries=5):
    """Fetch every card from the magicthegathering.io API, page by page.

    Pages are requested in order starting at page 1 until the API returns
    a page whose 'cards' list is empty.

    Args:
        max_retries: How many consecutive HTTP failures are tolerated
            before giving up. Each failure retries the same page.

    Returns:
        A list with the card dictionaries of all downloaded pages.

    Raises:
        urllib.error.HTTPError: If more than ``max_retries`` requests in
            a row fail.
    """
    # Function-scope import: the module header only pulls Request and
    # urlopen from urllib.request, so HTTPError is not otherwise in scope.
    from urllib.error import HTTPError

    cards = []
    page = 1
    failures = 0
    while True:
        request_url = 'https://api.magicthegathering.io/v1/cards?page={}'.format(page)
        print('Loading page {}...'.format(page))
        try:
            req = Request(request_url, headers={'User-Agent': 'Mozilla/5.0'})
            # Close the HTTP response deterministically once it is read.
            with urlopen(req) as reply:
                response = json.loads(reply.read().decode("utf-8"))
        except HTTPError:
            print ('Failed at page {}'.format(page))
            failures += 1
            if failures > max_retries:
                # Do not spin forever on a permanently failing page.
                raise
            continue

        failures = 0
        cardsPage = response['cards']
        if not cardsPage:
            # An empty page marks the end of the card list.
            break
        cards.extend(cardsPage)
        page += 1
    return cards

# Card data comes either from a previously saved dump or from a fresh
# download. To reuse the saved dump instead of downloading, swap in:
#with open('data/cards.json') as file:
#    cards = json.load(file)

# Download everything and keep the raw dump for future runs.
cards = download()
save('data/cards.json', cards)

# Translation languages whose card names are indexed next to the English ones.
languages = ['Russian']
# (language, English card name) pairs whose translations are skipped.
ignore = [
    {'language': 'Russian', 'name': 'Plunder'},
    {'language': 'Russian', 'name': 'Goblin Spelunkers'},
    {'language': 'Russian', 'name': 'Raise Dead'},
]
# Card fields carried over into the oracle entries.
copy = [
    'name', 'flavor', 'power', 'toughness', 'colors', 'printings',
    'legality', 'manaCost', 'subtypes', 'text', 'layout', 'colorIdentity',
    'type', 'types', 'cmc', 'loyalty', 'rulings',
]

# names:  any known name (English or translated) -> list of English card names
# oracle: English card name -> subset of the card's fields (see `copy`)
names = {}
oracle = {}

for card in cards:
    english = card['name']

    # Every card is reachable through its own English name.
    if english not in names:
        names[english] = [english]
    elif english not in names[english]:
        names[english].append(english)

    for translation in card.get('foreignNames', ()):
        local = translation['name']
        if not local:
            continue
        language = translation['language']
        if language not in languages:
            continue
        if {'language': language, 'name': english} in ignore:
            continue

        if local not in names:
            names[local] = [english]
        elif english not in names[local]:
            # The translated name already points at some other card:
            # report the clash, then register this card as well.
            print('{} version of "{}": "{}" is named as another card "{}"'.format(language, english, local, names[local]))
            names[local].append(english)

    # Only the first occurrence of a card name populates the oracle.
    if english not in oracle:
        oracle[english] = {
            field: content for field, content in card.items() if field in copy
        }

save('data/names.json', names)
save('data/oracle.json', oracle)


+ 113
- 0
scripts/update_cr.py View File

@@ -0,0 +1,113 @@
import json
import io

# Builds data/cr.json from two plain-text rule files read from the
# current directory: cr.txt (English) and cr_ru.txt (Russian).
#
# Output layout:
#   data['glossary'][term]         -> definition text
#   data['sections'][number]['en'] -> English text of that numbered entry
#   data['sections'][number]['ru'] -> Russian text, when one was matched
data = {'glossary': {}, 'sections': {}}

# A line whose stripped content equals one of these markers switches the
# parser into the mapped mode; the marker line itself is not stored.
part = None
parts = {'Glossary': 'glossary', 'Credits': 'sections', 'X-Z 300': 'sections', '1. Словарь терминов': 'glossary'}

# Entry currently being accumulated (flushed when a blank line is read).
key = None
value = None

# Explicit UTF-8 so parsing does not depend on the platform's default
# locale encoding; the output file below is written as UTF-8 as well.
with open('cr.txt', encoding='utf8') as file:
    for line in file:
        content = line.strip()
        if content in parts:
            part = parts[content]
            continue

        if part == 'glossary':
            if line == '\n':
                # Blank line: store the finished entry and start over.
                if key and value:
                    data[part][key] = value.strip()
                key, value = None, None

            elif key:
                if value:
                    value += '\n' + content
                else:
                    value = content
            else:
                # First line of a glossary entry is the term itself.
                key = content

        if part == 'sections':
            if line == '\n':
                if key and value:
                    data[part][key] = {'en': value.strip()}
                key, value = None, None

            elif key:
                value += '\n' + content

            else:
                # An entry starts with its number, followed by a space
                # and the text.
                if content.startswith(tuple('0123456789')):
                    key, value = content.split(' ', 1)


# Second pass: attach Russian text. This pass is best-effort: any failure
# stops it, and whatever was collected up to that point is still written.
part = None
eng_key = None
try:
    with open('cr_ru.txt', encoding='utf8') as file:
        for line in file:
            content = line.strip()
            if content in parts:
                part = parts[content]
                continue

            # Once section '1.' already has a Russian text, drop a leading
            # '1. ' prefix from any further line that carries one.
            if content.startswith('1. ') and 'ru' in data['sections']['1.']:
                content = content.split('1. ', 1)[1]

            if part == 'glossary':
                if line == '\n':
                    if key and eng_key and value:
                        data[part][key] = eng_key + '\n' + value
                    key, eng_key, value = None, None, None

                elif key:
                    if value:
                        value += '\n' + content
                    else:
                        value = content

                elif eng_key:
                    # Second line of an entry becomes the stored key.
                    key = content

                else:
                    # First line of an entry is kept as `eng_key` and
                    # later prepended to the stored definition.
                    eng_key = content

            if part == 'sections':
                if line == '\n':
                    if key and value:
                        if key in data[part]:
                            if 'ru' in data[part][key]:
                                print('{:8} ! duplicate'.format(key))
                            else:
                                data[part][key]['ru'] = value
                        else:
                            print('{:8} ! unknown'.format(key))
                    key, value = None, None

                elif key:
                    value += '\n' + content

                else:
                    if content.startswith(tuple('0123456789')) and content.count(' ') > 0:
                        key, value = content.split(' ', 1)
                        # Normalise the number so it can match the keys
                        # collected from the English file.
                        if key.endswith('.') and not key[:-1].endswith(tuple('0123456789')):
                            print('{:8} extra dot'.format(key))
                            key = key[:-1]
                        if key.endswith(tuple('0123456789')):
                            print('{:8} no dot'.format(key))
                            key += '.'
                        if key.endswith(tuple('ас')):
                            # Cyrillic look-alike letters used in place of
                            # Latin 'a' / 'c'.
                            print('{:8} russian letter'.format(key))
                            key = key.replace('а', 'a').replace('с', 'c')
except Exception as error:
    # Still best-effort, but no longer silent, and Ctrl-C / SystemExit
    # are no longer swallowed.
    print('Russian translation pass stopped: {!r}'.format(error))

for section in data['sections']:
    if 'ru' not in data['sections'][section]:
        print('{:8} ! no ru translation'.format(section))

with io.open('data/cr.json', 'w', encoding = 'utf8') as file:
    json.dump(data, file, ensure_ascii = False, indent = 4, sort_keys = True)

documents.py → src/documents.py View File

@@ -62,6 +62,6 @@ def cr_search(words):
if len(nameCandidates) > 20:
return 'I need more specific clues, my master! This would return {} names'.format(len(nameCandidates))

text = '\n'.join(['<b>{}</b> {}'.format(name, crData['glossary'][name]) for name in sorted(nameCandidates)])
text = '\n'.join(['<b>{}</b>\n{}'.format(name, crData['glossary'][name]) for name in sorted(nameCandidates)])

return text

magic-judge-telegram-bot.py → src/magic-judge-telegram-bot.py View File


oracle.py → src/oracle.py View File


Loading…
Cancel
Save