Seu aplicativo precisa saber quais relações de link estão oficialmente registradas na IANA? Este script as busca e então imprime um arquivo Python contendo um dicionário que mapeia cada relação de link à sua descrição.
Enquanto o escrevia, descobri o maravilhoso módulo objectify do lxml, que permite ao script navegar rapidamente pela estrutura XML. Em seguida, ele usa uma função de quebra de linha personalizada para garantir que a saída Python resultante seja compatível com a PEP 8.
from __future__ import print_function
from __future__ import unicode_literals
import requests
from lxml import objectify
import datetime
# Location where the IANA link-relations registry is published in XML format.
# Fetched over HTTPS because the response is turned into Python source code,
# so it must not be tamperable in transit.
iana_xml = 'https://www.iana.org/assignments/link-relations/link-relations.xml'
def _default_wrap_width():
    """Return the terminal width in columns, or 80 when it is unavailable."""
    try:
        from shutil import get_terminal_size  # Python 3.3+
    except ImportError:  # Python 2 has no get_terminal_size
        return 80
    return get_terminal_size().columns


def linewrap(chunks, width=None, sep=' ', preamble='', line_prefix='',
             return_gen=False):
    """Wrap whole chunks of text into lines no longer than *width*.

    Chunks are never split: a chunk that is longer than *width* is placed on
    a line of its own, and that line exceeds the limit.

    Args:
        chunks: An iterable of strings, or a single string, which is split
            on whitespace into chunks.
        width: Maximum line length. When falsy (``None`` or ``0``), the
            terminal width is used, falling back to 80 columns.
        sep: Separator placed between chunks that share a line.
        preamble: Optional text that starts the first line; it is joined to
            the first chunk with *sep* and counts toward the line length.
        line_prefix: Optional text prepended to the first chunk of every
            line except the first; it counts toward the line length.
        return_gen: If true, return a generator that lazily produces the
            lines; otherwise return the lines joined by newlines.

    Returns:
        A generator of line strings when *return_gen* is true, otherwise a
        single newline-joined string (empty when there is nothing to wrap).
    """
    try:
        string_types = basestring  # noqa: F821 -- exists on Python 2 only
    except NameError:
        string_types = str

    width = width or _default_wrap_width()
    if isinstance(chunks, string_types):
        chunks = chunks.split()
    sep_len = len(sep)

    def line_len(line):
        """Return the rendered length of *line*, a list of strings."""
        if not line:
            return 0
        return sum(len(part) for part in line) + sep_len * (len(line) - 1)

    def gen():
        """Incrementally build and yield the wrapped lines."""
        line = [preamble] if preamble else []
        for chunk in chunks:
            # Only break when there is something to flush; otherwise an
            # over-long first chunk would produce a spurious empty line.
            if line and line_len(line) + sep_len + len(chunk) > width:
                yield sep.join(line)
                line = [line_prefix + chunk]
            else:
                line.append(chunk)
        if line:
            yield sep.join(line)

    lines = gen()
    return lines if return_gen else '\n'.join(lines)
if __name__ == '__main__':
    # Download the registry; fail loudly on HTTP errors and never hang
    # forever on an unresponsive server.
    response = requests.get(iana_xml, timeout=30)
    response.raise_for_status()
    # NOTE(review): non-ASCII characters are silently dropped here,
    # presumably so that str() on the parsed elements is safe under
    # Python 2 -- confirm before switching to response.content.
    text = response.text.encode('ascii', 'ignore')
    xml = objectify.fromstring(text)

    # Map each registered relation name to its description.
    iana_rels = {str(rec.value): str(rec.description)
                 for rec in xml.registry.record}
    keys = sorted(iana_rels)

    print('# This file was autogenerated')
    print()
    print('# Registry last updated on:', xml.updated)
    print('# This file generated on:', datetime.date.today())
    print()
    print('iana_rels = {')
    for key in keys:
        print('    {!r}: ('.format(key))
        # 8 spaces of indent + opening quote + 68 characters + trailing
        # space + closing quote = 79 columns, the PEP 8 line limit.
        desc_list = list(linewrap(iana_rels[key], width=68, return_gen=True))
        if not desc_list:
            # An empty description must still produce a string literal;
            # bare parentheses would evaluate to an empty tuple.
            desc_list = ['']
        last = len(desc_list) - 1
        for i, line in enumerate(desc_list):
            # Escape backslashes first, then double quotes, so the emitted
            # double-quoted literal is valid Python.
            line_ = line.replace('\\', '\\\\').replace('"', '\\"')
            if i < last:
                # Trailing space keeps words separated once the adjacent
                # string literals are concatenated.
                print('        "{} "'.format(line_))
            else:
                print('        "{}"'.format(line_))
        print('    ),')
    print('}')
Além disso, o gist está aqui: