🥇 Buscar relações de link registradas da IANA

Seu aplicativo precisa saber quais relações de link estão oficialmente registradas na IANA? Este script as busca e, em seguida, imprime um arquivo Python contendo um dicionário que mapeia cada relação para a sua descrição.

Enquanto o escrevia, descobri o maravilhoso módulo objectify do lxml, que permite ao script navegar rapidamente pela estrutura XML. Em seguida, ele usa uma função de quebra de linha personalizada para garantir que a saída Python resultante seja compatível com a PEP 8.

from __future__ import print_function
from __future__ import unicode_literals

import requests
from lxml import objectify
import datetime

# Location where the IANA link-relations registry is published in XML format.
# Fetched over HTTPS because the response is turned into Python source code,
# so it must not be tampered with in transit.
iana_xml = 'https://www.iana.org/assignments/link-relations/link-relations.xml'


def linewrap(chunks, width=None, sep=' ', preamble='', line_prefix='',
             return_gen=False):
    r'''Wrap whole chunks into lines that fit within width where possible.

    Chunks are never split: a chunk longer than width is placed on a line of
    its own, and that line exceeds width.

    chunks      -- an iterable of strings, or a single string, which is split
                   on whitespace to obtain the chunks.
    width       -- maximum line length. When omitted (or 0), the terminal
                   width is used, falling back to 80 columns.
    sep         -- separator placed between chunks on the same line.
    preamble    -- text placed at the start of the first line; it counts
                   towards that line's length.
    line_prefix -- text prepended to every line except the first; it counts
                   towards the line's length.
    return_gen  -- if true, return a generator that lazily produces the
                   lines; otherwise return the lines joined by newlines.
    '''
    if not width:
        # Function-scope import keeps the module importable on Python 2,
        # where shutil.get_terminal_size does not exist.
        try:
            from shutil import get_terminal_size
        except ImportError:
            width = 80
        else:
            width = get_terminal_size().columns

    try:
        string_types = basestring  # Python 2: covers both str and unicode
    except NameError:
        string_types = str  # Python 3
    if isinstance(chunks, string_types):
        chunks = chunks.split()

    sep_len = len(sep)

    def gen():
        r'The generator to incrementally create lines from the input'
        line = [preamble] if preamble else []
        # Running length of sep.join(line), kept up to date incrementally.
        line_len = len(preamble)
        for chunk in chunks:
            # Only wrap when the current line already holds something;
            # otherwise an over-long first chunk would emit an empty line.
            if line and line_len + sep_len + len(chunk) > width:
                yield sep.join(line)
                line = [line_prefix + chunk]
                line_len = len(line_prefix) + len(chunk)
            else:
                if line:
                    line_len += sep_len
                line.append(chunk)
                line_len += len(chunk)
        if line:
            yield sep.join(line)

    return gen() if return_gen else '\n'.join(gen())


if __name__ == '__main__':
    # Download the registry. A timeout prevents hanging forever, and
    # raise_for_status() stops an HTTP error page from reaching the parser.
    response = requests.get(iana_xml, timeout=30)
    response.raise_for_status()
    # Drop non-ASCII characters so the generated file is plain ASCII.
    text = response.text.encode('ascii', 'ignore')
    xml = objectify.fromstring(text)

    # Map each relation name to its description.
    iana_rels = {str(rec.value): str(rec.description)
                 for rec in xml.registry.record}
    keys = sorted(iana_rels)

    print('# This file was autogenerated')
    print()
    print('# Registry last updated on:', xml.updated)
    print('# This file generated on:', datetime.date.today())
    print()
    print('iana_rels = {')
    for key in keys:
        print('    {!r}: ('.format(key))
        # width=68 leaves room for the 8-space indent, the two quotes and the
        # trailing space, keeping every emitted line within 79 columns.
        desc_list = list(linewrap(iana_rels[key], width=68, return_gen=True))
        last_index = len(desc_list) - 1
        for i, line in enumerate(desc_list):
            # Escape backslashes first, then double quotes, so the text is a
            # valid double-quoted Python string literal.
            line_ = line.replace('\\', '\\\\').replace('"', '\\"')
            if i < last_index:
                # Adjacent literals are concatenated by Python, so every line
                # but the last carries the space that separated the words.
                print('        "{} "'.format(line_))
            else:
                print('        "{}"'.format(line_))
        print('    ),')
    print('}')

Além disso, o gist está aqui:

https://gist.github.com/deontologician/5647004

Related Posts