Este trecho de código, por meio da função mallet-to-map,
lê um arquivo de saída bruto do MALLET LDA e o converte em um mapa Clojure para estudo / manipulação posterior.
(ns nlp.lda.util
"A set of utilities for reading the output of MALLET's LDA experiments,
and transforming the results into Clojure data structures."
(:require [clojure.string :as string]
[clojure-csv.core :as csv]))
(defn- compositions-to-map
  "Groups the flat sequence `compositions` into consecutive two-element
  vectors: [a b c d] becomes ([a b] [c d]). A trailing unpaired item is
  dropped. Returns a lazy sequence of vectors.
  NOTE(review): presumably the items alternate topic id / proportion --
  confirm against the input file."
  [compositions]
  ;; `partition` only yields complete groups of two, so each destructured
  ;; pair is guaranteed to have both elements.
  (for [[k v] (partition 2 compositions)]
    [k v]))
(defn- name-to-details
  "Splits `x` on the slash character and pairs the resulting pieces, in
  order, with the labels topic, set and document. Returns a lazy sequence
  of [label piece] vectors. Pairing stops at the shorter of the two
  sequences, so pieces beyond the third are ignored and a name with fewer
  than three pieces yields fewer pairs."
  [x]
  (let [labels ["topic" "set" "document"]
        pieces (string/split x #"/")]
    ;; Multi-collection `map` stops when either input runs out.
    (map vector labels pieces)))
(defn- entry-to-map
  "Builds a single map from one parsed row `x`. The first element of the
  row is expanded with `name-to-details`; the remaining elements are
  paired up with `compositions-to-map`. All resulting pairs are poured
  into one map, name details first, so a later pair with the same key
  overwrites an earlier one."
  [x]
  (let [detail-pairs      (name-to-details (first x))
        composition-pairs (compositions-to-map (rest x))]
    (-> {}
        (into detail-pairs)
        (into composition-pairs))))
(defn mallet-to-map
  "Reads the whole file at `filename` into memory, parses it with
  `csv/parse-csv`, and converts every parsed row with `entry-to-map`.
  Returns a lazy sequence containing one map per row."
  [filename]
  (->> filename
       slurp
       csv/parse-csv
       (map entry-to-map)))