;; Discussed in detail at http://technomancy.us/130
(ns wide-finder
  "A basic map/reduce approach to the wide finder using agents.
  Optimized for being idiomatic and readable rather than speed."
  (:use [clojure.contrib.duck-streams :only [reader]]))

;; Matches a request for a numerically named page, e.g. "GET /123 ".
;; The single capture group holds the digits of the page name.
(def re #"GET /(\d+) ")

(defn inc-or-init
  "Return i plus one, or 1 when i is nil (no count recorded yet)."
  [i]
  (inc (or i 0)))

(defn count-line
  "Bump the tally in counts for the page requested in line.
  Returns counts untouched when line contains no match for re."
  [counts line]
  (let [[_ page :as match] (re-find re line)]
    (if match
      ;; A page not seen before has no entry, so inc-or-init starts it at 1.
      (assoc counts page (inc-or-init (get counts page)))
      counts)))

(defn find-widely
  "Return a map of pages to hit counts in filename.

  The lines of the file are dealt out round-robin to n agents, each of
  which accumulates its own counts map with count-line; the per-agent
  maps are then summed into a single map. n should be positive: with no
  agents, no lines are counted and nil is returned."
  [filename n]
  ;; Each agent begins as an empty map. doall creates them all up front
  ;; instead of leaving agent creation to lazy evaluation.
  (let [agents (doall (map agent (repeat n {})))]
    ;; with-open guarantees the reader is closed, even if reading throws.
    ;; dorun forces the entire lazy line-seq while the reader is still open.
    (with-open [rdr (reader filename)]
      ;; For each line in the file, send an agent the job of counting it.
      (dorun (map #(send %1 count-line %2)
                  (cycle agents) ; infinite seq of all agents
                  (line-seq rdr))))
    ;; Wait for every agent to work through the lines sent to it.
    (apply await agents)
    ;; Reduce the results into a single count value.
    (apply merge-with + (map deref agents))))

;; Generated by Phil Hagelberg using scpaste at Sat Nov 14 20:35:41 2009 PST.