# HG changeset patch
# User k0s
# Date 1265953105 18000
# Node ID df84e61ae1e4cb53ff1a735b080fc3c1c8168028
# Parent af19f44024d3f85dde5104779976fdc30562879e
add dissociation

diff -r af19f44024d3 -r df84e61ae1e4 setup.py
--- a/setup.py	Fri Jan 01 20:01:02 2010 -0500
+++ b/setup.py	Fri Feb 12 00:38:25 2010 -0500
@@ -1,6 +1,6 @@
 from setuptools import setup, find_packages
 
-version = "0.0"
+version = "0.1"
 
 setup(name='wordstream',
       version=version,
@@ -29,6 +29,7 @@
 
       [console_scripts]
       wordstream = wordstream.main:main
+      dissociate = wordstream.dissociate:dissociate
       """,
       )
 
diff -r af19f44024d3 -r df84e61ae1e4 wordstream/api.py
--- a/wordstream/api.py	Fri Jan 01 20:01:02 2010 -0500
+++ b/wordstream/api.py	Fri Feb 12 00:38:25 2010 -0500
@@ -1,4 +1,5 @@
 import urllib2
+from random import shuffle
 
 class Corpus(dict):
 
@@ -9,8 +10,12 @@
         self.setdefault(word, []).append(association)
 
     def eat(self, word):
-        if word in self and self[word]:
-            return self[word].pop()
+        if word in self:
+            if self[word]:
+                return self[word].pop()
+            else:
+                # exhausted: forget the word so the corpus can drain to empty
+                del self[word]
 
     def feed_stream(self, stream):
         if isinstance(stream, basestring):
@@ -27,6 +32,11 @@
                 text = file(arg).read()
             self.feed_stream(text)
 
+    def scramble(self):
+        """shuffle each word's list of associations in place"""
+        for i in self:
+            shuffle(self[i])
+
     def save(self, filename):
         named = False
         if isinstance(f, basestring):
diff -r af19f44024d3 -r df84e61ae1e4 wordstream/dissociate.py
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/wordstream/dissociate.py	Fri Feb 12 00:38:25 2010 -0500
@@ -0,0 +1,37 @@
+#!/usr/bin/env python
+
+import random
+import sys
+import urllib2
+
+from optparse import OptionParser
+from pprint import pprint
+from wordstream.api import Corpus
+
+def dissociate(args=sys.argv[1:]):
+    """
+    print chains of associated words until the corpus is used up
+    - args : command-line style arguments; what is left after option
+      parsing is passed to Corpus.feed_stuff
+    """
+    parser = OptionParser()
+    # parse the arguments we were handed, not whatever sys.argv holds now
+    options, args = parser.parse_args(args)
+
+    corpus = Corpus()
+    corpus.feed_stuff(*args)
+    corpus.scramble()
+
+    while corpus:
+        word = random.choice(corpus.keys())
+        inedible = True
+        # follow the associations from word to word until one has none left
+        while corpus.get(word):
+            inedible = False
+            print word + ' ',
+            word = corpus.eat(word)
+        if inedible: # eat it anyway
+            corpus.eat(word)
+
+if __name__ == '__main__':
+    dissociate()