Mercurial > hg > wordstream
changeset 2:df84e61ae1e4
add dissociation
author | k0s <k0scist@gmail.com> |
---|---|
date | Fri, 12 Feb 2010 00:38:25 -0500 |
parents | af19f44024d3 |
children | e21f53582267 |
files | setup.py wordstream/api.py wordstream/dissociate.py |
diffstat | 3 files changed, 42 insertions(+), 3 deletions(-) [+] |
line wrap: on
line diff
--- a/setup.py Fri Jan 01 20:01:02 2010 -0500 +++ b/setup.py Fri Feb 12 00:38:25 2010 -0500 @@ -1,6 +1,6 @@ from setuptools import setup, find_packages -version = "0.0" +version = "0.1" setup(name='wordstream', version=version, @@ -29,6 +29,7 @@ [console_scripts] wordstream = wordstream.main:main + dissociate = wordstream.dissociate:dissociate """, )
--- a/wordstream/api.py Fri Jan 01 20:01:02 2010 -0500 +++ b/wordstream/api.py Fri Feb 12 00:38:25 2010 -0500 @@ -1,4 +1,5 @@ import urllib2 +from random import shuffle class Corpus(dict): @@ -9,8 +10,11 @@ self.setdefault(word, []).append(association) def eat(self, word): - if word in self and self[word]: - return self[word].pop() + if word in self: + if self[word]: + return self[word].pop() + else: + del self[word] def feed_stream(self, stream): if isinstance(stream, basestring): @@ -27,6 +31,10 @@ text = file(arg).read() self.feed_stream(text) + def scramble(self): + for i in self: + shuffle(self[i]) + def save(self, filename): named = False if isinstance(f, basestring):
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/wordstream/dissociate.py Fri Feb 12 00:38:25 2010 -0500
@@ -0,0 +1,30 @@
+#!/usr/bin/env python
+
+import random
+import sys
+import urllib2
+
+from optparse import OptionParser
+from pprint import pprint
+from wordstream.api import Corpus
+
+def dissociate(args=sys.argv[1:]):
+    parser = OptionParser()
+    options, args = parser.parse_args()
+
+    corpus = Corpus()
+    corpus.feed_stuff(*args)
+    corpus.scramble()
+
+    while corpus:
+        word = random.choice(corpus.keys())
+        inedible = True
+        while corpus.get(word):
+            inedible = False
+            print word + ' ',
+            word = corpus.eat(word)
+        if inedible: # eat it anyway
+            corpus.eat(word)
+
+if __name__ == '__main__':
+    dissociate()