Mercurial > hg > wordstream
view wordstream/api.py @ 2:df84e61ae1e4
add dissociation
author | k0s <k0scist@gmail.com> |
---|---|
date | Fri, 12 Feb 2010 00:38:25 -0500 |
parents | 8af3412e907a |
children | bad7e66f4f24 |
line wrap: on
line source
from random import shuffle

try:
    import urllib2
except ImportError:
    # Python 3: urlopen lives in urllib.request; keep the historical name.
    import urllib.request as urllib2

try:
    _string_types = basestring
except NameError:
    # Python 3 has no basestring; str is the only text type.
    _string_types = str


class Corpus(dict):
    """Dictionary mapping each word to a list of its associations.

    Associations are appended with `feed` and removed (last one first)
    with `eat`.  `save`/`load` use a text format of one
    ``word association association ...`` line per word.
    """

    def __init__(self, corpus=None):
        """Create a corpus, optionally pre-populated from `corpus`.

        `corpus` may be anything the `dict` constructor accepts; a falsy
        value yields an empty corpus.
        """
        # `self` must be passed explicitly to the unbound dict.__init__,
        # otherwise the initial contents are silently discarded.
        dict.__init__(self, corpus or {})

    def feed(self, word, association):
        """Append `association` to the list stored under `word`."""
        self.setdefault(word, []).append(association)

    def eat(self, word):
        """Remove and return the most recently fed association of `word`.

        Returns None when `word` is unknown.  When `word` is present but
        has no associations left, the entry is deleted and None is
        returned.
        """
        if word in self:
            if self[word]:
                return self[word].pop()
            del self[word]
        return None

    def feed_stream(self, stream):
        """Feed every adjacent pair of words in `stream`.

        `stream` is either a string, which is split on whitespace, or a
        sequence of words.  Pairs are fed from the end of the stream
        towards the start, so `eat` yields a word's associations in their
        original stream order.  The sequence passed in is not modified.
        """
        if isinstance(stream, _string_types):
            words = stream.split()
        else:
            words = stream
        # Walk backwards by index instead of popping, so the caller's
        # sequence is left intact and immutable sequences work too.
        for index in range(len(words) - 1, 0, -1):
            self.feed(words[index - 1], words[index])

    def feed_stuff(self, *args):
        """Feed the text found at each argument.

        Arguments starting with ``http://`` or ``https://`` are fetched
        with `urlopen`; anything else is opened as a local file path.
        """
        for arg in args:
            if arg.startswith(('https://', 'http://')):
                response = urllib2.urlopen(arg)
                try:
                    text = response.read()
                finally:
                    response.close()
                if not isinstance(text, str):
                    # Python 3 returns bytes from read().
                    # NOTE(review): assumes UTF-8 content - TODO confirm
                    # or honour the response's declared charset.
                    text = text.decode('utf-8', 'replace')
            else:
                with open(arg) as source:
                    text = source.read()
            self.feed_stream(text)

    def scramble(self):
        """Shuffle every word's association list in place."""
        for associations in self.values():
            shuffle(associations)

    def save(self, filename):
        """Write the corpus as one ``word assoc assoc ...`` line per word.

        `filename` is either a path, which is opened for writing and
        closed afterwards, or an already-open writable file-like object,
        which is left open.  Words are written in sorted order.
        """
        named = isinstance(filename, _string_types)
        f = open(filename, 'w') if named else filename
        try:
            for key in sorted(self):
                f.write("%s %s\n" % (key, ' '.join(self[key])))
        finally:
            # Only close handles this method opened itself.
            if named:
                f.close()

    def load(self, f):
        """Read lines in the format written by `save` into this corpus.

        `f` is either a path or an open, line-iterable file-like object.
        The first token of each non-blank line is the word and the rest
        are its associations; they are appended to any associations the
        word already has.
        """
        named = isinstance(f, _string_types)
        source = open(f) if named else f
        try:
            for line in source:
                tokens = line.split()
                if tokens:
                    self.setdefault(tokens[0], []).extend(tokens[1:])
        finally:
            if named:
                source.close()

    @classmethod
    def restore(cls, filename):
        """Return a new corpus loaded from `filename` (path or file object)."""
        corpus = cls()
        corpus.load(filename)
        return corpus