comparison wordstream/api.py @ 0:8af3412e907a

initial import of wordstream
author k0s <k0scist@gmail.com>
date Fri, 01 Jan 2010 19:35:32 -0500
parents
children df84e61ae1e4
comparison
equal deleted inserted replaced
-1:000000000000 0:8af3412e907a
1 import urllib2
2
class Corpus(dict):
    """Mapping of each word to the list of words recorded as following it.

    Associations are stored in plain lists.  ``feed`` appends to the end
    of a word's list and ``eat`` pops from the end, so every list acts
    as a stack.
    """

    def __init__(self, corpus=None):
        """Create a corpus, optionally seeded from an existing mapping.

        corpus -- mapping (or iterable of pairs) of word -> list of
                  associations; ``None`` or an empty value yields an
                  empty corpus.  The lists are stored as given, not
                  copied.
        """
        # ``self`` must be passed explicitly to the unbound call;
        # without it a throwaway dict is initialised and this instance
        # stays empty.
        dict.__init__(self, corpus or {})

    def feed(self, word, association):
        """Append ``association`` to the list kept for ``word``."""
        self.setdefault(word, []).append(association)

    def eat(self, word):
        """Remove and return the last association stored for ``word``.

        Returns ``None`` when the word is unknown or its list is empty.
        """
        associations = self.get(word)
        if associations:
            return associations.pop()
        return None

    def feed_stream(self, stream):
        """Feed every adjacent (word, next word) pair found in ``stream``.

        stream -- a string, which is split on whitespace, or an iterable
                  of words.  The caller's sequence is left untouched.

        Pairs are fed from the end of the stream towards the start, so
        that ``eat`` hands associations back in stream order.
        """
        if isinstance(stream, basestring):
            words = stream.split()
        else:
            # Work on a copy rather than consuming the caller's list.
            words = list(stream)
        for index in range(len(words) - 1, 0, -1):
            self.feed(words[index - 1], words[index])

    def feed_stuff(self, *args):
        """Feed the text behind each argument into the corpus.

        args -- strings; those starting with ``http://`` or ``https://``
                are fetched with ``urllib2.urlopen``, anything else is
                opened as a local file path.
        """
        for arg in args:
            if arg.startswith(('https://', 'http://')):
                source = urllib2.urlopen(arg)
            else:
                source = open(arg)
            # Read the body out of the handle (feed_stream needs text,
            # not a response object) and always release the handle.
            try:
                text = source.read()
            finally:
                source.close()
            self.feed_stream(text)

    def save(self, filename):
        """Write the corpus as one ``word assoc1 assoc2 ...`` line per key.

        filename -- a path, which is opened for writing and closed
                    afterwards, or an object with a ``write`` method,
                    which is left open for the caller.

        Keys are written in sorted order.
        """
        f = filename
        named = isinstance(f, basestring)
        if named:
            f = open(f, 'w')
        try:
            for key in sorted(self):
                f.write("%s %s\n" % (key, ' '.join(self[key])))
        finally:
            # Only close what was opened here.
            if named:
                f.close()

    def load(self, f):
        """Read lines in the format written by ``save`` into the corpus.

        f -- a path, which is opened for reading and closed afterwards,
             or an iterable of lines, which is left open for the caller.

        The first token on a line is the word and the remaining tokens
        become its association list, replacing any list already stored
        for that word.  Blank lines are skipped.
        """
        named = isinstance(f, basestring)
        if named:
            f = open(f)
        try:
            for line in f:
                tokens = line.split()
                if tokens:
                    self[tokens[0]] = tokens[1:]
        finally:
            if named:
                f.close()

    @classmethod
    def restore(cls, filename):
        """Return a new corpus loaded from ``filename`` (see ``load``)."""
        corpus = cls()
        corpus.load(filename)
        return corpus
49