annotate wordstream/api.py @ 10:bad7e66f4f24

py3
author Jeff Hammel <k0scist@gmail.com>
date Tue, 24 Nov 2020 10:33:22 -0800
parents df84e61ae1e4
children d2060c9bdb31
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
10
Jeff Hammel <k0scist@gmail.com>
parents: 2
diff changeset
1 from urllib.request import urlopen
2
df84e61ae1e4 add dissociation
k0s <k0scist@gmail.com>
parents: 0
diff changeset
2 from random import shuffle
0
8af3412e907a initial import of wordstream
k0s <k0scist@gmail.com>
parents:
diff changeset
3
8af3412e907a initial import of wordstream
k0s <k0scist@gmail.com>
parents:
diff changeset
class Corpus(dict):
    """Mapping of each word to the list of words associated with it.

    Associations are recorded with `feed` and consumed with `eat`, which
    pops from the end of a word's list.
    """

    def __init__(self, corpus=None):
        """Create a corpus, optionally seeded from an existing mapping.

        :param corpus: mapping of word -> list of associations, or None
        """
        # `self` must be passed explicitly: `dict.__init__(corpus or {})`
        # would initialize the argument itself and discard the seed data.
        dict.__init__(self, corpus or {})

    def feed(self, word, association):
        """Record `association` as one more association of `word`."""
        self.setdefault(word, []).append(association)

    def eat(self, word):
        """Consume and return the last remaining association of `word`.

        When the word is known but has no associations left, the word is
        removed from the corpus. Returns None in that case and when the
        word is unknown.
        """
        if word in self:
            if self[word]:
                return self[word].pop()
            # nothing left to hand out: forget the word entirely
            del self[word]
        return None

    def feed_stream(self, stream):
        """Feed every pair of adjacent words in `stream` into the corpus.

        :param stream: text (split on whitespace; bytes are decoded as
                       UTF-8) or an iterable of words

        The caller's sequence is left untouched.
        """
        if isinstance(stream, bytes):
            stream = stream.decode('utf-8', 'replace')
        if isinstance(stream, str):
            words = stream.split()
        else:
            # work on a copy so the caller's sequence is not consumed
            words = list(stream)
        # Walk the pairs from last to first so that, for a given word, the
        # earliest association ends up last in its list and is therefore
        # the first one returned by `eat`.
        for index in range(len(words) - 1, 0, -1):
            self.feed(words[index - 1], words[index])

    def feed_stuff(self, *args):
        """Feed the text found at each URL or file path in `args`.

        Arguments starting with ``http://`` or ``https://`` are fetched
        with `urlopen`; anything else is opened as a local file.
        """
        for arg in args:
            if arg.startswith(('https://', 'http://')):
                with urlopen(arg) as response:
                    # read() yields bytes; decode with the declared charset
                    # so the corpus holds text, falling back to UTF-8
                    charset = response.headers.get_content_charset() or 'utf-8'
                    text = response.read().decode(charset, 'replace')
            else:
                with open(arg) as source:
                    text = source.read()
            self.feed_stream(text)

    def scramble(self):
        """Shuffle every word's association list in place."""
        for associations in self.values():
            shuffle(associations)

    def save(self, filename):
        """Write the corpus, one ``word association ...`` line per word.

        :param filename: path to write to, or an already open writable
                         text file object (which is left open)
        """
        if isinstance(filename, str):
            with open(filename, 'w') as f:
                self._dump(f)
        else:
            self._dump(filename)

    def _dump(self, f):
        """Write the sorted ``word association ...`` lines to `f`."""
        f.writelines("%s %s\n" % (key, ' '.join(self[key]))
                     for key in sorted(self))

    def load(self, f):
        """Read lines in the format written by `save` into the corpus.

        :param f: path to read from, or an already open text file object
                  (which is left open)

        Associations are appended to any the corpus already holds.
        """
        if isinstance(f, str):
            with open(f) as source:
                self._parse(source)
        else:
            self._parse(f)

    def _parse(self, lines):
        """Add each ``word association ...`` line of `lines` to the corpus."""
        for line in lines:
            words = line.split()
            if not words:
                continue  # skip blank lines
            self.setdefault(words[0], []).extend(words[1:])

    @classmethod
    def restore(cls, filename):
        """Return a new corpus loaded from `filename` (see `load`)."""
        corpus = cls()
        corpus.load(filename)
        return corpus
8af3412e907a initial import of wordstream
k0s <k0scist@gmail.com>
parents:
diff changeset
58