annotate tvii/dataset/gauss.py @ 87:9d5a5e9f5c3b

add kmeans + dataset
author Jeff Hammel <k0scist@gmail.com>
date Sun, 17 Dec 2017 14:05:57 -0800
parents
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
87
9d5a5e9f5c3b add kmeans + dataset
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
1 #!/usr/bin/env python
9d5a5e9f5c3b add kmeans + dataset
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
2
9d5a5e9f5c3b add kmeans + dataset
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
3 """
9d5a5e9f5c3b add kmeans + dataset
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
4 generate a Gaussian dataset
9d5a5e9f5c3b add kmeans + dataset
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
5 """
9d5a5e9f5c3b add kmeans + dataset
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
6
9d5a5e9f5c3b add kmeans + dataset
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
7 import random
9d5a5e9f5c3b add kmeans + dataset
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
8 import sys
9d5a5e9f5c3b add kmeans + dataset
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
9 from .cli import DatasetGenerationParser
9d5a5e9f5c3b add kmeans + dataset
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
10
9d5a5e9f5c3b add kmeans + dataset
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
11
9d5a5e9f5c3b add kmeans + dataset
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
class GaussianSampler(object):
    """Sampler of points from a Gaussian distribution.

    Each coordinate of a point is drawn independently with `random.gauss`,
    using the matching coordinate of `center` as the mean and the same
    `sigma` for every coordinate.
    """

    def __init__(self, D, sigma=1., center=None):
        """
        D -- number of dimensions of each generated point; must be > -1
        sigma -- value passed to `random.gauss` as the standard deviation
                 of every coordinate
        center -- sequence of `D` per-coordinate means;
                  defaults to `[0]*D` when None

        Raises AssertionError if `D` is negative or if `center` does not
        have exactly `D` entries.
        """

        # Explicit raises instead of `assert` statements so the checks are
        # not stripped when Python runs with -O.  The exception type stays
        # AssertionError so existing callers see the same failure.
        if not D > -1:
            raise AssertionError("D must be > -1, got %r" % (D,))
        self.D = D

        if center is None:
            self.center = [0] * self.D
        else:
            if len(center) != self.D:
                raise AssertionError(
                    "center has %d entries, expected %d"
                    % (len(center), self.D))
            # Store a copy: keeping a reference to the caller's list would
            # let later mutations of that list move this sampler's center.
            self.center = list(center)

        self.sigma = sigma

    def __call__(self):
        """return one point (a list of `D` floats) of a Gaussian distribution"""

        return [random.gauss(mean, self.sigma)
                for mean in self.center]
9d5a5e9f5c3b add kmeans + dataset
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
29
9d5a5e9f5c3b add kmeans + dataset
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
30
9d5a5e9f5c3b add kmeans + dataset
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
def main(args=sys.argv[1:]):
    """CLI

    Parses `args` with `DatasetGenerationParser`, draws `options.number`
    points from a `GaussianSampler` of `options.dimensions` dimensions
    (default center and sigma), and hands them to `parser.writer()`.
    """

    # parse command line
    parser = DatasetGenerationParser(description=__doc__)
    options = parser.parse_args(args)

    # point generator
    generator = GaussianSampler(D=options.dimensions)
    # `range` rather than `xrange`: the latter does not exist on Python 3,
    # while `range` works on both Python 2 and Python 3.
    points = [generator() for _ in range(options.number)]

    # output
    # NOTE(review): presumably `writer()` returns a csv-style writer for the
    # destination chosen on the command line -- confirm in `.cli`.
    parser.writer().writerows(points)
9d5a5e9f5c3b add kmeans + dataset
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
44
9d5a5e9f5c3b add kmeans + dataset
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
45 if __name__ == '__main__':
9d5a5e9f5c3b add kmeans + dataset
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
46 main()