annotate tvii/dataset/rand.py @ 87:9d5a5e9f5c3b

add kmeans + dataset
author Jeff Hammel <k0scist@gmail.com>
date Sun, 17 Dec 2017 14:05:57 -0800
parents
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
87
9d5a5e9f5c3b add kmeans + dataset
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
1 #!/usr/bin/env python
9d5a5e9f5c3b add kmeans + dataset
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
2 # -*- coding: utf-8 -*-
9d5a5e9f5c3b add kmeans + dataset
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
3
9d5a5e9f5c3b add kmeans + dataset
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
4 """
9d5a5e9f5c3b add kmeans + dataset
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
5 generate a random dataset
9d5a5e9f5c3b add kmeans + dataset
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
6 """
9d5a5e9f5c3b add kmeans + dataset
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
7
9d5a5e9f5c3b add kmeans + dataset
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
8 import csv
9d5a5e9f5c3b add kmeans + dataset
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
9 import random
9d5a5e9f5c3b add kmeans + dataset
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
10 import sys
9d5a5e9f5c3b add kmeans + dataset
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
11 from .cli import DatasetGenerationParser
9d5a5e9f5c3b add kmeans + dataset
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
12
9d5a5e9f5c3b add kmeans + dataset
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
13
9d5a5e9f5c3b add kmeans + dataset
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
class RandomDataset(object):
    """A collection of `N` random points, each with `D` coordinates.

    Every coordinate is a `random.random()` sample mapped linearly onto the
    interval described by `bounds`.  All `N` points are generated when the
    instance is constructed and are stored in `self.points` as a list of
    `D`-element lists.
    """

    def __init__(self, N, D, bounds=(0., 1.)):
        """
        N -- number of points to generate
        D -- number of coordinates per point
        bounds -- 2-sequence `(lower, upper)` that coordinates are mapped onto
        """
        assert len(bounds) == 2, "bounds must be a (lower, upper) pair"
        self.N = N
        self.D = D
        self.bounds = bounds
        self.length = bounds[-1] - bounds[0]

        # `range` rather than `xrange`: the latter does not exist on Python 3
        self.points = [self() for _ in range(self.N)]

    def translate(self, fraction):
        """map `fraction` (nominally in [0, 1]) linearly onto the bounds"""
        return self.length*fraction + self.bounds[0]

    def __call__(self):
        """generate one `D`-dimensional point of data"""

        return [self.translate(random.random()) for _ in range(self.D)]
9d5a5e9f5c3b add kmeans + dataset
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
32
9d5a5e9f5c3b add kmeans + dataset
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
33
9d5a5e9f5c3b add kmeans + dataset
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
def main(args=sys.argv[1:]):
    """command-line entry point: generate random points and write them out"""

    # interpret the command line
    cli = DatasetGenerationParser(description=__doc__)
    parsed = cli.parse_args(args)

    # build the dataset with the requested point count and dimensionality
    dataset = RandomDataset(N=parsed.number, D=parsed.dimensions)

    # hand all generated points to the writer supplied by the parser
    out = cli.writer()
    out.writerows(dataset.points)
9d5a5e9f5c3b add kmeans + dataset
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
46
9d5a5e9f5c3b add kmeans + dataset
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
47
9d5a5e9f5c3b add kmeans + dataset
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
# run the CLI only when executed as a script, not when imported
if __name__ == '__main__':
    main()