annotate tests/test_kmeans.py @ 87:9d5a5e9f5c3b

add kmeans + dataset
author Jeff Hammel <k0scist@gmail.com>
date Sun, 17 Dec 2017 14:05:57 -0800
parents
children 596dac7f3e98
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
87
9d5a5e9f5c3b add kmeans + dataset
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
1 #!/usr/bin/env python
9d5a5e9f5c3b add kmeans + dataset
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
2
9d5a5e9f5c3b add kmeans + dataset
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
3 """
9d5a5e9f5c3b add kmeans + dataset
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
4 tests for the k-means algorithm
9d5a5e9f5c3b add kmeans + dataset
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
5 """
9d5a5e9f5c3b add kmeans + dataset
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
6
9d5a5e9f5c3b add kmeans + dataset
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
7 import unittest
9d5a5e9f5c3b add kmeans + dataset
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
8 from tvii import kmeans
9d5a5e9f5c3b add kmeans + dataset
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
9 from nettwerk.dataset.circle import CircularRandom
9d5a5e9f5c3b add kmeans + dataset
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
10
9d5a5e9f5c3b add kmeans + dataset
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
11
9d5a5e9f5c3b add kmeans + dataset
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
12 class TestKMeans(unittest.TestCase):
9d5a5e9f5c3b add kmeans + dataset
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
13
9d5a5e9f5c3b add kmeans + dataset
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
14 def test_dualing_gaussians(self):
9d5a5e9f5c3b add kmeans + dataset
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
15 """tests two gaussian distributions; first, cut overlap"""
9d5a5e9f5c3b add kmeans + dataset
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
16 # TODO
9d5a5e9f5c3b add kmeans + dataset
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
17
9d5a5e9f5c3b add kmeans + dataset
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
18 def test_circles(self):
9d5a5e9f5c3b add kmeans + dataset
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
19 """test with two circles of points"""
9d5a5e9f5c3b add kmeans + dataset
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
20
9d5a5e9f5c3b add kmeans + dataset
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
21 # generate two non-overlapping circles
9d5a5e9f5c3b add kmeans + dataset
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
22 n_points = 10000 # per circle
9d5a5e9f5c3b add kmeans + dataset
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
23 p1 = CircularRandom((-1.5, 0), 1)(n_points)
9d5a5e9f5c3b add kmeans + dataset
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
24 p2 = CircularRandom((1.5, 0), 1)(n_points)
9d5a5e9f5c3b add kmeans + dataset
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
25
9d5a5e9f5c3b add kmeans + dataset
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
26 # run kmeans
9d5a5e9f5c3b add kmeans + dataset
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
27 classes, centroids = kmeans.kmeans(p1+p2, 2)
9d5a5e9f5c3b add kmeans + dataset
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
28
9d5a5e9f5c3b add kmeans + dataset
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
29 # sanity
9d5a5e9f5c3b add kmeans + dataset
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
30 assert len(centroids) == 2
9d5a5e9f5c3b add kmeans + dataset
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
31 assert len(classes) == 2
9d5a5e9f5c3b add kmeans + dataset
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
32
9d5a5e9f5c3b add kmeans + dataset
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
33 # the centroids' x values should have opposite signs
9d5a5e9f5c3b add kmeans + dataset
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
34 xprod = centroids[0][0] * centroids[1][0]
9d5a5e9f5c3b add kmeans + dataset
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
35 assert xprod < 0.
9d5a5e9f5c3b add kmeans + dataset
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
36 assert abs(xprod + 2.25) < 0.1
9d5a5e9f5c3b add kmeans + dataset
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
37
9d5a5e9f5c3b add kmeans + dataset
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
38 # assert we're kinda close
9d5a5e9f5c3b add kmeans + dataset
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
39 for c in centroids:
9d5a5e9f5c3b add kmeans + dataset
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
40 c = [abs(i) for i in c]
9d5a5e9f5c3b add kmeans + dataset
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
41 assert abs(c[0]-1.5) < 0.1
9d5a5e9f5c3b add kmeans + dataset
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
42 assert abs(c[1]) < 0.1
9d5a5e9f5c3b add kmeans + dataset
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
43
9d5a5e9f5c3b add kmeans + dataset
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
44 # it's a pretty clean break; our points should be exact, most likely
9d5a5e9f5c3b add kmeans + dataset
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
45 if centroids[0][0] < 0.:
9d5a5e9f5c3b add kmeans + dataset
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
46 left = 0
9d5a5e9f5c3b add kmeans + dataset
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
47 right = 1
9d5a5e9f5c3b add kmeans + dataset
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
48 else:
9d5a5e9f5c3b add kmeans + dataset
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
49 left = 1
9d5a5e9f5c3b add kmeans + dataset
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
50 right = 0
9d5a5e9f5c3b add kmeans + dataset
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
51 assert sorted(p1) == sorted(classes[left])
9d5a5e9f5c3b add kmeans + dataset
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
52 assert sorted(p2) == sorted(classes[right])
9d5a5e9f5c3b add kmeans + dataset
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
53
9d5a5e9f5c3b add kmeans + dataset
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
54 def test_help(self):
9d5a5e9f5c3b add kmeans + dataset
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
55 """smoketest for CLI"""
9d5a5e9f5c3b add kmeans + dataset
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
56
9d5a5e9f5c3b add kmeans + dataset
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
57 try:
9d5a5e9f5c3b add kmeans + dataset
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
58 kmeans.main(['--help'])
9d5a5e9f5c3b add kmeans + dataset
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
59 except SystemExit:
9d5a5e9f5c3b add kmeans + dataset
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
60 # this is expected
9d5a5e9f5c3b add kmeans + dataset
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
61 pass
9d5a5e9f5c3b add kmeans + dataset
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
62
9d5a5e9f5c3b add kmeans + dataset
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
63
9d5a5e9f5c3b add kmeans + dataset
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
64 if __name__ == '__main__':
9d5a5e9f5c3b add kmeans + dataset
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
65 unittest.main()