Mercurial > hg > GlobalNeighbors
comparison globalneighbors/schema.py @ 0:5dba84370182
initial commit; half-working prototype
author | Jeff Hammel <k0scist@gmail.com> |
---|---|
date | Sat, 24 Jun 2017 12:03:39 -0700 |
parents | |
children | 1b94f3bf97e5 |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:5dba84370182 |
---|---|
1 """ | |
2 Schema for cities 1000 | |
3 From http://download.geonames.org/export/dump/ | |
4 """ | |
5 | |
6 from collections import OrderedDict | |
7 | |
8 | |
# human-readable description of every column, keyed by column name
descriptions = {
    'geonameid': "integer id of record in geonames database",
    'name': "name of geographical point (utf8) varchar(200)",
    'asciiname': (
        "name of geographical point in plain ascii characters, "
        "varchar(200)"
    ),
    'alternatenames': (
        "alternatenames, comma separated, ascii names automatically "
        "transliterated, convenience attribute from alternatename table, "
        "varchar(10000)"
    ),
    'latitude': "latitude in decimal degrees (wgs84)",
    'longitude': "longitude in decimal degrees (wgs84)",
    'feature class': (
        "see http://www.geonames.org/export/codes.html, char(1)"
    ),
    'feature code': (
        "see http://www.geonames.org/export/codes.html, varchar(10)"
    ),
    'country code': "ISO-3166 2-letter country code, 2 characters",
    'cc2': (
        "alternate country codes, comma separated, "
        "ISO-3166 2-letter country code, 200 characters"
    ),
    'admin1 code': (
        "fipscode (subject to change to iso code), see exceptions below, "
        "see file admin1Codes.txt for display names of this code; "
        "varchar(20)"
    ),
    'admin2 code': (
        "code for the second administrative division, a county in the US, "
        "see file admin2Codes.txt; varchar(80)"
    ),
    'admin3 code': (
        "code for third level administrative division, varchar(20)"
    ),
    'admin4 code': (
        "code for fourth level administrative division, varchar(20)"
    ),
    'population': "bigint (8 byte int)",
    'elevation': "in meters, integer",
    'dem': (
        "digital elevation model, srtm3 or gtopo30, average elevation of "
        "3''x3'' (ca 90mx90m) or 30''x30'' (ca 900mx900m) area in meters, "
        "integer. srtm processed by cgiar/ciat."
    ),
    'timezone': "the iana timezone id (see file timeZone.txt) varchar(40)",
    'modification date': "date of last modification in yyyy-MM-dd format",
}
31 | |
32 | |
# python type each column is cast to; insertion order is the column order
types = OrderedDict()
types['geonameid'] = int
types['name'] = str
types['asciiname'] = str
types['alternatenames'] = str
types['latitude'] = float
types['longitude'] = float
types['feature class'] = str
types['feature code'] = str
types['country code'] = str
types['cc2'] = str
types['admin1 code'] = str
types['admin2 code'] = str
types['admin3 code'] = str
types['admin4 code'] = str
types['population'] = int
types['elevation'] = int
types['dem'] = int
types['timezone'] = str
types['modification date'] = str
55 | |
56 | |
# Columns that are kept; every other column is discarded
fields = (
    'geonameid', 'name', 'asciiname',
    'latitude', 'longitude',
    'country code', 'population',
)

# Columns with a special role
primary_key = 'geonameid'
name = 'asciiname'
71 | |
72 | |
def cast_row(row, types=types, nullable=('elevation',)):
    """
    cast an iterable `row` of data to
    a `dict` according to `types` casting rule

    Parameters:
    - row : iterable of raw column values, in the same order as `types`
    - types : mapping of column name -> callable used to cast the raw
      value; its iteration order must match the column order of `row`
    - nullable : column names whose empty value is stored as `None`
      when the cast raises `ValueError`

    Returns a `dict` of column name -> cast value.

    Raises `AssertionError` if `row` and `types` differ in length and
    re-raises the `ValueError` of a failed cast unless the value is
    empty and its column is listed in `nullable`.
    """

    # `len()` needs a sized container; materialize generators and other
    # one-shot iterables so that any iterable is accepted as documented
    if not isinstance(row, (list, tuple)):
        row = list(row)

    if len(row) != len(types):
        raise AssertionError("Length of row {} != length of types {}: {}".format(len(row), len(types), row))

    retval = {}
    for value, (key, _type) in zip(row, types.items()):
        try:
            retval[key] = _type(value)
        except ValueError:
            # Some columns (elevation by default) aren't always recorded:
            # an empty value there becomes `None` instead of an error
            if not value and key in nullable:
                retval[key] = None
            else:
                raise
    return retval