comparison globalneighbors/schema.py @ 0:5dba84370182

initial commit; half-working prototype
author Jeff Hammel <k0scist@gmail.com>
date Sat, 24 Jun 2017 12:03:39 -0700
parents
children 1b94f3bf97e5
comparison
equal deleted inserted replaced
-1:000000000000 0:5dba84370182
1 """
2 Schema for cities 1000
3 From http://download.geonames.org/export/dump/
4 """
5
6 from collections import OrderedDict
7
8
# Human-readable description of each column, keyed by column name
descriptions = {
    'geonameid': "integer id of record in geonames database",
    'name': "name of geographical point (utf8) varchar(200)",
    'asciiname': (
        "name of geographical point in plain ascii characters, "
        "varchar(200)"
    ),
    'alternatenames': (
        "alternatenames, comma separated, ascii names automatically "
        "transliterated, convenience attribute from alternatename table, "
        "varchar(10000)"
    ),
    'latitude': "latitude in decimal degrees (wgs84)",
    'longitude': "longitude in decimal degrees (wgs84)",
    'feature class': "see http://www.geonames.org/export/codes.html, char(1)",
    'feature code': (
        "see http://www.geonames.org/export/codes.html, varchar(10)"
    ),
    'country code': "ISO-3166 2-letter country code, 2 characters",
    'cc2': (
        "alternate country codes, comma separated, "
        "ISO-3166 2-letter country code, 200 characters"
    ),
    'admin1 code': (
        "fipscode (subject to change to iso code), see exceptions below, "
        "see file admin1Codes.txt for display names of this code; "
        "varchar(20)"
    ),
    'admin2 code': (
        "code for the second administrative division, a county in the US, "
        "see file admin2Codes.txt; varchar(80)"
    ),
    'admin3 code': "code for third level administrative division, varchar(20)",
    'admin4 code': "code for fourth level administrative division, varchar(20)",
    'population': "bigint (8 byte int)",
    'elevation': "in meters, integer",
    'dem': (
        "digital elevation model, srtm3 or gtopo30, average elevation of "
        "3''x3'' (ca 90mx90m) or 30''x30'' (ca 900mx900m) area in meters, "
        "integer. srtm processed by cgiar/ciat."
    ),
    'timezone': "the iana timezone id (see file timeZone.txt) varchar(40)",
    'modification date': "date of last modification in yyyy-MM-dd format",
}
31
32
# Callable used to cast each raw column value; insertion order is the
# order in which the columns are paired with a row's values
types = OrderedDict((
    ('geonameid', int),
    ('name', str),
    ('asciiname', str),
    ('alternatenames', str),
    ('latitude', float),
    ('longitude', float),
    ('feature class', str),
    ('feature code', str),
    ('country code', str),
    ('cc2', str),
    ('admin1 code', str),
    ('admin2 code', str),
    ('admin3 code', str),
    ('admin4 code', str),
    ('population', int),
    ('elevation', int),
    ('dem', int),
    ('timezone', str),
    ('modification date', str),
))
55
56
# Columns that are kept; every other column is discarded
fields = (
    'geonameid', 'name', 'asciiname',
    'latitude', 'longitude',
    'country code', 'population',
)
67
# Column names singled out for special use
primary_key = 'geonameid'
name = 'asciiname'
71
72
def cast_row(row, types=types):
    """
    cast an iterable `row` of data to
    a `dict` according to `types` casting rule

    `row` holds one raw value per entry of `types`, in the same order;
    it may be any iterable, including a one-shot iterable such as a
    generator.  `types` is an ordered mapping of column name to the
    callable used to cast that column's value.

    Returns a `dict` of column name to cast value.  An empty
    'elevation' value that cannot be cast is stored as `None`.

    Raises `AssertionError` if `row` and `types` differ in length and
    `ValueError` if any other value cannot be cast.
    """

    # `len()` needs a sized container, but any iterable is accepted,
    # so materialise anything that does not report a length
    if not hasattr(row, '__len__'):
        row = list(row)

    if len(row) != len(types):
        raise AssertionError("Length of row {} != length of types {}: {}".format(len(row), len(types), row))

    retval = {}
    for value, (key, _type) in zip(row, types.items()):
        try:
            retval[key] = _type(value)
        except ValueError:
            # Allow exceptions as elevation isn't always recorded
            # TODO make this configurable
            if not value and key in ('elevation',):
                retval[key] = None
            else:
                raise
    return retval