Mercurial > hg > GlobalNeighbors
view globalneighbors/schema.py @ 11:d1b99c695511
remove obsolete data
author | Jeff Hammel <k0scist@gmail.com> |
---|---|
date | Sun, 25 Jun 2017 12:54:05 -0700 |
parents | 1b94f3bf97e5 |
children |
line wrap: on
line source
"""
Schema for cities 1000

From http://download.geonames.org/export/dump/
"""

from collections import OrderedDict

# Human-readable description of every column, keyed by column name.
descriptions = {
    'geonameid': "integer id of record in geonames database",
    'name': "name of geographical point (utf8) varchar(200)",
    'asciiname': "name of geographical point in plain ascii characters, varchar(200)",
    'alternatenames': "alternatenames, comma separated, ascii names automatically transliterated, convenience attribute from alternatename table, varchar(10000)",
    'latitude': "latitude in decimal degrees (wgs84)",
    'longitude': "longitude in decimal degrees (wgs84)",
    'feature class': "see http://www.geonames.org/export/codes.html, char(1)",
    'feature code': "see http://www.geonames.org/export/codes.html, varchar(10)",
    'country code': "ISO-3166 2-letter country code, 2 characters",
    'cc2': "alternate country codes, comma separated, ISO-3166 2-letter country code, 200 characters",
    'admin1 code': "fipscode (subject to change to iso code), see exceptions below, see file admin1Codes.txt for display names of this code; varchar(20)",
    'admin2 code': "code for the second administrative division, a county in the US, see file admin2Codes.txt; varchar(80)",
    'admin3 code': "code for third level administrative division, varchar(20)",
    'admin4 code': "code for fourth level administrative division, varchar(20)",
    'population': "bigint (8 byte int)",
    'elevation': "in meters, integer",
    'dem': "digital elevation model, srtm3 or gtopo30, average elevation of 3''x3'' (ca 90mx90m) or 30''x30'' (ca 900mx900m) area in meters, integer. srtm processed by cgiar/ciat.",
    'timezone': "the iana timezone id (see file timeZone.txt) varchar(40)",
    'modification date': "date of last modification in yyyy-MM-dd format",
}

# Schema of python types to cast to.  The order matters: `cast_row` pairs
# the n-th value of a row with the n-th entry of this mapping.
types = OrderedDict([
    ('geonameid', int),
    ('name', str),
    ('asciiname', str),
    ('alternatenames', str),
    ('latitude', float),
    ('longitude', float),
    ('feature class', str),
    ('feature code', str),
    ('country code', str),
    ('cc2', str),
    ('admin1 code', str),
    ('admin2 code', str),
    ('admin3 code', str),
    ('admin4 code', str),
    ('population', int),
    ('elevation', int),
    ('dem', int),
    ('timezone', str),
    ('modification date', str),
])

# Fields we care about; we'll discard the rest
fields = (
    'geonameid',
    'name',
    'asciiname',
    'latitude',
    'longitude',
    'country code',
    'population',
)

# Keys we care about
name = 'asciiname'
primary_key = 'geonameid'

# fields that should be unicode
unicode_fields = ('name', 'asciiname')


def cast_row(row, types=types, nullable=('elevation',)):
    """
    Cast an iterable `row` of data to a `dict` according to `types`.

    Each value in `row` is paired positionally with a `(key, type)` entry
    of `types` and converted by calling the type on the value.

    row -- sequence of raw values; must be exactly as long as `types`
    types -- ordered mapping of column name to casting callable
    nullable -- column names for which an empty value that fails to cast
                is stored as `None` instead of raising

    Returns a `dict` mapping every key of `types` to its cast value.

    Raises `AssertionError` if `row` and `types` differ in length, and
    re-raises the `ValueError` of any failed cast that is not an empty
    value in a `nullable` column.
    """

    if len(row) != len(types):
        raise AssertionError("Length of row {} != length of types {}: {}".format(len(row), len(types), row))

    retval = {}
    for value, (key, _type) in zip(row, types.items()):
        try:
            cast = _type(value)
        except ValueError:
            # Only an empty value in a nullable column may be missing
            # (e.g. elevation isn't always recorded); anything else is
            # a genuine data error.
            if value or key not in nullable:
                raise
            cast = None

        # Decode byte strings only.  On Python 2 `str` is `bytes`, so this
        # yields `unicode` as before; on Python 3 `str` is already text and
        # has no `.decode`, so it is left alone.
        if key in unicode_fields and isinstance(cast, bytes):
            cast = cast.decode('utf-8')

        retval[key] = cast
    return retval