comparison lemuriformes/csv2sqlite.py @ 11:afc259799019

[CSV] add script to convert to SQL
author Jeff Hammel <k0scist@gmail.com>
date Sun, 10 Dec 2017 13:58:55 -0800
parents
children
comparison
equal deleted inserted replaced
10:ad1bf59eedb0 11:afc259799019
1 #!/usr/bin/env python
2
3 """
4 convert CSV files to a SQLite DB file
5 """
6
7 # For SQLite types see
8 # http://www.sqlite.org/datatype3.html
9
10 import argparse
11 import csv
12 import os
13 import sqlite3
14 import sys
15 from collections import OrderedDict
16 from .cast import string
17 from .sql import SQLConnection
18
19
20 def read_csv(csv_file):
21 """read csv file with header and return list of dicts"""
22
23 reader = csv.DictReader(csv_file)
24 return [row for row in reader]
25
26
27 def path_root(path):
28 """return basename file root sans extension"""
29
30 return os.path.splitext(os.path.basename(path))[0]
31
32
33 class SQLiteConnection(SQLConnection):
34 """connection class to SQLite database"""
35
36 # mapping of python types to SQLite types
37 types = {int: "INTEGER",
38 float: "REAL",
39 str: "TEXT"}
40
41 def __init__(self, db_file):
42 if not os.path.exists(db_file):
43 with open(db_file, 'wb') as f:
44 # touch file
45 pass
46 self.db_file = db_file
47 self.conn = sqlite3.connect(self.db_file)
48
49 def __call__(self, sql, *args):
50 c = self.conn.cursor()
51 c.execute(sql, args)
52 retval = c.fetchall()
53 self.conn.commit()
54 return retval
55
56 def __del__(self):
57 self.conn.close()
58
59 def tables(self):
60 """
61 return tables in the SQLite database
62 Ref: https://stackoverflow.com/questions/82875/how-to-list-the-tables-in-an-sqlite-database-file-that-was-opened-with-attach
63 """
64 sql = "SELECT name FROM sqlite_master WHERE type='table';"
65 results = self(sql)
66 return [row[0] for row in results]
67
68 def create_table(self, table, **columns):
69 """add a table to the database"""
70
71 columns = ', '.join(["{key} {value}".format(key=key, value=value)
72 for key, value in columns.items()])
73 self("CREATE TABLE IF NOT EXISTS {table} ({columns});".format(table=table,
74 columns=columns))
75
76 def schema(self, table):
77 """get dictionary of typing for a table"""
78 # http://www.sqlite.org/pragma.html#pragma_table_info
79
80 sql = "PRAGMA table_info('{}')".format(table)
81 results = self(sql)
82 # TODO: get data types from e.g.
83 # [(0, u'how', u'', 0, None, 0), (1, u'are', u'', 0, None, 0), (2, u'you', u'', 0, None, 0)]
84 # ... now
85 # [(0, u'how', u'TEXT', 0, None, 0), (1, u'you', u'TEXT', 0, None, 0), (2, u'are', u'TEXT', 0, None, 0)]
86 return OrderedDict([(result[1], result[2])
87 for result in results])
88
89 def columns(self, table):
90 """return ordered list of column names"""
91
92 return self.schema(table).keys()
93
94 def select(self, table):
95 """just `select *` for now"""
96
97 sql = "SELECT * FROM {table};"
98 sql = sql.format(table=table)
99 data = self(sql)
100 columns = self.columns(table)
101 return [OrderedDict(zip(columns, row))
102 for row in data]
103
104 select_all = select
105
106
107 class CSV2SQLite(object):
108 """
109 convert Comma Separated Value input to SQLite
110 """
111 # TODO: allow conversion to arbitrary SQL
112
113 def __init__(self, output):
114 self.conn = SQLiteConnection(output)
115
116 def __call__(self, *csv_files, **csv_dict):
117
118 # allow tables of default(ing) name
119 csv_dict.update({csv_file: None
120 for csv_file in csv_files})
121
122 for csv_file, tablename in csv_dict.items():
123 self.csv2table(csv_file, tablename=tablename)
124
125 def csv2table(self, csv_file, tablename=None):
126 if isinstance(csv_file, string):
127 with open(csv_file) as f:
128 return self.csv2table(f, tablename=tablename)
129 if tablename is None:
130 # TODO: allow lookup from scheme
131 tablename = path_root(csv_file.name)
132 # read csv
133 data = read_csv(csv_file)
134 assert data
135
136 # infer schema from data
137 # TODO: allow lookup from scheme
138 columns = {column: "TEXT" for column in data[0].keys()}
139
140 # create table
141 self.conn.create_table(tablename, **columns)
142
143 # inseert data
144 for row in data:
145 self.conn.insert_row(tablename, **row)
146
147
148 def main(args=sys.argv[1:]):
149 """CLI"""
150
151 # parse command line
152 parser = argparse.ArgumentParser(description=__doc__)
153 parser.add_argument('input', nargs='+',
154 type=argparse.FileType('r'),
155 help="input CSV files; table names taken from file names")
156 parser.add_argument('-o', '--output', dest='output',
157 required=True,
158 help="output SQLite file")
159 options = parser.parse_args(args)
160
161 # overwrite the file
162 # TODO: deprecate and allow appending
163 with open(options.output, 'w') as f:
164 pass
165
166 # instantiate converter
167 conn = CSV2SQLite(options.output)
168
169 # convert input CSV to SQLite tables
170 conn(*options.input)
171
172
173 if __name__ == '__main__':
174 main()