Project

General

Profile

catalog.py

Lucas Beaufore, 06/07/2013 07:57 AM

 
1
#  A simple module to handle DES IQ catalog files
2
#
3
#       downsample   - downsamples a numpy array by a given factor
4
#       getdata      -  read an IQ catalog file and return a python
5
#                      dictionary using the catalog column names as keys
6
#       read_csv     - reads in a csv file, e.g. from a telemetry DB query
7
#                      optionally writes out the data in iq catalog format
8
#       convert_date - converts timestamp strings to datetime objects
9
#       reduce_catalog - reduce catalog using a numpy.where selection
10
#       write_catalog - write a catalog dictionary to disk (header comments are not included)
11
#       dict2db       - convert catalog dictionary to SQLite DB
12

    
13
import numpy as np
14
import pylab
15
import csv
16
import datetime
17
import sqlite3 as lite
18
import numbers
19

    
20
def downsample(myarr,factor):
21
    """
22
    Downsample a 2D array by averaging over *factor* pixels in each axis.
23
    Crops upper edge if the shape is not a multiple of factor.
24

25
    This code is pure np and should be fast.
26
    """
27
    xs,ys = myarr.shape
28
    crarr = myarr[:xs-(xs % int(factor)),:ys-(ys % int(factor))]
29
    dsarr = np.concatenate([[crarr[i::factor,j::factor] 
30
        for i in range(factor)] 
31
        for j in range(factor)]).mean(axis=0)
32
    return dsarr
33

    
34
def getdata(catalog_file):
35
    """
36

37
    Creates a dictionary of data from an IQ catalog. Catalog_file is the name
38
    of the file to be converted. The keys of the python dictionary represent columns, 
39
    and each corresponds to an array of values, each element representing a row of data.
40

41
    """
42
    headers = []
43
    cat = open(catalog_file,'r')
44
    c = cat.readlines()
45
    a=list(c)
46
    catalog = {}
47
    for item in a:
48
        if item.startswith("#"):
49
            i = item.lower().replace(' ','')
50
            if i.startswith("#"):
51
                h = item.strip("#").rstrip(',').strip().split(',')
52
                for i in xrange(len(h)):
53
                    headers.append(h[i].strip())
54
                    catalog[h[i].strip()] = []
55
            c.remove(item)
56
    print '# of columns: ', len(headers)
57
    for item in c:
58
        elements = item.strip().rstrip(',').split(',')
59
        if len(elements) != len(headers):
60
            print 'Error: # elements %d, # of columns %d (%s)' % (len(elements), len(headers), str(elements[0]))
61
        for i in xrange(len(elements)):
62
            if type(elements[i]) is str:
63
                elements[i] = elements[i].strip()
64
            try:
65
                f = float(elements[i])
66
                elements[i] = f
67
            except:
68

    
69
                if str(elements[i]).lower().strip() == 'true':
70
                    elements[i] = True
71
                elif str(elements[i]).lower().strip() == 'false':
72
                    elements[i] = False
73
                elif str(elements[i]).lower().strip() == 'none':
74
                    elements[i] = None
75
            catalog[headers[i]].append(elements[i])
76
    for item in catalog:
77
        catalog[item] = np.array(catalog[item])
78
    return catalog
79

    
80
# convert dictionary to SQLite DB
81
# inputs:  dictionary, filename
82
def dict2db(dictionary, filename):
83
    # string for table creation
84
    cat = dict(dictionary)
85
    keys = cat.keys()
86
    for key in keys:
87
        if '-' in key:
88
            oldkey = key
89
            key = key.replace('-','_')
90
            cat[key] = cat[oldkey]
91
            del cat[oldkey]
92
    keys = cat.keys()
93
    table = ''
94
    for key in keys:
95
        if key=='expid':
96
            table += "expid INTEGER PRIMARY KEY,"
97
        elif type(cat[key][0]) is bool:
98
            table +='%s INT,' % key
99
        elif isinstance(cat[key][0], numbers.Integral):
100
            table +='%s INT,' % key
101
        elif isinstance(cat[key][0], numbers.Real):
102
            table +='%s FLOAT,' % key
103
        else:
104
            table +='%s TEXT,' % key
105
    table = table.rstrip(',')
106
    try:
107
        con = lite.connect(filename)
108
        cur = con.cursor()
109
        cur.execute("DROP TABLE IF EXISTS Catalog")
110
        create = "CREATE TABLE Catalog(%s)" % table
111
        cur.execute(create)
112
        q = "INSERT INTO Catalog VALUES (%s)" % ','.join('? ' for x in keys)
113
        for rec in xrange(len(cat[keys[0]])):
114
            values = []
115
            for key in keys:
116
                v = cat[key][rec]
117
                if str(v).upper() == 'TRUE':
118
                    v = 1
119
                elif str(v).upper() == 'FALSE':
120
                    v = 0
121
                values.append(v)
122
            cur.execute(q,values)
123
            con.commit()
124
    except lite.Error, e:
125
        if con:
126
            con.rollback()
127
        print 'Error %s:' % e.args[0]
128
        return
129

    
130

    
131
# write catalog file to disk
132
# if header is given, write only these columns in this order
133
# otherwise the entire catalog dictionary is dumped
134
# a file name is required
135
def write_catalog(cat, filename, header= None):
136
    if type(header) is not list:
137
        header = cat.keys()
138
    f=open(filename,'w')
139
    f.write('#'+','.join(str(x).strip() for x in header)+'\n')
140
    for rec in xrange(len(cat[header[0]])):
141
        out = []
142
        for key in header:
143
            out.append(str(cat[key][rec]).strip())
144
        f.write(','.join(str(x).strip() for x in out)+'\n')
145
    f.close()
146

    
147
# read csv file
148
def read_csv(csvfile, return_header = False, catalog_file = None):
149
    # if catalog_file is not None, the data will be written to a file
150
    # with this name.
151
    c=open(csvfile,'r')
152
    reader = csv.reader(c)
153
    data = []
154
    for i in reader:
155
        data.append(i)
156
    header = list(data[0])
157
    del data[0]
158
    catalog = {}
159
    for i in header:
160
        if i == '':
161
            catalog['empty'] = []
162
        else:
163
            catalog[i] = []
164

    
165
    # fill dictionary
166
    for row in data:
167
        for i in xrange(len(header)):
168
            key = header[i]
169
            if key == '':
170
                catalog['empty'].append(str(row[i]))
171
            else:
172
                try:
173
                    v = float(row[i])
174
                    v = round(v,6)
175
                    if float(v).is_integer():
176
                        v = int(v)
177
                except:
178
                    if '[' in row[i]:
179
                        x = row[i].strip().strip('[]').split(',')[0].strip()
180
                        try:
181
                            v = float(x)
182
                            v = round(v,6)
183
                            if float(v).is_integer():
184
                                v = int(v)
185
                        except:
186
                            v = x                            
187
                    else:
188
                        v = row[i].strip().strip("+00:00")
189
                if str(v).upper() == 'NONE' or str(v) == '':
190
                    v = -99999
191
                catalog[key].append(v)
192
    if header[-1] == '':
193
        del header[-1]
194
    # Write catalog if requested
195
    if catalog_file != None:
196
        out_file = open(catalog_file,'w')
197
        header_string = ', '.join(str(x) for x in header)
198
        out_file.write('#%s\n' % header_string)
199
        for i in xrange(len(catalog[header[0]])):
200
            iq = []
201
            for key in header:
202
                iq.append(catalog[key][i])
203
            iq_string = repr(iq).strip('[]').replace("'","")
204
            print iq_string
205
            out_file.write(iq_string + '\n')
206
        out_file.close()
207
    # convert to np array
208
    for key in catalog.keys():
209
        catalog[key] = np.array(catalog[key])
210
    c.close()
211
    if return_header:
212
        return catalog, header
213
    else:
214
        return catalog
215

    
216
# convert date-obs from string to datetime object
217
# Input: array with time stamp strings
218
def convert_date(d):
219
    l = []
220
    for i in xrange(len(d)):
221
        if 'T' in d[i]:
222
            l.append(datetime.datetime.strptime(d[i].strip(),'%Y-%m-%dT%H:%M:%S.%f'))
223
        else:
224
            l.append(datetime.datetime.strptime(d[i].strip(),'%Y-%m-%d %H:%M:%S.%f'))
225
    return np.array(l)
226

    
227
# reduce catalog (using a numpy.where selection)
228
def reduce_catalog(old, sel):
229
    new = {}
230
    for key in old.keys():
231
        new[key]=old[key][sel]
232
    return new
233

    
234
def getsexcat(catalog_file):
235
    cat = open(catalog_file,'r')
236
    c = cat.readlines()
237
    a=list(c)
238
    catalog = {}
239
    headers = []
240
    for item in a:
241
        if item.startswith("#"):
242
            i = item.split()[2].strip()
243
            headers.append(i)
244
            c.remove(item)
245
    for i in headers:
246
        catalog[i.strip()] = []
247

    
248
    for item in c:
249
        elements = item.strip().rstrip(',').split()
250
            for i in xrange(len(elements)):
251
            if type(elements[i]) is str:
252
                elements[i] = elements[i].strip()
253
            try:
254
                f = float(elements[i])
255
                elements[i] = f
256
            except:
257
                if str(elements[i]).lower().strip() == 'true':
258
                    elements[i] = True
259
                elif str(elements[i]).lower().strip() == 'false':
260
                    elements[i] = False
261
                elif str(elements[i]).lower().strip() == 'none':
262
                    elements[i] = None
263
            catalog[headers[i]].append(elements[i])
264
    for item in catalog:
265
        catalog[item] = np.array(catalog[item])
266
    return catalog
267

    
268
class Catalog(object):
269
    def __init__(self,filename):
270
        self.dbcon=lite.connect(filename)
271
        self.dbcur = self.dbcon.cursor()
272
        self.cuts = {}
273

    
274
    def cut(self,id=None,condition=None):
275
        if id is None:
276
            return self.cuts
277
        if condition is None:
278
            return self.cuts[id]
279
        self.cuts[id]=condition
280
        return 'SUCCESS'
281

    
282
    def select(self,v,cuts=[]):
283
        if len(cuts) == 0:
284
            where = ''
285
        else:
286
            where = ' WHERE %s' % ' AND '.join(str(self.cuts[x]) for x in cuts)
287
        if type(v) is not list:
288
            v = [v]
289
        results = {}
290
        for c in v:
291
            what = 'SELECT %s FROM Catalog' % str(c)
292
            self.dbcur.execute(what+where)
293
            r = self.dbcur.fetchall()
294
            results[c]=[x[0] for x in r] 
295
        return results