Project

General

Profile

catalog.py

catalog utilities (python) - Klaus Honscheid, 04/26/2013 05:15 PM

 
1
#  A simple module to handle DES IQ catalog files
2
#
3
#       downsample   - downsamples a numpy array by a given factor
4
#       getdata      -  read an IQ catalog file and return a python
5
#                      dictionary using the catalog column names as keys
6
#       read_csv     - reads in a csv file, e.g. from a telemetry DB query
7
#                      optionally writes out the data in iq catalog format
8
#       convert_date - converts timestamp strings to datetime objects
9
#       reduce_catalog - reduce catalog using a numpy.where selection
10
#       write_catalog - write a catalog to disk (header comments are not included)
11
#       cat2db       - convert catalog dictionary to SQLite DB
12

    
13
import numpy as np
14
import pylab
15
import csv
16
import datetime
17
import sqlite3 as lite
18
import numbers
19

    
20
def downsample(myarr,factor):
21
    """
22
    Downsample a 2D array by averaging over *factor* pixels in each axis.
23
    Crops upper edge if the shape is not a multiple of factor.
24

25
    This code is pure np and should be fast.
26
    """
27
    xs,ys = myarr.shape
28
    crarr = myarr[:xs-(xs % int(factor)),:ys-(ys % int(factor))]
29
    dsarr = np.concatenate([[crarr[i::factor,j::factor] 
30
        for i in range(factor)] 
31
        for j in range(factor)]).mean(axis=0)
32
    return dsarr
33

    
34
def getdata(catalog_file, headers = []):
35
    cat = open(catalog_file,'r')
36
    c = cat.readlines()
37
    a=list(c)
38
    catalog = {}
39
    for item in a:
40
        if item.startswith("#"):
41
            i = item.lower().replace(' ','')
42
            if i.startswith("#expid"):
43
                h = item.strip("#").rstrip(',').strip().split(',')
44
                for i in xrange(len(h)):
45
                    headers.append(h[i].strip())
46
                    catalog[h[i].strip()] = []
47
            c.remove(item)
48
    print '# of columns: ', len(headers)
49
    for item in c:
50
        elements = item.strip().rstrip(',').split(',')
51
        if len(elements) != len(headers):
52
            print 'Error: # elements %d, # of columns %d (%s)' % (len(elements), len(headers), str(elements[0]))
53
        for i in xrange(len(elements)):
54
            if type(elements[i]) is str:
55
                elements[i] = elements[i].strip()
56
            try:
57
                f = float(elements[i])
58
                elements[i] = f
59
            except:
60

    
61
                if str(elements[i]).lower().strip() == 'true':
62
                    elements[i] = True
63
                elif str(elements[i]).lower().strip() == 'false':
64
                    elements[i] = False
65
                elif str(elements[i]).lower().strip() == 'none':
66
                    elements[i] = None
67
            catalog[headers[i]].append(elements[i])
68
    for item in catalog:
69
        catalog[item] = np.array(catalog[item])
70
    return catalog
71

    
72
# convert catalog to SQLite DB
73
# inputs:  catalog, filename
74
def cat2db(c, filename):
75
    # string for table creation
76
    cat = dict(c)
77
    keys = cat.keys()
78
    for key in keys:
79
        if '-' in key:
80
            oldkey = key
81
            key = key.replace('-','_')
82
            cat[key] = cat[oldkey]
83
            del cat[oldkey]
84
    keys = cat.keys()
85
    table = ''
86
    for key in keys:
87
        if key=='expid':
88
            table += "expid INTEGER PRIMARY KEY,"
89
        elif type(cat[key][0]) is bool:
90
            table +='%s INT,' % key
91
        elif isinstance(cat[key][0], numbers.Integral):
92
            table +='%s INT,' % key
93
        elif isinstance(cat[key][0], numbers.Real):
94
            table +='%s FLOAT,' % key
95
        else:
96
            table +='%s TEXT,' % key
97
    table = table.rstrip(',')
98
    try:
99
        con = lite.connect(filename)
100
        cur = con.cursor()
101
        cur.execute("DROP TABLE IF EXISTS Catalog")
102
        create = "CREATE TABLE Catalog(%s)" % table
103
        cur.execute(create)
104
        q = "INSERT INTO Catalog VALUES (%s)" % ','.join('? ' for x in keys)
105
        for rec in xrange(len(cat[keys[0]])):
106
            values = []
107
            for key in keys:
108
                v = cat[key][rec]
109
                if str(v).upper() == 'TRUE':
110
                    v = 1
111
                elif str(v).upper() == 'FALSE':
112
                    v = 0
113
                values.append(v)
114
            cur.execute(q,values)
115
            con.commit()
116
    except lite.Error, e:
117
        if con:
118
            con.rollback()
119
        print 'Error %s:' % e.args[0]
120
        return
121

    
122

    
123
# write catalog file to disk
124
# if header is given, write only these columns in this order
125
# otherwise the entire catalog dictionary is dumped
126
# a file name is required
127
def write_catalog(cat, filename, header= None):
128
    if type(header) is not list:
129
        header = cat.keys()
130
    f=open(filename,'w')
131
    f.write('#'+','.join(str(x).strip() for x in header)+'\n')
132
    for rec in xrange(len(cat[header[0]])):
133
        out = []
134
        for key in header:
135
            out.append(str(cat[key][rec]).strip())
136
        f.write(','.join(str(x).strip() for x in out)+'\n')
137
    f.close()
138

    
139
# read csv file
140
def read_csv(csvfile, return_header = False, catalog_file = None):
141
    # if catalog_file is not None, the data will be written to a file
142
    # with this name.
143
    c=open(csvfile,'r')
144
    reader = csv.reader(c)
145
    data = []
146
    for i in reader:
147
        data.append(i)
148
    header = list(data[0])
149
    del data[0]
150
    catalog = {}
151
    for i in header:
152
        if i == '':
153
            catalog['empty'] = []
154
        else:
155
            catalog[i] = []
156

    
157
    # fill dictionary
158
    for row in data:
159
        for i in xrange(len(header)):
160
            key = header[i]
161
            if key == '':
162
                catalog['empty'].append(str(row[i]))
163
            else:
164
                try:
165
                    v = float(row[i])
166
                    v = round(v,6)
167
                    if float(v).is_integer():
168
                        v = int(v)
169
                except:
170
                    if '[' in row[i]:
171
                        x = row[i].strip().strip('[]').split(',')[0].strip()
172
                        try:
173
                            v = float(x)
174
                            v = round(v,6)
175
                            if float(v).is_integer():
176
                                v = int(v)
177
                        except:
178
                            v = x                            
179
                    else:
180
                        v = row[i].strip().strip("+00:00")
181
                if str(v).upper() == 'NONE' or str(v) == '':
182
                    v = -99999
183
                catalog[key].append(v)
184
    if header[-1] == '':
185
        del header[-1]
186
    # Write catalog if requested
187
    if catalog_file != None:
188
        out_file = open(catalog_file,'w')
189
        header_string = ', '.join(str(x) for x in header)
190
        out_file.write('#%s\n' % header_string)
191
        for i in xrange(len(catalog[header[0]])):
192
            iq = []
193
            for key in header:
194
                iq.append(catalog[key][i])
195
            iq_string = repr(iq).strip('[]').replace("'","")
196
            print iq_string
197
            out_file.write(iq_string + '\n')
198
        out_file.close()
199
    # convert to np array
200
    for key in catalog.keys():
201
        catalog[key] = np.array(catalog[key])
202
    c.close()
203
    if return_header:
204
        return catalog, header
205
    else:
206
        return catalog
207

    
208
# convert date-obs from string to datetime object
209
# Input: array with time stamp strings
210
def convert_date(d):
211
    l = []
212
    for i in xrange(len(d)):
213
        if 'T' in d[i]:
214
            l.append(datetime.datetime.strptime(d[i].strip(),'%Y-%m-%dT%H:%M:%S.%f'))
215
        else:
216
            l.append(datetime.datetime.strptime(d[i].strip(),'%Y-%m-%d %H:%M:%S.%f'))
217
    return np.array(l)
218

    
219
# reduce catalog (using a numpy.where selection)
220
def reduce_catalog(old, sel):
221
    new = {}
222
    for key in old.keys():
223
        new[key]=old[key][sel]
224
    return new
225

    
226
def getsexcat(catalog_file):
227
    cat = open(catalog_file,'r')
228
    c = cat.readlines()
229
    a=list(c)
230
    catalog = {}
231
    headers = []
232
    for item in a:
233
        if item.startswith("#"):
234
            i = item.split()[2].strip()
235
            headers.append(i)
236
            c.remove(item)
237
    for i in headers:
238
        catalog[i.strip()] = []
239

    
240
    for item in c:
241
        elements = item.strip().rstrip(',').split()
242
        for i in xrange(len(elements)):
243
            if type(elements[i]) is str:
244
                elements[i] = elements[i].strip()
245
            try:
246
                f = float(elements[i])
247
                elements[i] = f
248
            except:
249
                if str(elements[i]).lower().strip() == 'true':
250
                    elements[i] = True
251
                elif str(elements[i]).lower().strip() == 'false':
252
                    elements[i] = False
253
                elif str(elements[i]).lower().strip() == 'none':
254
                    elements[i] = None
255
            catalog[headers[i]].append(elements[i])
256
    for item in catalog:
257
        catalog[item] = np.array(catalog[item])
258
    return catalog
259

    
260
class Catalog(object):
261
    def __init__(self,filename):
262
        self.dbcon=lite.connect(filename)
263
        self.dbcur = self.dbcon.cursor()
264
        self.cuts = {}
265

    
266
    def cut(self,id=None,condition=None):
267
        if id is None:
268
            return self.cuts
269
        if condition is None:
270
            return self.cuts[id]
271
        self.cuts[id]=condition
272
        return 'SUCCESS'
273

    
274
    def select(self,v,cuts=[]):
275
        if len(cuts) == 0:
276
            where = ''
277
        else:
278
            where = ' WHERE %s' % ' AND '.join(str(self.cuts[x]) for x in cuts)
279
        if type(v) is not list:
280
            v = [v]
281
        results = {}
282
        for c in v:
283
            what = 'SELECT %s FROM Catalog' % str(c)
284
            self.dbcur.execute(what+where)
285
            r = self.dbcur.fetchall()
286
            results[c]=[x[0] for x in r] 
287
        return results