-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathsplitndgrid.py
More file actions
185 lines (135 loc) · 6.59 KB
/
splitndgrid.py
File metadata and controls
185 lines (135 loc) · 6.59 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
#!/usr/bin/python
def usage():
    """Print the command-line help text to stdout and terminate the program.

    Raises:
        SystemExit: always, via ``sys.exit()`` called without a status.
    """
    import sys

    # A single parenthesised string prints identically under the Python 2
    # print statement and the Python 3 print function.
    print("""
###########
splitndgrid
###########
Program to read an ASCII list of scattered points in nD space and bin
them into files representing bins on an N-dimensional cartesian grid. A
border may be defined around each bin, where border width is constant for
each dimension. Borders of adjacent bins may overlap so that points may
be binned into more than one bin.
Usage:
splitndgrid INFILE OUTPATH NDIMS MINS MAXS BINSIZE BORDERSIZE
Arguments:
INFILE - Path to the ASCII source file containing the points. Each
record must be formatted: D1 D2...DN\\n where e.g. D1 is
the position of the point in dimension 1. Note that fields
are whitespace separated and each record is terminated with
a newline character.
OUTPATH - The output path. Output file names for each bin on the
grid are made by suffixing OUTPATH with a string made of
underscore-separated bin numbers (which start at 1).
NDIMS - The number of dimensions, the first n fields to
read from each line in INFILE that describe the location
of the point.
MINS - Comma separated string containing the minimum values of the
grid to bin onto for dimensions 1..n.
MAXS - Comma separated string containing the maximum values of the
grid to bin onto for dimensions 1..n.
BINSIZE - Comma separated string containing the bin widths for each
dimension.
BORDERSIZE - Comma separated string containing the border widths for
each dimension.
Example:
To bin a list of points on a lat lon grid where each record contains
the fields:
LAT LON ALTITUDE
We only want to bin the records into 5x5 degree lat lon bins. The
altitude field is propagated into the binned files. We assign no
border in the longitude direction, but one of 0.02 degrees in the
latitude direction.
splitndgrid globalelev.txt ~/elevbins/bin_ 2 -90,0 90,360 5,5 0.02,0
Note that points may be in more than one bin in the latitude
direction, because the 0.02 degree borders of adjacent bins overlap
in that dimension.
This example will produce up to 2592 files (a bin that receives no
points produces no file) from:
~/elevbins/bin_1_1.txt
to:
~/elevbins/bin_36_72.txt
Author: Elliot Sefton-Nash (e.sefton-nash@uclmail.net)
Changelog:
2014-01-13 Original
""")
    sys.exit()
def warn(msg):
    """Write a non-fatal warning, prefixed with the program name, to stderr.

    Args:
        msg: Warning text (a string) appended after the prefix.
    """
    import sys

    # sys.stderr.write parses on both Python 2 and 3, unlike ``print >>``,
    # and emits the same bytes: prefix, message, trailing newline.
    sys.stderr.write('splitndgrid: WARNING // ' + msg + '\n')
def error(msg):
    """Write a fatal error message to stderr and terminate the program.

    Args:
        msg: Error text (a string) appended after the program-name prefix.

    Raises:
        SystemExit: always, with exit status 1 so that shells and calling
            scripts can tell the run failed.
    """
    import sys

    # sys.stderr.write parses on both Python 2 and 3, unlike ``print >>``.
    sys.stderr.write('splitndgrid: ERROR // ' + msg + '\n')
    # A bare sys.exit() would report success (status 0) for a fatal error.
    sys.exit(1)
def parseArgs(argv):
    """Convert the seven positional command-line strings into typed values.

    Args:
        argv: Sequence of seven strings, in the order INFILE, OUTPATH,
            NDIMS, MINS, MAXS, BINSIZE, BORDERSIZE (i.e. ``sys.argv[1:]``).

    Returns:
        Tuple ``(INFILE, OUTPATH, NDIMS, MINS, MAXS, BINSIZE, BORDERSIZE)``
        where INFILE and OUTPATH are the unmodified strings, NDIMS is an
        int, and the remaining four are lists of floats of length NDIMS.

    Raises:
        SystemExit: through ``usage()`` when the argument count is wrong or
            a value cannot be parsed, and through ``error()`` when a
            comma-separated list does not hold NDIMS elements.
    """
    argnames = ('INFILE', 'OUTPATH', 'NDIMS', 'MINS', 'MAXS', 'BINSIZE',
                'BORDERSIZE')
    if len(argv) != len(argnames):
        usage()

    # Values are collected in a dict rather than exec'ing assignments:
    # exec cannot create function locals on Python 3.
    parsed = {}
    for name, raw in zip(argnames, argv):
        if name in ('INFILE', 'OUTPATH'):
            parsed[name] = raw
        elif name == 'NDIMS':
            try:
                parsed[name] = int(raw)
            except (TypeError, ValueError):
                usage()  # does not return
        else:
            # NDIMS precedes every list argument in argnames, so it has
            # already been parsed when a list is validated against it.
            try:
                values = [float(field) for field in raw.split(',')]
            except (AttributeError, TypeError, ValueError):
                usage()  # does not return
            if len(values) != parsed['NDIMS']:
                error(name + ' must have NDIMS elements')
            parsed[name] = values

    return tuple(parsed[name] for name in argnames)
def doBin(infilepath, outstem, nDims, mins, maxs, binSize, buffs):
    """Copy each record of a points file into the file of every bin it hits.

    For each dimension the grid runs from ``mins`` towards ``maxs`` in steps
    of ``binSize``; each bin is widened by ``buffs`` on both sides, so a
    record lying in an overlap is written to several bin files. A record
    belongs to a bin when ``lower <= value < upper`` in every dimension.

    Output files are named ``outstem`` + underscore-separated 1-based bin
    numbers + ``.txt``. A file is created only when the first record lands
    in its bin, so empty bins produce no file.

    Args:
        infilepath: Path of the whitespace-separated ASCII input file.
        outstem: Prefix used to build every output file path.
        nDims: Number of leading fields per record that hold coordinates.
        mins: Per-dimension grid minimum (sequence of length nDims).
        maxs: Per-dimension grid maximum (sequence of length nDims).
        binSize: Per-dimension bin width (sequence of length nDims).
        buffs: Per-dimension border width (sequence of length nDims).

    Raises:
        SystemExit: through ``error()`` when the input file cannot be opened.
    """
    import itertools

    import numpy as np

    # Echo the run parameters space-separated; a single parenthesised string
    # prints identically under Python 2 and Python 3.
    print(' '.join(str(arg) for arg in
                   (infilepath, outstem, nDims, mins, maxs, binSize, buffs)))

    # Per-dimension arrays of bin bounds, borders included.
    lowers, uppers = [], []
    for dim in range(nDims):
        edges = np.arange(mins[dim], maxs[dim], binSize[dim])
        lowers.append(edges - buffs[dim])
        uppers.append(edges + binSize[dim] + buffs[dim])

    try:
        fin = open(infilepath, 'r')
    except IOError:
        error('Unable to open ' + infilepath)

    # Maps a tuple of 0-based bin indices to its open output file.
    # NOTE(review): one handle per non-empty bin stays open until the end,
    # so a grid with more non-empty bins than the process's open-file limit
    # would fail on open() -- confirm whether that matters for real inputs.
    outputs = {}
    try:
        for lineno, line in enumerate(fin, 1):
            fields = line.split()
            if not fields:
                # Blank lines (e.g. a trailing newline) carry no record.
                continue

            # Only the coordinate fields are converted; pass-through columns
            # after them may hold arbitrary text.
            try:
                vec = [float(field) for field in fields[:nDims]]
            except ValueError:
                vec = None
            if vec is None or len(vec) < nDims:
                warn('Skipping line %d of %s: expected %d numeric fields'
                     % (lineno, infilepath, nDims))
                continue

            # For each dimension, the indices of all bins containing the point.
            inBins = [
                np.where((vec[dim] >= lowers[dim]) & (vec[dim] < uppers[dim]))[0]
                for dim in range(nDims)
            ]

            # Every combination of per-dimension hits is a bin for this record.
            for thisBin in itertools.product(*inBins):
                key = tuple(int(index) for index in thisBin)
                fout = outputs.get(key)
                if fout is None:
                    path = outstem + '_'.join(str(index + 1) for index in key) + '.txt'
                    fout = outputs[key] = open(path, 'w')
                fout.write(line)
    finally:
        # Close everything even if a read or write fails part-way through.
        fin.close()
        for fout in outputs.values():
            fout.close()
if __name__ == '__main__':
    import sys

    # The program name plus exactly seven positional arguments are required;
    # anything else prints the help text and exits.
    cli_args = sys.argv[1:]
    if len(cli_args) != 7:
        usage()

    # Turn the raw strings into typed values, then run the binning.
    infilepath, outstem, nDims, mins, maxs, binSize, buffs = parseArgs(cli_args)
    doBin(infilepath, outstem, nDims, mins, maxs, binSize, buffs)