splitndgrid/splitndgrid.py at master · elliotsn/splitndgrid · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
#!/usr/bin/python
def usage():
    """Print the command-line help text to stdout and terminate the program.

    Never returns: ends with ``sys.exit()`` (exit status 0), exactly as
    before. The text is emitted with a single parenthesised ``print`` so the
    function behaves the same under Python 2 and Python 3.
    """
    import sys
    # The file naming described below mirrors doBin(): bin numbers are 1-based
    # and not zero-padded, and a file is only created once a point lands in it.
    print("""
        ###########
        splitndgrid
        ###########

        Program to read an ASCII list of scattered points in nD space and bin
        them into files representing bins on an N-dimensional cartesian grid. A
        border may be defined around each bin, where border width is constant
        for each dimension. Borders of adjacent bins may overlap so that points
        may be binned into more than one bin.

        Usage:

            splitndgrid INFILE OUTPATH NDIMS MINS MAXS BINSIZE BORDERSIZE

        Arguments:

            INFILE - Path to the ASCII source file containing the points. Each
                     record must be formatted: D1 D2...DN\\n  where e.g. D1 is
                     the position of the point in dimension 1. Note that fields
                     are whitespace separated and each record is terminated with
                     a newline character.

            OUTPATH - The output path. Output file names for each bin on the
                      grid are made by suffixing OUTPATH with a string made of
                      underscore-separated bin numbers (which start at 1).

            NDIMS   - The number of dimensions, the first n fields to
                      read from each line in INFILE that describe the location
                      of the point.

            MINS    - Comma separated string containing the minimum values of the
                      grid to bin onto for dimensions 1..n.

            MAXS    - Comma separated string containing the maximum values of the
                      grid to bin onto for dimensions 1..n.

            BINSIZE - Comma separated string containing the bin widths for each
                      dimension.

            BORDERSIZE - Comma separated string containing the border widths for
                         each dimension.

        Example:

            To bin a list of points on a lat lon grid where each record contains
            the fields:
                LAT LON ALTITUDE

            We only want to bin the records into 5x5 degree lat lon bins. The
            altitude field is propagated into the binned files. We assign no
            border in the longitude direction, but one of 0.02 degrees in the
            latitude direction.

            splitndgrid globalelev.txt ~/elevbins/bin_ 2 -90,0 90,360 5,5 0.02,0

            Note that points may be in more than one bin in the latitude
            direction, because the 0.02 degree borders of adjacent bins overlap
            in that dimension.

            This example will produce up to 2592 files (one for each bin that
            contains at least one point) from:
                    ~/elevbins/bin_1_1.txt
                to:
                    ~/elevbins/bin_36_72.txt

        Author: Elliot Sefton-Nash  (e.sefton-nash@uclmail.net)

        Changelog:
            2014-01-13 Original
    """)
    sys.exit()


def warn(msg):
    """Write a non-fatal warning line for *msg* to stderr and return.

    The output is ``'splitndgrid: WARNING // ' + msg`` followed by a newline.
    ``sys.stderr.write`` is used instead of the Python-2-only ``print >>``
    form, which raises TypeError when run under Python 3.
    """
    import sys
    sys.stderr.write('splitndgrid: WARNING // '+msg+'\n')


def error(msg):
    """Write a fatal error line for *msg* to stderr and terminate the program.

    The output is ``'splitndgrid: ERROR // ' + msg`` followed by a newline.
    Never returns: raises SystemExit with status 1 so that shells and calling
    scripts can tell the run failed (a bare ``sys.exit()`` reports success).
    """
    import sys
    # sys.stderr.write works on both Python 2 and 3, unlike ``print >>``.
    sys.stderr.write('splitndgrid: ERROR // '+msg+'\n')
    sys.exit(1)


def parseArgs(argv):
    """Convert the seven command-line strings into typed values.

    argv -- the arguments after the program name, in the order
            INFILE OUTPATH NDIMS MINS MAXS BINSIZE BORDERSIZE.

    Returns the tuple (INFILE, OUTPATH, NDIMS, MINS, MAXS, BINSIZE,
    BORDERSIZE): INFILE and OUTPATH are passed through unchanged, NDIMS is an
    int and the remaining four are lists of floats parsed from
    comma-separated strings.

    Calls usage() (which exits) when the argument count is wrong or a value
    is not numeric, and error() (which exits) when a comma-separated list
    does not contain exactly NDIMS values.
    """
    argnames = ('INFILE','OUTPATH','NDIMS','MINS','MAXS','BINSIZE','BORDERSIZE')

    # Too few or too many arguments cannot be mapped onto argnames.
    if len(argv) != len(argnames):
        usage()

    INFILE, OUTPATH = argv[0], argv[1]

    try:
        NDIMS = int(argv[2])
    except ValueError:
        usage()

    # MINS, MAXS, BINSIZE and BORDERSIZE share one format: comma-separated
    # floats, one per dimension. They are parsed and checked in that order.
    vectors = []
    for name, a in zip(argnames[3:], argv[3:]):
        try:
            values = [float(s) for s in a.split(',')]
        except ValueError:
            usage()
        if len(values) != NDIMS:
            error(name+' must have NDIMS elements')
        vectors.append(values)

    MINS, MAXS, BINSIZE, BORDERSIZE = vectors
    return (INFILE,OUTPATH,NDIMS,MINS,MAXS,BINSIZE,BORDERSIZE)


def doBin(infilepath,outstem,nDims,mins,maxs,binSize,buffs):
    """Copy each record of an ASCII point file into the grid-bin files it falls in.

    infilepath -- path of the input file; one whitespace-separated record per
                  line, whose first nDims fields are the point coordinates.
    outstem    -- prefix of every output path. The file for a bin is
                  outstem + '_'.join(1-based bin numbers) + '.txt'.
    nDims      -- number of leading fields used as coordinates.
    mins, maxs -- per-dimension grid limits; the lower bin edges are
                  np.arange(min, max, binSize).
    binSize    -- per-dimension bin width.
    buffs      -- per-dimension border added on both sides of every bin, so a
                  point may be written to several neighbouring bins.

    A point is in a bin when lower - border <= value < upper + border in
    every dimension. Records are written unchanged, so fields after the
    first nDims are carried through. An output file is created (truncating
    any existing file) the first time a record is assigned to its bin; bins
    that receive no points produce no file.

    Blank lines are skipped. Records with fewer than nDims fields, or with a
    non-numeric coordinate, are reported through warn() and skipped. error()
    is called (and exits) if the input file cannot be opened.
    """
    import sys
    import itertools
    import numpy as np

    # Echo the run parameters, space separated, on one line of stdout.
    params = (infilepath,outstem,nDims,mins,maxs,binSize,buffs)
    sys.stdout.write(' '.join(str(p) for p in params)+'\n')

    lowers,uppers = [],[]
    for dim in range(nDims):
        # Vector of bin boundaries for each dimension, including buffers
        edges = np.arange(mins[dim],maxs[dim],binSize[dim])
        lowers.append(edges-buffs[dim])
        uppers.append(edges+binSize[dim]+buffs[dim])

    try:
        fin = open(infilepath,'r')
    except IOError:
        error('Unable to open '+infilepath)

    # Output files opened so far, keyed by the 0-based bin coordinate tuple.
    # NOTE: every touched bin keeps its file open until the end of the run,
    # so a very large grid can run into the per-process open-file limit.
    outFiles = {}
    try:
        for lineNo,line in enumerate(fin,1):

            fields = line.split()
            if not fields:
                continue

            # Only the coordinate fields have to be numeric; the remaining
            # fields are copied through verbatim.
            try:
                vec = [float(f) for f in fields[:nDims]]
            except ValueError:
                warn('Skipping line %d of %s: non-numeric coordinate' % (lineNo,infilepath))
                continue
            if len(vec) < nDims:
                warn('Skipping line %d of %s: fewer than %d fields' % (lineNo,infilepath,nDims))
                continue

            # For each dimension, the indices of the bins containing the point.
            inBins = [np.where((vec[dim] >= lowers[dim]) & (vec[dim] < uppers[dim]))[0]
                      for dim in range(nDims)]

            # Every combination of per-dimension bins is a grid cell that
            # contains this point.
            for thisBin in itertools.product(*inBins):
                key = tuple(int(b) for b in thisBin)
                fout = outFiles.get(key)
                if fout is None:
                    # First record for this bin: create its file.
                    fout = open(outstem+'_'.join(str(b+1) for b in key)+'.txt','w')
                    outFiles[key] = fout
                fout.write(line)
    finally:
        # Close everything even if a record or a write failed part-way.
        fin.close()
        for fout in outFiles.values():
            fout.close()


if __name__ == '__main__':

    import sys

    # Exactly seven arguments must follow the program name; anything else
    # prints the help text (usage() exits and does not return).
    cliArgs = sys.argv[1:]
    if len(cliArgs) != 7:
        usage()

    # parseArgs returns its values in the same order doBin takes them.
    doBin(*parseArgs(cliArgs))