root / ase / io / pupynere.py @ 11
Historique | Voir | Annoter | Télécharger (20,72 ko)
1 | 1 | tkerber | """
|
---|---|---|---|
2 | 1 | tkerber | NetCDF reader/writer module.
|
3 | 1 | tkerber |
|
4 | 1 | tkerber | This module implements the Scientific.IO.NetCDF API to read and create
|
5 | 1 | tkerber | NetCDF files. The same API is also used in the PyNIO and pynetcdf
|
6 | 1 | tkerber | modules, allowing these modules to be used interchangeably when working
|
7 | 1 | tkerber | with NetCDF files. The major advantage of ``scipy.io.netcdf`` over other
|
8 | 1 | tkerber | modules is that it doesn't require the code to be linked to the NetCDF
|
9 | 1 | tkerber | libraries as the other modules do.
|
10 | 1 | tkerber |
|
11 | 1 | tkerber | The code is based on the NetCDF file format specification
|
12 | 1 | tkerber | (http://www.unidata.ucar.edu/software/netcdf/guide_15.html). A NetCDF
|
13 | 1 | tkerber | file is a self-describing binary format, with a header followed by
|
14 | 1 | tkerber | data. The header contains metadata describing dimensions, variables
|
15 | 1 | tkerber | and the position of the data in the file, so access can be done in an
|
16 | 1 | tkerber | efficient manner without loading unnecessary data into memory. We use
|
17 | 1 | tkerber | the ``mmap`` module to create Numpy arrays mapped to the data on disk,
|
18 | 1 | tkerber | for the same purpose.
|
19 | 1 | tkerber |
|
20 | 1 | tkerber | The structure of a NetCDF file is as follows:
|
21 | 1 | tkerber |
|
22 | 1 | tkerber | C D F <VERSION BYTE> <NUMBER OF RECORDS>
|
23 | 1 | tkerber | <DIMENSIONS> <GLOBAL ATTRIBUTES> <VARIABLES METADATA>
|
24 | 1 | tkerber | <NON-RECORD DATA> <RECORD DATA>
|
25 | 1 | tkerber |
|
26 | 1 | tkerber | Record data refers to data where the first axis can be expanded at
|
27 | 1 | tkerber | will. All record variables share the same dimension at the first axis,
|
28 | 1 | tkerber | and they are stored at the end of the file per record, ie
|
29 | 1 | tkerber |
|
30 | 1 | tkerber | A[0], B[0], ..., A[1], B[1], ..., etc,
|
31 | 1 | tkerber |
|
32 | 1 | tkerber | so that new data can be appended to the file without changing its original
|
33 | 1 | tkerber | structure. Non-record data are padded to a 4n bytes boundary. Record data
|
34 | 1 | tkerber | are also padded, unless there is exactly one record variable in the file,
|
35 | 1 | tkerber | in which case the padding is dropped. All data is stored in big endian
|
36 | 1 | tkerber | byte order.
|
37 | 1 | tkerber |
|
38 | 1 | tkerber | The Scientific.IO.NetCDF API allows attributes to be added directly to
|
39 | 1 | tkerber | instances of ``netcdf_file`` and ``netcdf_variable``. To differentiate
|
40 | 1 | tkerber | between user-set attributes and instance attributes, user-set attributes
|
41 | 1 | tkerber | are automatically stored in the ``_attributes`` attribute by overloading
|
42 | 1 | tkerber | ``__setattr__``. This is the reason why the code sometimes uses
|
43 | 1 | tkerber | ``obj.__dict__['key'] = value``, instead of simply ``obj.key = value``;
|
44 | 1 | tkerber | otherwise the key would be inserted into userspace attributes.
|
45 | 1 | tkerber |
|
46 | 1 | tkerber | To create a NetCDF file::
|
47 | 1 | tkerber |
|
48 | 1 | tkerber | >>> import time
|
49 | 1 | tkerber | >>> f = netcdf_file('simple.nc', 'w')
|
50 | 1 | tkerber | >>> f.history = 'Created for a test'
|
51 | 1 | tkerber | >>> f.createDimension('time', 10)
|
52 | 1 | tkerber | >>> time = f.createVariable('time', 'i', ('time',))
|
53 | 1 | tkerber | >>> time[:] = range(10)
|
54 | 1 | tkerber | >>> time.units = 'days since 2008-01-01'
|
55 | 1 | tkerber | >>> f.close()
|
56 | 1 | tkerber |
|
57 | 1 | tkerber | To read the NetCDF file we just created::
|
58 | 1 | tkerber |
|
59 | 1 | tkerber | >>> f = netcdf_file('simple.nc', 'r')
|
60 | 1 | tkerber | >>> print f.history
|
61 | 1 | tkerber | Created for a test
|
62 | 1 | tkerber | >>> time = f.variables['time']
|
63 | 1 | tkerber | >>> print time.units
|
64 | 1 | tkerber | days since 2008-01-01
|
65 | 1 | tkerber | >>> print time.shape
|
66 | 1 | tkerber | (10,)
|
67 | 1 | tkerber | >>> print time[-1]
|
68 | 1 | tkerber | 9
|
69 | 1 | tkerber | >>> f.close()
|
70 | 1 | tkerber |
|
71 | 1 | tkerber | TODO: properly implement ``_FillValue``.
|
72 | 1 | tkerber | """
|
73 | 1 | tkerber | |
74 | 1 | tkerber | __all__ = ['netcdf_file', 'netcdf_variable'] |
75 | 1 | tkerber | |
76 | 1 | tkerber | |
77 | 1 | tkerber | from operator import mul |
78 | 1 | tkerber | from mmap import mmap, ACCESS_READ |
79 | 1 | tkerber | |
80 | 1 | tkerber | from numpy import fromstring, ndarray, dtype, empty, array, asarray |
81 | 1 | tkerber | from numpy import little_endian as LITTLE_ENDIAN |
82 | 1 | tkerber | |
83 | 1 | tkerber | |
# Four-byte big-endian tags used in the NetCDF header.
# ABSENT is written in place of an empty dimension/attribute/variable list
# (a zero tag followed by a zero element count).
ABSENT = '\x00\x00\x00\x00\x00\x00\x00\x00'
ZERO = '\x00\x00\x00\x00'

# Tags identifying the external data type of a variable or attribute.
NC_BYTE = '\x00\x00\x00\x01'
NC_CHAR = '\x00\x00\x00\x02'
NC_SHORT = '\x00\x00\x00\x03'
NC_INT = '\x00\x00\x00\x04'
NC_FLOAT = '\x00\x00\x00\x05'
NC_DOUBLE = '\x00\x00\x00\x06'

# Tags introducing the dimension, variable and attribute lists.
# Note that '\n' is byte 0x0a, so NC_DIMENSION is the integer 10.
NC_DIMENSION = '\x00\x00\x00\n'
NC_VARIABLE = '\x00\x00\x00\x0b'
NC_ATTRIBUTE = '\x00\x00\x00\x0c'


# Maps a NetCDF type tag to (numpy typecode, item size in bytes).
TYPEMAP = { NC_BYTE: ('b', 1),
            NC_CHAR: ('c', 1),
            NC_SHORT: ('h', 2),
            NC_INT: ('i', 4),
            NC_FLOAT: ('f', 4),
            NC_DOUBLE: ('d', 8) }

# Maps a numpy typecode back to the NetCDF type tag written to the file.
REVERSE = { 'b': NC_BYTE,
            'c': NC_CHAR,
            'h': NC_SHORT,
            'i': NC_INT,
            'f': NC_FLOAT,
            'd': NC_DOUBLE,

            # these come from asarray(1).dtype.char and asarray('foo').dtype.char,
            # used when getting the types from generic attributes.
            'l': NC_INT,
            'S': NC_CHAR }
115 | 1 | tkerber | |
116 | 1 | tkerber | |
class netcdf_file(object):
    """
    A ``netcdf_file`` object has two standard attributes: ``dimensions`` and
    ``variables``. The values of both are dictionaries, mapping dimension
    names to their associated lengths and variable names to variables,
    respectively. Application programs should never modify these
    dictionaries.

    All other attributes correspond to global attributes defined in the
    NetCDF file. Global file attributes are created by assigning to an
    attribute of the ``netcdf_file`` object.

    Parameters
    ----------
    filename : str
        Path of the file to read or create.
    mode : str
        ``'r'`` to read an existing file (the header is parsed immediately)
        or ``'w'`` to create a new one (written on ``flush``/``close``).
    mmap : bool
        When reading, map variable data from disk with ``mmap`` instead of
        loading it into memory.

    Raises
    ------
    ValueError
        If ``mode`` is neither ``'r'`` nor ``'w'``, or if the file being
        read does not have a valid NetCDF 3 header.
    """

    def __init__(self, filename, mode='r', mmap=True):
        # Explicit validation: ``mode in 'rw'`` is a substring test and
        # would also accept '' and 'rw'. An ``assert`` would additionally
        # vanish under ``python -O``.
        if mode not in ('r', 'w'):
            raise ValueError("Mode must be either 'r' or 'w'.")

        # Everything assigned before ``_attributes`` exists is a plain
        # instance attribute (see ``__setattr__``), not a NetCDF attribute.
        self.filename = filename
        self.use_mmap = mmap
        self.mode = mode

        self.dimensions = {}
        self.variables = {}

        self._dims = []     # dimension names in definition order
        self._recs = 0      # number of records
        self._recsize = 0   # size in bytes of one record (all record vars)

        self.fp = open(self.filename, '%sb' % mode)

        self._attributes = {}

        if mode == 'r':
            self._read()

    def __setattr__(self, attr, value):
        # Store user defined attributes in a separate dict,
        # so we can save them to file later.
        try:
            self._attributes[attr] = value
        except AttributeError:
            # ``_attributes`` does not exist yet: we are still inside
            # ``__init__`` setting up instance state.
            pass
        self.__dict__[attr] = value

    def close(self):
        """Flush pending data (write mode) and close the underlying file."""
        # ``close`` doubles as ``__del__`` and may therefore run on an
        # object whose ``__init__`` failed before ``fp`` was assigned.
        fp = self.__dict__.get('fp')
        if fp is not None and not fp.closed:
            try:
                self.flush()
            finally:
                fp.close()
    __del__ = close

    def createDimension(self, name, length):
        """Define dimension ``name``; ``length`` None marks the record dimension."""
        self.dimensions[name] = length
        # Keep ``_dims`` free of duplicates: the header stores
        # ``len(self.dimensions)`` as the count but iterates ``_dims``.
        if name not in self._dims:
            self._dims.append(name)

    def createVariable(self, name, type, dimensions):
        """
        Create, register and return a new ``netcdf_variable``.

        ``type`` is a numpy dtype or a typecode string; ``dimensions`` is a
        sequence of names previously passed to ``createDimension``.
        """
        shape = tuple([self.dimensions[dim] for dim in dimensions])
        shape_ = tuple([dim or 0 for dim in shape])  # replace None with 0 for numpy

        if isinstance(type, basestring): type = dtype(type)
        typecode, size = type.char, type.itemsize
        dtype_ = '>%s' % typecode
        if size > 1: dtype_ += str(size)

        data = empty(shape_, dtype=dtype_)
        self.variables[name] = netcdf_variable(data, typecode, shape, dimensions)
        return self.variables[name]

    def flush(self):
        """Write the whole file to disk when opened in write mode."""
        if self.mode == 'w':
            self._write()
    sync = flush

    def _write(self):
        # Always rewrite from the start of the file; otherwise a second
        # flush (e.g. ``sync()`` followed by ``close()``) would append
        # another complete copy after the first one.
        self.fp.seek(0)
        self.fp.write('CDF')

        self.__dict__['version_byte'] = 1
        self.fp.write(array(1, '>b').tostring())

        # Write headers and data.
        self._write_numrecs()
        self._write_dim_array()
        self._write_gatt_array()
        self._write_var_array()

    def _write_numrecs(self):
        # Get highest record count from all record variables.
        for var in self.variables.values():
            if var.isrec and len(var.data) > self._recs:
                self.__dict__['_recs'] = len(var.data)
        self._pack_int(self._recs)

    def _write_dim_array(self):
        if self.dimensions:
            self.fp.write(NC_DIMENSION)
            self._pack_int(len(self.dimensions))
            for name in self._dims:
                self._pack_string(name)
                length = self.dimensions[name]
                self._pack_int(length or 0)  # replace None with 0 for record dimension
        else:
            self.fp.write(ABSENT)

    def _write_gatt_array(self):
        self._write_att_array(self._attributes)

    def _write_att_array(self, attributes):
        if attributes:
            self.fp.write(NC_ATTRIBUTE)
            self._pack_int(len(attributes))
            for name, values in attributes.items():
                self._pack_string(name)
                self._write_values(values)
        else:
            self.fp.write(ABSENT)

    def _write_var_array(self):
        if self.variables:
            self.fp.write(NC_VARIABLE)
            self._pack_int(len(self.variables))

            # Sort variables non-recs first, then recs.
            keys = [(v._shape and not v.isrec, k)
                    for k, v in self.variables.items()]
            keys.sort()
            keys.reverse()
            variables = [k for isrec, k in keys]

            # Set the metadata for all variables.
            for name in variables:
                self._write_var_metadata(name)
            # Now that we have the metadata, we know the vsize of
            # each record variable, so we can calculate recsize.
            self.__dict__['_recsize'] = sum([
                    var._vsize for var in self.variables.values()
                    if var.isrec])
            # Set the data for all variables.
            for name in variables:
                self._write_var_data(name)
        else:
            self.fp.write(ABSENT)

    def _write_var_metadata(self, name):
        var = self.variables[name]

        self._pack_string(name)
        self._pack_int(len(var.dimensions))
        for dimname in var.dimensions:
            dimid = self._dims.index(dimname)
            self._pack_int(dimid)

        self._write_att_array(var._attributes)

        nc_type = REVERSE[var.typecode()]
        self.fp.write(nc_type)

        if not var.isrec:
            vsize = var.data.size * var.data.itemsize
            vsize += -vsize % 4
        else:  # record variable
            try:
                vsize = var.data[0].size * var.data.itemsize
            except IndexError:
                vsize = 0
            # Use a distinct loop name: Python 2 list comprehensions leak
            # their variable and would otherwise rebind ``var``.
            rec_vars = len([v for v in self.variables.values()
                            if v.isrec])
            # A single record variable is stored without padding.
            if rec_vars > 1:
                vsize += -vsize % 4
        var.__dict__['_vsize'] = vsize
        self._pack_int(vsize)

        # Pack a bogus begin, and set the real value later.
        var.__dict__['_begin'] = self.fp.tell()
        self._pack_begin(0)

    def _write_var_data(self, name):
        var = self.variables[name]

        # Set begin in file header.
        the_beguine = self.fp.tell()
        self.fp.seek(var._begin)
        self._pack_begin(the_beguine)
        self.fp.seek(the_beguine)

        # Write data.
        if not var.isrec:
            self.fp.write(var.data.tostring())
            count = var.data.size * var.data.itemsize
            self.fp.write('\x00' * (var._vsize - count))  # NUL padding
        else:  # record variable
            # Handle rec vars with shape[0] < nrecs.
            if self._recs > len(var.data):
                shape = (self._recs,) + var.data.shape[1:]
                var.data.resize(shape)

            pos0 = pos = self.fp.tell()
            for rec in var.data:
                # Apparently scalars cannot be converted to big endian. If we
                # try to convert a ``=i4`` scalar to, say, '>i4' the dtype
                # will remain as ``=i4``.
                if not rec.shape and (rec.dtype.byteorder == '<' or
                        (rec.dtype.byteorder == '=' and LITTLE_ENDIAN)):
                    rec = rec.byteswap()
                self.fp.write(rec.tostring())
                # Padding
                count = rec.size * rec.itemsize
                self.fp.write('\x00' * (var._vsize - count))
                # Records of different variables are interleaved, so skip
                # one full record to reach this variable's next slot.
                pos += self._recsize
                self.fp.seek(pos)
            # Position at the first record slot of the next record variable.
            self.fp.seek(pos0 + var._vsize)

    def _write_values(self, values):
        values = asarray(values)
        values = values.astype(values.dtype.newbyteorder('>'))

        nc_type = REVERSE[values.dtype.char]
        self.fp.write(nc_type)

        if values.dtype.char == 'S':
            nelems = values.itemsize
        else:
            nelems = values.size
        self._pack_int(nelems)

        if not values.shape and (values.dtype.byteorder == '<' or
                (values.dtype.byteorder == '=' and LITTLE_ENDIAN)):
            values = values.byteswap()
        self.fp.write(values.tostring())
        count = values.size * values.itemsize
        self.fp.write('\x00' * (-count % 4))  # pad with NUL bytes

    def _read(self):
        # Check magic bytes and version
        magic = self.fp.read(3)
        if magic != 'CDF':
            raise ValueError("Error: %s is not a valid NetCDF 3 file" %
                             self.filename)
        self.__dict__['version_byte'] = fromstring(self.fp.read(1), '>b')[0]

        # Read file headers and set data.
        self._read_numrecs()
        self._read_dim_array()
        self._read_gatt_array()
        self._read_var_array()

    def _read_numrecs(self):
        self.__dict__['_recs'] = self._unpack_int()

    def _read_tag(self, allowed):
        # Consume a 4-byte list tag and validate it. The read must happen
        # unconditionally (not inside an ``assert``), otherwise running
        # with -O would leave the file position 4 bytes short.
        tag = self.fp.read(4)
        if tag not in allowed:
            raise ValueError("Error: unexpected header tag in %s" %
                             self.filename)
        return tag

    def _read_dim_array(self):
        self._read_tag([ZERO, NC_DIMENSION])
        count = self._unpack_int()

        for dim in range(count):
            name = self._unpack_string()
            length = self._unpack_int() or None  # None for record dimension
            self.dimensions[name] = length
            self._dims.append(name)  # preserve order

    def _read_gatt_array(self):
        for k, v in self._read_att_array().items():
            self.__setattr__(k, v)

    def _read_att_array(self):
        self._read_tag([ZERO, NC_ATTRIBUTE])
        count = self._unpack_int()

        attributes = {}
        for attr in range(count):
            name = self._unpack_string()
            attributes[name] = self._read_values()
        return attributes

    def _read_var_array(self):
        self._read_tag([ZERO, NC_VARIABLE])

        begin = 0
        dtypes = {'names': [], 'formats': []}
        rec_vars = []
        count = self._unpack_int()
        for var in range(count):
            name, dimensions, shape, attributes, typecode, size, dtype_, begin_, vsize = self._read_var()
            if shape and shape[0] is None:
                rec_vars.append(name)
                self.__dict__['_recsize'] += vsize
                # Offset of the first record variable = start of record data.
                if begin == 0: begin = begin_
                dtypes['names'].append(name)
                dtypes['formats'].append(str(shape[1:]) + dtype_)

                # Handle padding with a virtual variable.
                if typecode in 'bch':
                    actual_size = reduce(mul, (1,) + shape[1:]) * size
                    padding = -actual_size % 4
                    if padding:
                        dtypes['names'].append('_padding_%d' % var)
                        dtypes['formats'].append('(%d,)>b' % padding)

                # Data will be set later.
                data = None
            else:
                if self.use_mmap:
                    mm = mmap(self.fp.fileno(), begin_+vsize, access=ACCESS_READ)
                    data = ndarray.__new__(ndarray, shape, dtype=dtype_,
                            buffer=mm, offset=begin_, order=0)
                else:
                    pos = self.fp.tell()
                    self.fp.seek(begin_)
                    data = fromstring(self.fp.read(vsize), dtype=dtype_)
                    data.shape = shape
                    self.fp.seek(pos)

            # Add variable.
            self.variables[name] = netcdf_variable(
                    data, typecode, shape, dimensions, attributes)

        if rec_vars:
            # Remove padding when only one record variable.
            if len(rec_vars) == 1:
                dtypes['names'] = dtypes['names'][:1]
                dtypes['formats'] = dtypes['formats'][:1]

            # Build rec array.
            if self.use_mmap:
                mm = mmap(self.fp.fileno(), begin+self._recs*self._recsize, access=ACCESS_READ)
                rec_array = ndarray.__new__(ndarray, (self._recs,), dtype=dtypes,
                        buffer=mm, offset=begin, order=0)
            else:
                pos = self.fp.tell()
                self.fp.seek(begin)
                rec_array = fromstring(self.fp.read(self._recs*self._recsize), dtype=dtypes)
                rec_array.shape = (self._recs,)
                self.fp.seek(pos)

            for var in rec_vars:
                self.variables[var].__dict__['data'] = rec_array[var]

    def _read_var(self):
        name = self._unpack_string()
        dimensions = []
        shape = []
        dims = self._unpack_int()

        for i in range(dims):
            dimid = self._unpack_int()
            dimname = self._dims[dimid]
            dimensions.append(dimname)
            dim = self.dimensions[dimname]
            shape.append(dim)
        dimensions = tuple(dimensions)
        shape = tuple(shape)

        attributes = self._read_att_array()
        nc_type = self.fp.read(4)
        vsize = self._unpack_int()
        # Version 1 files store 32-bit offsets, version 2 files 64-bit ones.
        begin = [self._unpack_int, self._unpack_int64][self.version_byte-1]()

        typecode, size = TYPEMAP[nc_type]
        if typecode == 'c':
            dtype_ = '>c'
        else:
            dtype_ = '>%s' % typecode
            if size > 1: dtype_ += str(size)

        return name, dimensions, shape, attributes, typecode, size, dtype_, begin, vsize

    def _read_values(self):
        nc_type = self.fp.read(4)
        n = self._unpack_int()

        typecode, size = TYPEMAP[nc_type]

        count = n*size
        values = self.fp.read(count)
        self.fp.read(-count % 4)  # read padding

        if typecode != 'c':
            values = fromstring(values, dtype='>%s%d' % (typecode, size))
            if values.shape == (1,): values = values[0]
        else:
            values = values.rstrip('\x00')
        return values

    def _pack_begin(self, begin):
        if self.version_byte == 1:
            self._pack_int(begin)
        elif self.version_byte == 2:
            self._pack_int64(begin)

    def _pack_int(self, value):
        self.fp.write(array(value, '>i').tostring())
    _pack_int32 = _pack_int

    def _unpack_int(self):
        return int(fromstring(self.fp.read(4), '>i')[0])
    _unpack_int32 = _unpack_int

    def _pack_int64(self, value):
        self.fp.write(array(value, '>q').tostring())

    def _unpack_int64(self):
        return int(fromstring(self.fp.read(8), '>q')[0])

    def _pack_string(self, s):
        count = len(s)
        self._pack_int(count)
        self.fp.write(s)
        self.fp.write('\x00' * (-count % 4))  # pad with NUL bytes

    def _unpack_string(self):
        count = self._unpack_int()
        s = self.fp.read(count).rstrip('\x00')
        self.fp.read(-count % 4)  # read padding
        return s
536 | 1 | tkerber | |
537 | 1 | tkerber | |
class netcdf_variable(object):
    """
    ``netcdf_variable`` objects are constructed by calling the method
    ``createVariable`` on the netcdf_file object.

    ``netcdf_variable`` objects behave much like array objects defined in
    Numpy, except that their data resides in a file. Data is read by
    indexing and written by assigning to an indexed subset; the entire
    array can be accessed by the index ``[:]`` or using the methods
    ``getValue`` and ``assignValue``. ``netcdf_variable`` objects also
    have attribute ``shape`` with the same meaning as for arrays, but
    the shape cannot be modified. There is another read-only attribute
    ``dimensions``, whose value is the tuple of dimension names.

    All other attributes correspond to variable attributes defined in
    the NetCDF file. Variable attributes are created by assigning to an
    attribute of the ``netcdf_variable`` object.

    Parameters
    ----------
    data : numpy array holding the values of the variable.
    typecode : single-character numpy typecode of the data.
    shape : tuple of dimension lengths; a leading ``None`` (or 0) marks a
        record variable.
    dimensions : tuple of dimension names.
    attributes : optional dict of variable attributes.
    """

    def __init__(self, data, typecode, shape, dimensions, attributes=None):
        # These assignments happen before ``_attributes`` exists, so
        # ``__setattr__`` stores them as plain instance attributes only.
        self.data = data
        self._typecode = typecode
        self._shape = shape
        self.dimensions = dimensions

        self._attributes = attributes or {}
        # Expose attributes read from file without re-recording them.
        for k, v in self._attributes.items():
            self.__dict__[k] = v

    def __setattr__(self, attr, value):
        # Store user defined attributes in a separate dict,
        # so we can save them to file later.
        try:
            self._attributes[attr] = value
        except AttributeError:
            # ``_attributes`` is not set yet: still inside ``__init__``.
            pass
        self.__dict__[attr] = value

    def isrec(self):
        # True when the variable has at least one axis and its first
        # declared dimension is unlimited (None/0). ``bool()`` ensures a
        # scalar variable yields False rather than an empty tuple.
        return bool(self.data.shape) and not self._shape[0]
    isrec = property(isrec)

    def shape(self):
        # Current shape of the underlying array (grows for record vars).
        return self.data.shape
    shape = property(shape)

    def getValue(self):
        """Return the value of a single-element variable as a Python scalar."""
        return self.data.item()

    def assignValue(self, value):
        """
        Set the value of a single-element variable.

        Raises ``ValueError`` if the variable does not hold exactly one
        element.
        """
        # ``ndarray.itemset`` was removed in NumPy 2.0; assigning through
        # ``flat`` works on every NumPy version, 0-d arrays included.
        if self.data.size != 1:
            raise ValueError('assignValue requires a variable holding '
                             'exactly one element')
        self.data.flat[0] = value

    def typecode(self):
        """Return the single-character typecode of the variable."""
        return self._typecode

    def __getitem__(self, index):
        return self.data[index]

    def __setitem__(self, index, data):
        # Expand data for record vars?
        if self.isrec:
            # Only the index along the first (record) axis matters here.
            if isinstance(index, tuple):
                rec_index = index[0]
            else:
                rec_index = index
            # Number of records needed to hold the assignment.
            if isinstance(rec_index, slice):
                recs = (rec_index.start or 0) + len(data)
            else:
                recs = rec_index + 1
            if recs > len(self.data):
                shape = (recs,) + self._shape[1:]
                self.data.resize(shape)
        self.data[index] = data
611 | 1 | tkerber | |
612 | 1 | tkerber | |
# Aliases using the class names of the Scientific.IO.NetCDF API, so this
# module can be used as a drop-in replacement.
NetCDFFile = netcdf_file
NetCDFVariable = netcdf_variable