root / ase / io / pupynere.py @ 14
Historique | Voir | Annoter | Télécharger (20,72 ko)
| 1 | 1 | tkerber | """
|
|---|---|---|---|
| 2 | 1 | tkerber | NetCDF reader/writer module.
|
| 3 | 1 | tkerber |
|
| 4 | 1 | tkerber | This module implements the Scientific.IO.NetCDF API to read and create
|
| 5 | 1 | tkerber | NetCDF files. The same API is also used in the PyNIO and pynetcdf
|
| 6 | 1 | tkerber | modules, allowing these modules to be used interchangeably when working
|
| 7 | 1 | tkerber | with NetCDF files. The major advantage of ``scipy.io.netcdf`` over other
|
| 8 | 1 | tkerber | modules is that it doesn't require the code to be linked to the NetCDF
|
| 9 | 1 | tkerber | libraries as the other modules do.
|
| 10 | 1 | tkerber |
|
| 11 | 1 | tkerber | The code is based on the NetCDF file format specification
|
| 12 | 1 | tkerber | (http://www.unidata.ucar.edu/software/netcdf/guide_15.html). A NetCDF
|
| 13 | 1 | tkerber | file is a self-describing binary format, with a header followed by
|
| 14 | 1 | tkerber | data. The header contains metadata describing dimensions, variables
|
| 15 | 1 | tkerber | and the position of the data in the file, so access can be done in an
|
| 16 | 1 | tkerber | efficient manner without loading unnecessary data into memory. We use
|
| 17 | 1 | tkerber | the ``mmap`` module to create Numpy arrays mapped to the data on disk,
|
| 18 | 1 | tkerber | for the same purpose.
|
| 19 | 1 | tkerber |
|
| 20 | 1 | tkerber | The structure of a NetCDF file is as follows:
|
| 21 | 1 | tkerber |
|
| 22 | 1 | tkerber | C D F <VERSION BYTE> <NUMBER OF RECORDS>
|
| 23 | 1 | tkerber | <DIMENSIONS> <GLOBAL ATTRIBUTES> <VARIABLES METADATA>
|
| 24 | 1 | tkerber | <NON-RECORD DATA> <RECORD DATA>
|
| 25 | 1 | tkerber |
|
| 26 | 1 | tkerber | Record data refers to data where the first axis can be expanded at
|
| 27 | 1 | tkerber | will. All record variables share the same dimension at the first axis,
|
| 28 | 1 | tkerber | and they are stored at the end of the file per record, ie
|
| 29 | 1 | tkerber |
|
| 30 | 1 | tkerber | A[0], B[0], ..., A[1], B[1], ..., etc,
|
| 31 | 1 | tkerber |
|
| 32 | 1 | tkerber | so that new data can be appended to the file without changing its original
|
| 33 | 1 | tkerber | structure. Non-record data are padded to a 4n bytes boundary. Record data
|
| 34 | 1 | tkerber | are also padded, unless there is exactly one record variable in the file,
|
| 35 | 1 | tkerber | in which case the padding is dropped. All data is stored in big endian
|
| 36 | 1 | tkerber | byte order.
|
| 37 | 1 | tkerber |
|
| 38 | 1 | tkerber | The Scientific.IO.NetCDF API allows attributes to be added directly to
|
| 39 | 1 | tkerber | instances of ``netcdf_file`` and ``netcdf_variable``. To differentiate
|
| 40 | 1 | tkerber | between user-set attributes and instance attributes, user-set attributes
|
| 41 | 1 | tkerber | are automatically stored in the ``_attributes`` attribute by overloading
|
| 42 | 1 | tkerber | ``__setattr__``. This is the reason why the code sometimes uses
|
| 43 | 1 | tkerber | ``obj.__dict__['key'] = value``, instead of simply ``obj.key = value``;
|
| 44 | 1 | tkerber | otherwise the key would be inserted into userspace attributes.
|
| 45 | 1 | tkerber |
|
| 46 | 1 | tkerber | To create a NetCDF file::
|
| 47 | 1 | tkerber |
|
| 48 | 1 | tkerber | >>> import time
|
| 49 | 1 | tkerber | >>> f = netcdf_file('simple.nc', 'w')
|
| 50 | 1 | tkerber | >>> f.history = 'Created for a test'
|
| 51 | 1 | tkerber | >>> f.createDimension('time', 10)
|
| 52 | 1 | tkerber | >>> time = f.createVariable('time', 'i', ('time',))
|
| 53 | 1 | tkerber | >>> time[:] = range(10)
|
| 54 | 1 | tkerber | >>> time.units = 'days since 2008-01-01'
|
| 55 | 1 | tkerber | >>> f.close()
|
| 56 | 1 | tkerber |
|
| 57 | 1 | tkerber | To read the NetCDF file we just created::
|
| 58 | 1 | tkerber |
|
| 59 | 1 | tkerber | >>> f = netcdf_file('simple.nc', 'r')
|
| 60 | 1 | tkerber | >>> print f.history
|
| 61 | 1 | tkerber | Created for a test
|
| 62 | 1 | tkerber | >>> time = f.variables['time']
|
| 63 | 1 | tkerber | >>> print time.units
|
| 64 | 1 | tkerber | days since 2008-01-01
|
| 65 | 1 | tkerber | >>> print time.shape
|
| 66 | 1 | tkerber | (10,)
|
| 67 | 1 | tkerber | >>> print time[-1]
|
| 68 | 1 | tkerber | 9
|
| 69 | 1 | tkerber | >>> f.close()
|
| 70 | 1 | tkerber |
|
| 71 | 1 | tkerber | TODO: properly implement ``_FillValue``.
|
| 72 | 1 | tkerber | """
|
| 73 | 1 | tkerber | |
| 74 | 1 | tkerber | __all__ = ['netcdf_file', 'netcdf_variable'] |
| 75 | 1 | tkerber | |
| 76 | 1 | tkerber | |
| 77 | 1 | tkerber | from operator import mul |
| 78 | 1 | tkerber | from mmap import mmap, ACCESS_READ |
| 79 | 1 | tkerber | |
| 80 | 1 | tkerber | from numpy import fromstring, ndarray, dtype, empty, array, asarray |
| 81 | 1 | tkerber | from numpy import little_endian as LITTLE_ENDIAN |
| 82 | 1 | tkerber | |
| 83 | 1 | tkerber | |
# Four-byte big-endian tags used in the NetCDF header.  ZERO doubles as the
# "no tag" marker; ABSENT is a zero tag followed by a zero element count.
ZERO = '\x00' * 4
ABSENT = ZERO * 2

# Data type tags.
NC_BYTE = '\x00\x00\x00\x01'
NC_CHAR = '\x00\x00\x00\x02'
NC_SHORT = '\x00\x00\x00\x03'
NC_INT = '\x00\x00\x00\x04'
NC_FLOAT = '\x00\x00\x00\x05'
NC_DOUBLE = '\x00\x00\x00\x06'

# Header section tags.
NC_DIMENSION = '\x00\x00\x00\x0a'
NC_VARIABLE = '\x00\x00\x00\x0b'
NC_ATTRIBUTE = '\x00\x00\x00\x0c'


# Type tag -> (numpy type character, item size in bytes).
TYPEMAP = {
    NC_BYTE: ('b', 1),
    NC_CHAR: ('c', 1),
    NC_SHORT: ('h', 2),
    NC_INT: ('i', 4),
    NC_FLOAT: ('f', 4),
    NC_DOUBLE: ('d', 8),
}

# Numpy type character -> type tag (the inverse of TYPEMAP).
REVERSE = dict([(char, tag) for tag, (char, _size) in TYPEMAP.items()])

# These come from asarray(1).dtype.char and asarray('foo').dtype.char,
# used when getting the types from generic attributes.
REVERSE['l'] = NC_INT
REVERSE['S'] = NC_CHAR
|
| 115 | 1 | tkerber | |
| 116 | 1 | tkerber | |
class netcdf_file(object):
    """
    A ``netcdf_file`` object has two standard attributes: ``dimensions`` and
    ``variables``. The values of both are dictionaries, mapping dimension
    names to their associated lengths and variable names to variables,
    respectively. Application programs should never modify these
    dictionaries.

    All other attributes correspond to global attributes defined in the
    NetCDF file. Global file attributes are created by assigning to an
    attribute of the ``netcdf_file`` object.

    The constructor takes the path ``filename``, a ``mode`` that is either
    ``'r'`` (parse an existing file) or ``'w'`` (create a new file, written
    out on ``flush``/``sync``/``close``) and an ``mmap`` flag selecting
    whether variable data is memory-mapped or read into memory.

    """
    def __init__(self, filename, mode='r', mmap=True):
        if not __debug__:
            raise RuntimeError('Current version of pupynere does not ' +
                               'work with -O option. We need to update ' +
                               'to version 1.0.7!')

        self.filename = filename
        self.use_mmap = mmap

        # Tuple membership on purpose: the substring test ``mode in 'rw'``
        # would also accept '' and 'rw'.
        assert mode in ('r', 'w'), "Mode must be either 'r' or 'w'."
        self.mode = mode

        self.dimensions = {}
        self.variables = {}

        self._dims = []      # dimension names in file order
        self._recs = 0       # number of records
        self._recsize = 0    # bytes occupied by one record of all rec vars

        self.fp = open(self.filename, '%sb' % mode)

        # Once ``_attributes`` exists, ``__setattr__`` records every further
        # plain attribute assignment as a global file attribute.
        self._attributes = {}

        if mode == 'r':
            self._read()

    def __setattr__(self, attr, value):
        # Store user defined attributes in a separate dict,
        # so we can save them to file later.
        try:
            self._attributes[attr] = value
        except AttributeError:
            # ``_attributes`` is not set yet: still inside ``__init__``.
            pass
        self.__dict__[attr] = value

    def close(self):
        """Flush pending data (write mode) and close the underlying file.

        Safe to call more than once, and on an instance whose ``__init__``
        failed before the file was opened (this matters because the method
        is also used as ``__del__``).
        """
        fp = self.__dict__.get('fp')
        if fp is None or fp.closed:
            return
        try:
            self.flush()
        finally:
            fp.close()
    __del__ = close

    def createDimension(self, name, length):
        """Register dimension ``name``; ``length`` None marks the record
        dimension."""
        self.dimensions[name] = length
        self._dims.append(name)

    def createVariable(self, name, type, dimensions):
        """Create, register and return a new ``netcdf_variable``.

        ``type`` is a numpy dtype or a type string accepted by ``dtype``;
        ``dimensions`` is a sequence of previously created dimension names.
        The data array is allocated big-endian, with the record dimension
        (length None) starting at size 0.
        """
        shape = tuple([self.dimensions[dim] for dim in dimensions])
        shape_ = tuple([dim or 0 for dim in shape])  # replace None with 0 for numpy

        if isinstance(type, basestring):
            type = dtype(type)
        typecode, size = type.char, type.itemsize
        dtype_ = '>%s' % typecode
        if size > 1:
            dtype_ += str(size)

        data = empty(shape_, dtype=dtype_)
        self.variables[name] = netcdf_variable(data, typecode, shape, dimensions)
        return self.variables[name]

    def flush(self):
        """Write the whole file to disk when opened in write mode."""
        if self.mode == 'w':
            self._write()
    sync = flush

    def _write(self):
        """Serialize the header and all variable data to ``self.fp``."""
        # Always start over at offset 0; otherwise a ``sync()`` followed by
        # ``close()`` would append a second copy of the file.
        self.fp.seek(0)
        self.fp.write('CDF')

        self.__dict__['version_byte'] = 1
        self.fp.write(array(1, '>b').tostring())

        # Write headers and data.
        self._write_numrecs()
        self._write_dim_array()
        self._write_gatt_array()
        self._write_var_array()

    def _write_padding(self, count):
        """Write ``count`` NUL bytes (nothing when ``count`` <= 0)."""
        self.fp.write('\x00' * count)

    def _write_numrecs(self):
        """Write the record count: the longest of all record variables."""
        # Get highest record count from all record variables.
        for var in self.variables.values():
            if var.isrec and len(var.data) > self._recs:
                self.__dict__['_recs'] = len(var.data)
        self._pack_int(self._recs)

    def _write_dim_array(self):
        """Write the dimension list, or ABSENT when there are none."""
        if self.dimensions:
            self.fp.write(NC_DIMENSION)
            self._pack_int(len(self.dimensions))
            for name in self._dims:
                self._pack_string(name)
                length = self.dimensions[name]
                self._pack_int(length or 0)  # replace None with 0 for record dimension
        else:
            self.fp.write(ABSENT)

    def _write_gatt_array(self):
        """Write the global (file level) attributes."""
        self._write_att_array(self._attributes)

    def _write_att_array(self, attributes):
        """Write an attribute dict, or ABSENT when it is empty."""
        if attributes:
            self.fp.write(NC_ATTRIBUTE)
            self._pack_int(len(attributes))
            for name, values in attributes.items():
                self._pack_string(name)
                self._write_values(values)
        else:
            self.fp.write(ABSENT)

    def _write_var_array(self):
        """Write metadata for every variable, then the data itself."""
        if self.variables:
            self.fp.write(NC_VARIABLE)
            self._pack_int(len(self.variables))

            # Sort variables non-recs first, then recs.  A decorated sort is
            # used so the resulting order stays exactly what it always was.
            keys = [(v._shape and not v.isrec, k)
                    for k, v in self.variables.items()]
            keys.sort()
            keys.reverse()
            variables = [k for isrec, k in keys]

            # Set the metadata for all variables.
            for name in variables:
                self._write_var_metadata(name)
            # Now that we have the metadata, we know the vsize of
            # each record variable, so we can calculate recsize.
            self.__dict__['_recsize'] = sum([
                var._vsize for var in self.variables.values()
                if var.isrec])
            # Set the data for all variables.
            for name in variables:
                self._write_var_data(name)
        else:
            self.fp.write(ABSENT)

    def _write_var_metadata(self, name):
        """Write the header entry of variable ``name``.

        Stores the computed ``_vsize`` and the file offset of the (still
        bogus) ``begin`` field on the variable for ``_write_var_data``.
        """
        var = self.variables[name]

        self._pack_string(name)
        self._pack_int(len(var.dimensions))
        for dimname in var.dimensions:
            dimid = self._dims.index(dimname)
            self._pack_int(dimid)

        self._write_att_array(var._attributes)

        nc_type = REVERSE[var.typecode()]
        self.fp.write(nc_type)

        if not var.isrec:
            vsize = var.data.size * var.data.itemsize
            vsize += -vsize % 4
        else:  # record variable
            try:
                vsize = var.data[0].size * var.data.itemsize
            except IndexError:
                vsize = 0
            # Distinct loop name: a Python 2 list comprehension would
            # otherwise rebind the local ``var``.
            rec_vars = len([other for other in self.variables.values()
                            if other.isrec])
            # Records are only padded when there are several record vars.
            if rec_vars > 1:
                vsize += -vsize % 4
        self.variables[name].__dict__['_vsize'] = vsize
        self._pack_int(vsize)

        # Pack a bogus begin, and set the real value later.
        self.variables[name].__dict__['_begin'] = self.fp.tell()
        self._pack_begin(0)

    def _write_var_data(self, name):
        """Write the data of variable ``name`` and patch its ``begin``."""
        var = self.variables[name]

        # Set begin in file header.
        the_beguine = self.fp.tell()
        self.fp.seek(var._begin)
        self._pack_begin(the_beguine)
        self.fp.seek(the_beguine)

        # Write data.
        if not var.isrec:
            self.fp.write(var.data.tostring())
            count = var.data.size * var.data.itemsize
            self._write_padding(var._vsize - count)
        else:  # record variable
            # Handle rec vars with shape[0] < nrecs.
            if self._recs > len(var.data):
                shape = (self._recs,) + var.data.shape[1:]
                var.data.resize(shape)

            pos0 = pos = self.fp.tell()
            for rec in var.data:
                # Apparently scalars cannot be converted to big endian. If we
                # try to convert a ``=i4`` scalar to, say, '>i4' the dtype
                # will remain as ``=i4``.
                if not rec.shape and (rec.dtype.byteorder == '<' or
                        (rec.dtype.byteorder == '=' and LITTLE_ENDIAN)):
                    rec = rec.byteswap()
                self.fp.write(rec.tostring())
                # Padding
                count = rec.size * rec.itemsize
                self._write_padding(var._vsize - count)
                # Records of all record variables are interleaved, so skip
                # ahead by the size of one complete record.
                pos += self._recsize
                self.fp.seek(pos)
            # Leave the file positioned where the next record variable's
            # first record starts.
            self.fp.seek(pos0 + var._vsize)

    def _write_values(self, values):
        """Write one attribute value: type tag, element count, data, pad."""
        values = asarray(values)
        values = values.astype(values.dtype.newbyteorder('>'))

        nc_type = REVERSE[values.dtype.char]
        self.fp.write(nc_type)

        if values.dtype.char == 'S':
            nelems = values.itemsize
        else:
            nelems = values.size
        self._pack_int(nelems)

        if not values.shape and (values.dtype.byteorder == '<' or
                (values.dtype.byteorder == '=' and LITTLE_ENDIAN)):
            values = values.byteswap()
        self.fp.write(values.tostring())
        count = values.size * values.itemsize
        self._write_padding(-count % 4)  # pad

    def _read(self):
        """Parse the header of ``self.fp`` and set up all variables."""
        # Check magic bytes and version
        assert self.fp.read(3) == 'CDF', "Error: %s is not a valid NetCDF 3 file" % self.filename
        self.__dict__['version_byte'] = fromstring(self.fp.read(1), '>b')[0]

        # Read file headers and set data.
        self._read_numrecs()
        self._read_dim_array()
        self._read_gatt_array()
        self._read_var_array()

    def _read_numrecs(self):
        """Read the number of records."""
        self.__dict__['_recs'] = self._unpack_int()

    def _read_dim_array(self):
        """Read the dimension list into ``dimensions`` and ``_dims``."""
        assert self.fp.read(4) in [ZERO, NC_DIMENSION]
        count = self._unpack_int()

        for dim in range(count):
            name = self._unpack_string()
            length = self._unpack_int() or None  # None for record dimension
            self.dimensions[name] = length
            self._dims.append(name)  # preserve order

    def _read_gatt_array(self):
        """Read the global attributes and expose them on the instance."""
        for k, v in self._read_att_array().items():
            self.__setattr__(k, v)

    def _read_att_array(self):
        """Read an attribute list and return it as a dict."""
        assert self.fp.read(4) in [ZERO, NC_ATTRIBUTE]
        count = self._unpack_int()

        attributes = {}
        for attr in range(count):
            name = self._unpack_string()
            attributes[name] = self._read_values()
        return attributes

    def _read_var_array(self):
        """Read all variable headers and attach their data.

        Non-record variables get their array immediately.  Record variables
        are collected into one structured dtype describing a single record;
        the record section is then loaded once and each variable receives
        its field of that array.
        """
        assert self.fp.read(4) in [ZERO, NC_VARIABLE]

        begin = 0
        dtypes = {'names': [], 'formats': []}
        rec_vars = []
        count = self._unpack_int()
        for index in range(count):
            (name, dimensions, shape, attributes,
             typecode, size, dtype_, begin_, vsize) = self._read_var()
            if shape and shape[0] is None:
                rec_vars.append(name)
                self.__dict__['_recsize'] += vsize
                if begin == 0:
                    begin = begin_
                dtypes['names'].append(name)
                dtypes['formats'].append(str(shape[1:]) + dtype_)

                # Handle padding with a virtual variable.
                if typecode in 'bch':
                    actual_size = reduce(mul, (1,) + shape[1:]) * size
                    padding = -actual_size % 4
                    if padding:
                        dtypes['names'].append('_padding_%d' % index)
                        dtypes['formats'].append('(%d,)>b' % padding)

                # Data will be set later.
                data = None
            else:
                if self.use_mmap:
                    mm = mmap(self.fp.fileno(), begin_+vsize, access=ACCESS_READ)
                    data = ndarray.__new__(ndarray, shape, dtype=dtype_,
                                           buffer=mm, offset=begin_, order=0)
                else:
                    pos = self.fp.tell()
                    self.fp.seek(begin_)
                    # ``vsize`` includes the padding to a 4 byte boundary;
                    # read only the real payload, otherwise the reshape
                    # below fails for sizes that are not multiples of 4.
                    actual_size = reduce(mul, shape, 1) * size
                    data = fromstring(self.fp.read(actual_size), dtype=dtype_)
                    data.shape = shape
                    self.fp.seek(pos)

            # Add variable.
            self.variables[name] = netcdf_variable(
                data, typecode, shape, dimensions, attributes)

        if rec_vars:
            # Remove padding when only one record variable.
            if len(rec_vars) == 1:
                dtypes['names'] = dtypes['names'][:1]
                dtypes['formats'] = dtypes['formats'][:1]

            # Build rec array.
            if self.use_mmap:
                mm = mmap(self.fp.fileno(), begin+self._recs*self._recsize, access=ACCESS_READ)
                rec_array = ndarray.__new__(ndarray, (self._recs,), dtype=dtypes,
                                            buffer=mm, offset=begin, order=0)
            else:
                pos = self.fp.tell()
                self.fp.seek(begin)
                rec_array = fromstring(self.fp.read(self._recs*self._recsize), dtype=dtypes)
                rec_array.shape = (self._recs,)
                self.fp.seek(pos)

            for rec_name in rec_vars:
                self.variables[rec_name].__dict__['data'] = rec_array[rec_name]

    def _read_var(self):
        """Read one variable header entry.

        Returns the tuple ``(name, dimensions, shape, attributes, typecode,
        size, dtype_, begin, vsize)``.
        """
        name = self._unpack_string()
        dimensions = []
        shape = []
        dims = self._unpack_int()

        for i in range(dims):
            dimid = self._unpack_int()
            dimname = self._dims[dimid]
            dimensions.append(dimname)
            dim = self.dimensions[dimname]
            shape.append(dim)
        dimensions = tuple(dimensions)
        shape = tuple(shape)

        attributes = self._read_att_array()
        nc_type = self.fp.read(4)
        vsize = self._unpack_int()
        # Version 1 files store a 32 bit offset, version 2 a 64 bit one.
        begin = [self._unpack_int, self._unpack_int64][self.version_byte-1]()

        typecode, size = TYPEMAP[nc_type]
        if typecode == 'c':
            dtype_ = '>c'
        else:
            dtype_ = '>%s' % typecode
            if size > 1:
                dtype_ += str(size)

        return name, dimensions, shape, attributes, typecode, size, dtype_, begin, vsize

    def _read_values(self):
        """Read one attribute value.

        Character data is returned as a string with trailing NULs removed;
        numeric data as a big-endian array, or as a scalar when it holds
        exactly one element.
        """
        nc_type = self.fp.read(4)
        n = self._unpack_int()

        typecode, size = TYPEMAP[nc_type]

        count = n*size
        values = self.fp.read(count)
        self.fp.read(-count % 4)  # read padding

        if typecode != 'c':
            values = fromstring(values, dtype='>%s%d' % (typecode, size))
            if values.shape == (1,):
                values = values[0]
        else:
            values = values.rstrip('\x00')
        return values

    def _pack_begin(self, begin):
        """Write a ``begin`` offset, 32 or 64 bit depending on the version
        byte (nothing is written for any other version byte)."""
        if self.version_byte == 1:
            self._pack_int(begin)
        elif self.version_byte == 2:
            self._pack_int64(begin)

    def _pack_int(self, value):
        """Write ``value`` as a big-endian 32 bit integer."""
        self.fp.write(array(value, '>i').tostring())
    _pack_int32 = _pack_int

    def _unpack_int(self):
        """Read a big-endian 32 bit integer."""
        return int(fromstring(self.fp.read(4), '>i')[0])
    _unpack_int32 = _unpack_int

    def _pack_int64(self, value):
        """Write ``value`` as a big-endian 64 bit integer."""
        self.fp.write(array(value, '>q').tostring())

    def _unpack_int64(self):
        """Read a big-endian 64 bit integer."""
        return int(fromstring(self.fp.read(8), '>q')[0])

    def _pack_string(self, s):
        """Write a length-prefixed string, NUL padded to 4 bytes."""
        count = len(s)
        self._pack_int(count)
        self.fp.write(s)
        self._write_padding(-count % 4)  # pad

    def _unpack_string(self):
        """Read a length-prefixed string and skip its padding."""
        count = self._unpack_int()
        s = self.fp.read(count).rstrip('\x00')
        self.fp.read(-count % 4)  # read padding
        return s
|
| 536 | 1 | tkerber | |
| 537 | 1 | tkerber | |
class netcdf_variable(object):
    """
    A single variable of a NetCDF file, normally obtained from
    ``netcdf_file.createVariable`` or from the ``variables`` dictionary of
    a ``netcdf_file``.

    The object wraps a Numpy array (``data``): indexing reads from it and
    assigning to an index writes to it, ``[:]`` addresses the whole array,
    and ``getValue``/``assignValue`` handle the single-element case.
    ``shape`` mirrors the shape of the array and ``dimensions`` holds the
    tuple of dimension names; both are meant to be read-only.

    Every other attribute assigned on the object is treated as a NetCDF
    variable attribute and collected in ``_attributes``.

    """
    def __init__(self, data, typecode, shape, dimensions, attributes=None):
        # These assignments happen before ``_attributes`` exists, so
        # ``__setattr__`` does not record them as user attributes.
        self.data = data
        self._typecode = typecode
        self._shape = shape
        self.dimensions = dimensions

        self._attributes = attributes or {}
        # Expose the stored attributes as plain instance attributes.
        self.__dict__.update(self._attributes)

    def __setattr__(self, attr, value):
        # Remember user-set attributes separately so they can be written
        # to the file; during ``__init__`` the dict is not there yet.
        try:
            self._attributes[attr] = value
        except AttributeError:
            pass
        self.__dict__[attr] = value

    @property
    def isrec(self):
        """Truthy when the variable is non-scalar and its first declared
        dimension has no fixed length (None or 0)."""
        return self.data.shape and not self._shape[0]

    @property
    def shape(self):
        """Current shape of the underlying array."""
        return self.data.shape

    def getValue(self):
        """Return the single element of the array as a Python scalar."""
        return self.data.item()

    def assignValue(self, value):
        """Store ``value`` into the single element of the array."""
        self.data.itemset(value)

    def typecode(self):
        """Return the type character given at construction."""
        return self._typecode

    def __getitem__(self, index):
        return self.data[index]

    def __setitem__(self, index, data):
        if self.isrec:
            # Record variables grow on demand: work out how many records
            # the assignment needs, looking only at the first axis.
            rec_index = index
            if isinstance(index, tuple):
                rec_index = index[0]
            if isinstance(rec_index, slice):
                needed = (rec_index.start or 0) + len(data)
            else:
                needed = rec_index + 1
            if needed > len(self.data):
                self.data.resize((needed,) + self._shape[1:])
        self.data[index] = data
|
| 611 | 1 | tkerber | |
| 612 | 1 | tkerber | |
# Aliases under the class names used by the Scientific.IO.NetCDF API.
NetCDFFile, NetCDFVariable = netcdf_file, netcdf_variable