# coding:utf-8
"""
Module to read and write netCDF files.
Thin interface on top of the netCDF4 package.
"""
import sys
from collections import OrderedDict
import netCDF4
import numpy as np
import os
def _check_var(nvariables, variables):
    """Tell whether every requested name is among the available names.

    Parameters
    ----------
    nvariables: iterable of string
        names that are available
    variables: iterable of string
        names that are requested

    Returns
    -------
    tuple
        (True when no requested name is missing,
        set of the requested names absent from <nvariables>)
    """
    available = set(nvariables)
    requested = set(variables)
    missing = requested - available
    return requested <= available, missing
def read_var(filename, variables, index=slice(None), **kwargs):
    """ Extract variables from one or several netCDF files

    The file(s) are first opened with netCDF4.MFDataset. If that raises an
    OSError and a single file was given, the file is opened with
    netCDF4.Dataset instead.
    netcdf time is returned as a datetime array if possible.

    Parameters
    ----------
    filename: string, list or tuple
        path to a netCDF file, or a sequence of paths
    variables: string, list or tuple
        variables to read as a sequence of strings;
        a single variable can also be given as a string
    index: slice, optional
        A slice object; for example np.s_[0, :, 10]
    kwargs: keyword arguments
        Arguments to pass to the MFDataset class (they are also passed to
        Dataset when falling back to a single file).
        For example to aggregate the files on the <time> dimension
        use aggdim='time'

    Returns
    -------
    list of numpy array: list
        one array per requested variable, in the requested order

    Raises
    ------
    IOError
        if a local file is not found
    """
    # Accept tuples as well as lists; anything else is a single item.
    if isinstance(filename, (list, tuple)):
        filename = list(filename)
    else:
        filename = [filename]
    if isinstance(variables, (list, tuple)):
        variables = list(variables)
    else:
        variables = [variables]

    # test if filename exists
    for f in filename:
        # Remote datasets (plain or secured http) cannot be checked on the
        # local file system.
        if 'http://' in f or 'https://' in f:
            continue
        if not os.path.isfile(f):
            raise IOError('{} was not found'.format(f))

    try:
        with netCDF4.MFDataset(filename, **kwargs) as ncfile:
            vararray = _extract_from_netcdf(ncfile, variables, index)
    except OSError:
        if len(filename) != 1:
            # Several files cannot be read through a single Dataset.
            raise
        with netCDF4.Dataset(filename[0], **kwargs) as ncfile:
            vararray = _extract_from_netcdf(ncfile, variables, index)
    return vararray
def _extract_from_netcdf(ncfile, variables, index):
    """ Read <variables> from an opened netCDF dataset

    Parameters
    ----------
    ncfile: opened dataset
        object exposing a <variables> mapping (name -> variable)
    variables: list of string
        names of the variables to read
    index: slice or tuple
        index applied to every variable except 'time'

    Returns
    -------
    list
        one squeezed array per requested variable, in the requested order

    Raises
    ------
    Exception
        if a requested variable is not in the dataset
    """
    nvariables = list(ncfile.variables.keys())
    diffvar = set(variables).difference(nvariables)
    if diffvar:
        err = '{} not found in file\n. Available :{}'.format(
            ",".join(diffvar), ",".join(nvariables))
        raise Exception(err)

    vararray = []
    for var in variables:
        if var == 'time':
            # 'time' is always read in full; <index> is not applied to it.
            time = ncfile.variables[var]
            try:
                # Honour the calendar attribute when the variable has one,
                # otherwise dates of non-standard calendars are decoded
                # wrongly. 'standard' is the num2date default.
                vardata = netCDF4.num2date(
                    time[:], units=time.units,
                    calendar=getattr(time, 'calendar', 'standard'))
            except (AttributeError, ValueError):
                # no usable units: return the raw values
                vardata = time[:]
        else:
            ndim = ncfile.variables[var].ndim
            # For 3-D variables the leading element of a multi-axis index is
            # dropped (presumably the index is written for 4-D data - TODO
            # confirm). A plain slice has no leading element to drop and is
            # used as is.
            if ndim == 3 and isinstance(index, (tuple, list)):
                nindex = index[1:]
            else:
                nindex = index
            vardata = ncfile.variables[var][nindex]
        vararray.append(vardata.squeeze())
    return vararray
def read_var_bbox(filename, variables, bbox, lon='lon', lat='lat',
                  return_index=False):
    """Read var only in the bbox

    Similar to read_var but read only the data in the given bounding box.

    Parameters
    ----------
    filename: list or string
        filename(s) where the data is located
    variables: list or string
        variable(s) name of the data to read from file
    bbox: list
        coordinates of the bounding box as:
        minlon, maxlon, minlat, maxlat
    lon: string or np.ndarray, default lon
        name of the 2D longitude array in the data;
        or 2D longitude array
    lat: string or np.ndarray, default lat
        name of the 2D latitude array in the data;
        or 2D latitude array
    return_index: boolean, default False
        If True return the index used to reduce the variable data, with the
        dimension (time, height, lon, lat)

    Returns
    -------
    list
        bbox_lon: numpy array
            lon restricted to the bbox
        bbox_lat: numpy array
            lat restricted to the bbox
        bbox_dta: numpy array
            data restricted to the bbox, one entry per variable
        index: tuple
            only when <return_index> is True

    Raises
    ------
    ValueError
        if no grid point lies strictly inside the bounding box
    """
    if isinstance(lon, str):
        # coordinates given by name: read both from the file
        lon, lat = read_var(filename, [lon, lat])

    xll, xur, yll, yur = bbox
    # points strictly inside the box; the bounds themselves are excluded
    xindex, yindex = np.where((lon > xll) & (lon < xur) &
                              (lat > yll) & (lat < yur))
    if xindex.size == 0:
        # min()/max() of an empty selection would fail with an unrelated
        # numpy message; say what actually went wrong.
        raise ValueError(
            'no grid point found inside the bounding box {}'.format(
                list(bbox)))

    # smallest rectangle of the grid that contains every selected point
    xmin, xmax = xindex.min(), xindex.max() + 1
    ymin, ymax = yindex.min(), yindex.max() + 1
    index = np.s_[:, :, xmin:xmax, ymin:ymax]

    vardata = read_var(filename, variables, index=index)
    returndata = [lon[xmin:xmax, ymin:ymax], lat[xmin:xmax, ymin:ymax]]
    returndata.extend(vardata)
    if return_index:
        returndata.append(index)
    return returndata
def read_gattributes(filename):
    """ Return the global attributes of a netCDF file

    Parameters
    ----------
    filename: string
        path to a netcdf file

    Returns
    -------
    global attributes: dictionary
    """
    with netCDF4.Dataset(filename) as dataset:
        # the value is fetched while the file is still open
        return dataset.__dict__
def read_dimensions(filename):
    """ Return the dimensions of a netCDF file

    The size of an unlimited dimension is returned as None.

    Parameters
    ----------
    filename: string

    Returns
    -------
    a dictionary with name as key and the dimension as value: dictionary
    """
    sizes = OrderedDict()
    with netCDF4.Dataset(filename) as dataset:
        for dimname in dataset.dimensions:
            dimension = dataset.dimensions[dimname]
            if dimension.isunlimited():
                sizes[dimname] = None
            else:
                sizes[dimname] = dimension.size
    return sizes
def read_variables(filename):
    """ Return the names of the variables of a netCDF file

    Parameters
    ----------
    filename: string

    Returns
    -------
    a list of variables names: list
    """
    with netCDF4.Dataset(filename) as dataset:
        return [varname for varname in dataset.variables.keys()]
def read_var_attributes(filename, var):
    """ Return the attributes of the variable <var> as an ordered dictionary

    Parameters
    ----------
    filename: string
        path to a netcdf file
    var: string
        name of the variable

    Returns
    -------
    attribute name -> attribute value: OrderedDict
    """
    with netCDF4.Dataset(filename) as dataset:
        attributes = OrderedDict()
        for attname in dataset.variables[var].ncattrs():
            attributes[attname] = getattr(dataset.variables[var], attname)
        return attributes
def create(outname, dimensions, gattributes, format='NETCDF3_CLASSIC'):
    """ Create a netCDF file and return it opened for writing

    Parameters
    ----------
    outname: string
        path of the file to create
    dimensions: dictionary
        dimension name -> size, passed to createDimension
    gattributes: dictionary
        global attributes, passed to setncatts
    format: string, default NETCDF3_CLASSIC
        format passed to netCDF4.Dataset

    Returns
    -------
    the opened netCDF4.Dataset; the caller is responsible for closing it
    """
    ncfile = netCDF4.Dataset(outname, 'w', format=format)
    try:
        for dimname, size in dimensions.items():
            ncfile.createDimension(dimname, size)
        ncfile.setncatts(gattributes)
    except Exception:
        # Do not leave the file handle open when the set-up fails: the
        # caller never receives the dataset and so could not close it.
        ncfile.close()
        raise
    return ncfile
def addvar(ncfile, varname, vardata, dimensions, attributes=None):
    """ Add a variable to an opened netcdf file

    Parameters
    ----------
    ncfile: opened dataset
        object providing createVariable
    varname: string
        name of the variable to create
    vardata: numpy array or sequence
        data to write; a plain sequence is converted to a numpy array
    dimensions: tuple of string
        names of the dimensions of the variable
    attributes: dictionary, optional
        attributes to set on the variable; ignored if not a dictionary

    If writing the data fails the error is written to stderr and the
    program exits with status 1.
    """
    if not hasattr(vardata, 'dtype'):
        # plain lists/tuples carry no dtype; let numpy infer one
        vardata = np.asarray(vardata)
    var = ncfile.createVariable(varname, vardata.dtype, dimensions)
    # isinstance also accepts OrderedDict and any other dict subclass
    if isinstance(attributes, dict):
        var.setncatts(attributes)
    try:
        var[:] = vardata
    except Exception as e:
        # NOTE(review): exiting the interpreter from a library function is
        # harsh; kept because callers may rely on it.
        sys.stderr.write('{0} in addvar()\n'.format(e))
        sys.exit(1)
def addvar_to_file(filename, varname, vardata, dimensions, attributes=None):
    """Open an existing netcdf file in 'r+' mode and add a variable to it

    The arguments after <filename> are handed to addvar unchanged.
    """
    dataset = netCDF4.Dataset(filename, 'r+')
    with dataset:
        addvar(dataset, varname, vardata, dimensions, attributes)