Source code for iris.io

# Copyright Iris contributors
#
# This file is part of Iris and is released under the LGPL license.
# See COPYING and COPYING.LESSER in the root of the repository for full
# licensing details.
"""
Provides an interface to manage URI scheme support in iris.

"""

import collections
from collections import OrderedDict
import glob
import os.path
import pathlib
import re

import iris.exceptions


# Saving routines, indexed by file extension.
class _SaversDict(dict):
    """A dictionary that can only have string keys with no overlap."""

    def __setitem__(self, key, value):
        if not isinstance(key, str):
            raise ValueError("key is not a string")
        if key in self:
            raise ValueError("A saver already exists for", key)
        for k in self.keys():
            if k.endswith(key) or key.endswith(k):
                raise ValueError(
                    "key %s conflicts with existing key %s" % (key, k)
                )
        dict.__setitem__(self, key, value)


_savers = _SaversDict()


def run_callback(callback, cube, field, filename):
    """
    Apply an optional user callback to a freshly loaded cube.

    Args:

    * callback:
        Either None, or a function to add metadata from the originating
        field and/or URI, which obeys the following rules:

            1. Function signature must be: ``(cube, field, filename)``.
            2. Modifies the given cube inplace, unless a new cube is
               returned by the function.
            3. If the cube is to be rejected the callback must raise
               an :class:`iris.exceptions.IgnoreCubeException`.

    * cube:
        The cube handed to the callback.
    * field:
        Passed through to the callback unchanged.
    * filename:
        Passed through to the callback unchanged.

    Returns:
        The original ``cube`` when there is no callback or the callback
        returned None; the callback's own return value when that is a
        :class:`iris.cube.Cube`; or None when the callback raised
        :class:`iris.exceptions.IgnoreCubeException`.

    Raises:

    * TypeError:
        If the callback returns something that is neither None nor a
        :class:`iris.cube.Cube`.

    .. note::

        It is possible that this function returns None for certain callbacks,
        the caller of this function should handle this case.

    """
    from iris.cube import Cube

    # No callback: the cube passes straight through.
    if callback is None:
        return cube

    try:
        outcome = callback(cube, field, filename)
    except iris.exceptions.IgnoreCubeException:
        # The callback asked for this cube to be dropped.
        return None

    # A callback usually works on the cube in place and returns nothing.
    if outcome is None:
        return cube

    # Otherwise it must have handed back a replacement cube.
    if not isinstance(outcome, Cube):
        raise TypeError("Callback function returned an unhandled data type.")
    return outcome
def decode_uri(uri, default="file"):
    r"""
    Split a single URI into its scheme and scheme-specific part.

    Besides well-formed URIs, bare file paths are accepted too, either as
    strings or as :class:`pathlib.PurePath` objects, in both Windows and
    UNIX style.  Anything without a recognisable scheme is returned whole,
    paired with the ``default`` scheme.

    .. testsetup::

        from iris.io import *

    Examples:
        >>> from iris.io import decode_uri
        >>> print(decode_uri('http://www.thing.com:8080/resource?id=a:b'))
        ('http', '//www.thing.com:8080/resource?id=a:b')

        >>> print(decode_uri('file:///data/local/dataZoo/...'))
        ('file', '///data/local/dataZoo/...')

        >>> print(decode_uri('/data/local/dataZoo/...'))
        ('file', '/data/local/dataZoo/...')

        >>> print(decode_uri('file:///C:\data\local\dataZoo\...'))
        ('file', '///C:\\data\\local\\dataZoo\\...')

        >>> print(decode_uri('C:\data\local\dataZoo\...'))
        ('file', 'C:\\data\\local\\dataZoo\\...')

        >>> print(decode_uri('dataZoo/...'))
        ('file', 'dataZoo/...')

    """
    if isinstance(uri, pathlib.PurePath):
        uri = str(uri)

    # A scheme needs at least two characters, so that a Windows drive letter
    # ("C:") is not mistaken for one.  The "-" sits last inside the brackets
    # so it means the literal character rather than a range.
    # Valid schemes: http://tools.ietf.org/html/std66#section-3.1
    matched = re.match(r"^([a-zA-Z][a-zA-Z0-9+.-]+):(.+)", uri)
    if matched is None:
        # Bare UNIX and Windows paths end up here.
        return default, uri

    scheme, part = matched.groups()
    return scheme, part
def expand_filespecs(file_specs, files_expected=True):
    """
    Resolve a list of file-specs into concrete file paths.

    Parameters
    ----------
    file_specs : iterable of str
        File paths which may contain ``~`` elements or wildcards.
    files_expected : bool, default=True
        Whether file is expected to exist (i.e. for load).

    Returns
    -------
    list of str
        if files_expected is ``True``:
            A well-ordered list of matching absolute file paths.
            If any of the file-specs match no existing files, an
            exception is raised.
        if files_expected is ``False``:
            A list of expanded file paths.

    Raises
    ------
    IOError
        If ``files_expected`` is ``True`` and at least one file-spec matches
        no existing file.  The message lists every file-spec with its result.

    """

    def _absolute(spec):
        # Drop a leading "//" (hostname component - currently unused), then
        # expand "~" and make the path absolute.
        if spec.startswith("//"):
            spec = spec[2:]
        return os.path.abspath(os.path.expanduser(spec))

    filenames = [_absolute(spec) for spec in file_specs]
    if not files_expected:
        return filenames

    # Treat every filename as a glob, remembering the order of the specs.
    matches_by_pattern = OrderedDict()
    for pattern in filenames:
        matches_by_pattern[pattern] = sorted(glob.glob(pattern))

    # Any spec that expanded to nothing is an error, reported alongside the
    # results for all of the other specs.
    if not all(matches_by_pattern.values()):
        report = ["One or more of the files specified did not exist:"]
        for pattern, matched in matches_by_pattern.items():
            if matched:
                report.append(
                    '\n - "{}" matched {} file(s)'.format(pattern, len(matched))
                )
            else:
                report.append(
                    '\n * "{}" didn\'t match any files'.format(pattern)
                )
        raise IOError("".join(report))

    return [
        path
        for matched in matches_by_pattern.values()
        for path in matched
    ]
def load_files(filenames, callback, constraints=None):
    """
    Generate Cubes from a list of files.

    Takes a list of filenames which may also be globs, and optionally a
    constraint set and a callback function, and returns a
    generator of Cubes from the given files.

    .. note::

        Typically, this function should not be called directly; instead, the
        intended interface for loading is :func:`iris.load`.

    """
    from iris.fileformats import FORMAT_AGENT

    all_file_paths = expand_filespecs(filenames)

    # Group the file paths by the iris format handler that recognises them.
    paths_by_spec = collections.defaultdict(list)
    for path in all_file_paths:
        with open(path, "rb") as stream:
            spec = FORMAT_AGENT.get_spec(os.path.basename(path), stream)
        paths_by_spec[spec].append(path)

    # Call each iris format handler with the appropriate filenames.
    for spec in sorted(paths_by_spec):
        paths = paths_by_spec[spec]
        # Only constraint-aware handlers are given the constraints.
        if spec.constraint_aware_handler:
            handler_args = (paths, callback, constraints)
        else:
            handler_args = (paths, callback)
        for cube in spec.handler(*handler_args):
            yield cube
def load_http(urls, callback):
    """
    Generate Cubes from a list of OPeNDAP URLs.

    Takes a list of OPeNDAP URLs and a callback function, and returns a
    generator of Cubes from the given URLs.

    .. note::

        Typically, this function should not be called directly; instead, the
        intended interface for loading is :func:`iris.load`.

    """
    from iris.fileformats import FORMAT_AGENT

    # Group the URLs by the iris format handler that recognises them.  There
    # is no open file to inspect, so None is passed as the file handle.
    urls_by_spec = collections.defaultdict(list)
    for url in urls:
        spec = FORMAT_AGENT.get_spec(url, None)
        urls_by_spec[spec].append(url)

    # Call each iris format handler with the appropriate URLs.
    for spec in sorted(urls_by_spec):
        cubes = spec.handler(urls_by_spec[spec], callback)
        for cube in cubes:
            yield cube
def _dot_save(cube, target): # A simple wrapper for `iris.fileformats.dot.save` which allows the # saver to be registered without triggering the import of # `iris.fileformats.dot`. from iris.fileformats.dot import save return save(cube, target) def _dot_save_png(cube, target, **kwargs): # A simple wrapper for `iris.fileformats.dot.save_png` which allows the # saver to be registered without triggering the import of # `iris.fileformats.dot`. from iris.fileformats.dot import save_png return save_png(cube, target, **kwargs) def _grib_save(cube, target, append=False, **kwargs): # A simple wrapper for the grib save routine, which allows the saver to be # registered without having the grib implementation installed. try: from iris_grib import save_grib2 except ImportError: raise RuntimeError( "Unable to save GRIB file - " '"iris_grib" package is not installed.' ) save_grib2(cube, target, append, **kwargs) def _check_init_savers(): from iris.fileformats import netcdf, pp if "pp" not in _savers: _savers.update( { "pp": pp.save, "nc": netcdf.save, "dot": _dot_save, "dotpng": _dot_save_png, "grib2": _grib_save, } )
def _accepts_two_positional_args(func):
    # Report whether `func` can be called with two positional arguments.
    import inspect

    try:
        parameters = inspect.signature(func).parameters.values()
    except (TypeError, ValueError):
        # Some builtin / extension callables expose no signature; they
        # cannot be checked, so give them the benefit of the doubt.
        return True

    positional_count = 0
    for parameter in parameters:
        if parameter.kind is parameter.VAR_POSITIONAL:
            # ``*args`` soaks up any number of positional arguments.
            return True
        if parameter.kind in (
            parameter.POSITIONAL_ONLY,
            parameter.POSITIONAL_OR_KEYWORD,
        ):
            positional_count += 1
    return positional_count >= 2


def add_saver(file_extension, new_saver):
    """
    Add a custom saver to the Iris session.

    Args:

    * file_extension:
        A string such as "pp" or "my_format".
    * new_saver:
        A callable of the form ``my_saver(cube, target)``.  Any callable is
        accepted (plain function, bound method, ``functools.partial``,
        callable object, ...) provided it can take at least two positional
        arguments.

    Raises:

    * ValueError:
        If ``new_saver`` is not callable or cannot take two positional
        arguments, or if ``file_extension`` is rejected as a key (not a
        string, already registered, or overlapping an existing extension).

    See also :func:`iris.io.save`

    """
    # Check the call signature itself rather than ``__code__.co_argcount``,
    # which does not exist on many callables and counts ``self`` on bound
    # methods.
    if not callable(new_saver) or not _accepts_two_positional_args(new_saver):
        raise ValueError("Saver routines must be callable with 2+ arguments.")

    # Try to add this saver. Invalid keys will be rejected.
    _savers[file_extension] = new_saver
def find_saver(filespec):
    """
    Look up the saver function for a filename or a bare extension.

    Args:

    * filespec:
        A string such as "my_file.pp" or "PP".  It is lower-cased before
        being compared with the registered extensions.

    Returns:
        A save function or None.
        Save functions can be passed to :func:`iris.io.save`.

    Raises:

    * ValueError:
        If more than one registered extension matches ``filespec``.

    """
    _check_init_savers()

    lowered = filespec.lower()
    candidates = []
    for ext in _savers:
        # Match either a bare extension ("pp") or a filename ending in it.
        if lowered == ext or lowered.endswith("." + ext):
            candidates.append(ext)

    # Multiple matches could occur if one of the savers included a '.':
    #   e.g. _savers = {'.dot.png': dot_png_saver, '.png': png_saver}
    if len(candidates) > 1:
        listing = ", ".join(map(repr, candidates))
        raise ValueError(
            "Multiple savers found for %r: %s" % (filespec, listing)
        )

    if not candidates:
        return None
    return _savers[candidates[0]]
def save(source, target, saver=None, **kwargs):
    """
    Save one or more Cubes to file (or other writeable).

    Iris currently supports three file formats for saving, which it can
    recognise by filename extension:

    * netCDF - the Unidata network Common Data Format:
        * see :func:`iris.fileformats.netcdf.save`
    * GRIB2 - the WMO GRIdded Binary data format:
        * see :func:`iris_grib.save_grib2`.
    * PP - the Met Office UM Post Processing Format:
        * see :func:`iris.fileformats.pp.save`

    A custom saver can be provided to the function to write to a different
    file format.

    Parameters
    ----------
    source : :class:`iris.cube.Cube` or :class:`iris.cube.CubeList`
        A single cube, a CubeList, or a list/tuple containing only cubes.
    target : str or pathlib.PurePath or io.TextIOWrapper
        When given a filename or file, Iris can determine the
        file format.
    saver : str or function, optional
        Specifies the file format to save.
        If omitted, Iris will attempt to determine the format.
        If a string, this is the recognised filename extension
        (where the actual filename may not have it).

        Otherwise the value is a saver function, of the form:
        ``my_saver(cube, target)`` plus any custom keywords. It
        is assumed that a saver will accept an ``append`` keyword
        if its file format can handle multiple cubes. See also
        :func:`iris.io.add_saver`.
    **kwargs : dict, optional
        All other keywords are passed through to the saver function; see the
        relevant saver documentation for more information on keyword
        arguments.

    Raises
    ------
    ValueError
        If no saver can be determined, if ``source`` is neither a cube nor a
        sequence of cubes, or if several cubes are given to a non-netCDF
        saver that has no ``append`` name among its code variables.

    Warnings
    --------
    Saving a cube whose data has been loaded lazily
    (if `cube.has_lazy_data()` returns `True`) to the same file it expects
    to load data from will cause both the data in-memory and the data on
    disk to be lost.

    .. code-block:: python

        cube = iris.load_cube("somefile.nc")
        # The next line causes data loss in 'somefile.nc' and the cube.
        iris.save(cube, "somefile.nc")

    In general, overwriting a file which is the source for any lazily loaded
    data can result in corruption. Users should proceed with caution when
    attempting to overwrite an existing file.

    Examples
    --------
    >>> # Setting up
    >>> import iris
    >>> my_cube = iris.load_cube(iris.sample_data_path('air_temp.pp'))
    >>> my_cube_list = iris.load(iris.sample_data_path('space_weather.nc'))

    >>> # Save a cube to PP
    >>> iris.save(my_cube, "myfile.pp")

    >>> # Save a cube list to a PP file, appending to the contents of the file
    >>> # if it already exists
    >>> iris.save(my_cube_list, "myfile.pp", append=True)

    >>> # Save a cube to netCDF, defaults to NETCDF4 file format
    >>> iris.save(my_cube, "myfile.nc")

    >>> # Save a cube list to netCDF, using the NETCDF3_CLASSIC storage option
    >>> iris.save(my_cube_list, "myfile.nc", netcdf_format="NETCDF3_CLASSIC")

    """
    from iris.cube import Cube, CubeList

    if isinstance(target, pathlib.PurePath):
        target = str(target)

    # Work out which saver to use.
    if saver is None:
        if isinstance(target, str):
            # Converts tilde or wildcards to absolute path, then picks the
            # saver from the filename.
            (target,) = expand_filespecs([str(target)], False)
            saver = find_saver(target)
        elif hasattr(target, "name"):
            # File-like target: pick the saver from its name.
            saver = find_saver(target.name)
    elif isinstance(saver, str):
        # The saver was named by its filename extension.
        saver = find_saver(saver)

    if saver is None:
        raise ValueError("Cannot save; no saver")

    # Single cube: hand it straight to the saver.
    if isinstance(source, Cube):
        saver(source, target, **kwargs)
        return

    # Otherwise it must be a CubeList, or a list/tuple holding only cubes.
    is_cube_sequence = isinstance(source, CubeList) or (
        isinstance(source, (list, tuple))
        and all(isinstance(item, Cube) for item in source)
    )
    if not is_cube_sequence:
        raise ValueError("Cannot save; non Cube found in source")

    # The netCDF saver receives the whole collection in one call.
    if "iris.fileformats.netcdf" in saver.__module__:
        saver(source, target, **kwargs)
        return

    # Any other saver is called once per cube, so it has to understand an
    # ``append`` keyword.
    saver_code = saver.__code__
    if "append" not in saver_code.co_varnames:
        raise ValueError(
            "Cannot append cubes using saver function "
            "'%s' in '%s'" % (saver_code.co_name, saver_code.co_filename)
        )

    # Force append=True for the tail cubes, working on a copy so that the
    # incoming kwargs are left unmodified.
    per_cube_kwargs = kwargs.copy()
    for index, cube in enumerate(source):
        if index != 0:
            per_cube_kwargs["append"] = True
        saver(cube, target, **per_cube_kwargs)