{
  "cells": [
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "collapsed": false
      },
      "outputs": [],
      "source": [
        "%matplotlib inline"
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {},
      "source": [
        "\n# Loading a Cube From a Custom File Format\n\nThis example shows how a custom text file can be loaded using the standard Iris\nload mechanism.\n\nThe first stage in the process is to define an Iris :class:`FormatSpecification\n<iris.io.format_picker.FormatSpecification>` for the file format. To create a\nformat specification we need to define the following:\n\n* format_name - Some text that describes the format specification we are\n  creating\n* file_element - FileElement object describing the element which identifies\n  this FormatSpecification.\n\n  Possible values are:\n\n    ``iris.io.format_picker.MagicNumber(n, o)``\n        The n bytes from the file at offset o.\n\n    ``iris.io.format_picker.FileExtension()``\n        The file's extension.\n\n    ``iris.io.format_picker.LeadingLine()``\n        The first line of the file.\n\n* file_element_value - The value that the file_element should take if a file\n  matches this FormatSpecification\n* handler (optional) - A generator function that will be called when the file\n  specification has been identified. This function is provided by the user and\n  provides the means to parse the whole file. If no handler function is\n  provided, then identification is still possible without any handling.\n\n  The handler function must define the following arguments:\n\n  * list of filenames to process\n  * callback function - An optional function to filter/alter the Iris cubes\n    returned\n\n  The handler function must be defined as generator which yields each cube as\n  they are produced.\n\n* priority (optional) - Integer giving a priority for considering this\n  specification where higher priority means sooner consideration\n\nIn the following example, the function :func:`load_NAME_III` has been defined\nto handle the loading of the raw data from the custom file format. This\nfunction is called from :func:`NAME_to_cube` which uses this data to create and\nyield Iris cubes.\n\nIn the ``main()`` function the filenames are loaded via the ``iris.load_cube``\nfunction which automatically invokes the ``FormatSpecification`` we defined.\nThe cube returned from the load function is then used to produce a plot.\n"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "collapsed": false
      },
      "outputs": [],
      "source": [
        "import datetime\n\nfrom cf_units import CALENDAR_STANDARD, Unit\nimport matplotlib.pyplot as plt\nimport numpy as np\n\nimport iris\nimport iris.coord_systems as icoord_systems\nimport iris.coords as icoords\nimport iris.fileformats\nimport iris.io.format_picker as format_picker\nimport iris.plot as iplt\n\nUTC_format = \"%H%M%Z %d/%m/%Y\"\n\nFLOAT_HEADERS = [\n    \"X grid origin\",\n    \"Y grid origin\",\n    \"X grid resolution\",\n    \"Y grid resolution\",\n]\nINT_HEADERS = [\"X grid size\", \"Y grid size\", \"Number of fields\"]\nDATE_HEADERS = [\"Run time\", \"Start of release\", \"End of release\"]\nCOLUMN_NAMES = [\n    \"species_category\",\n    \"species\",\n    \"cell_measure\",\n    \"quantity\",\n    \"unit\",\n    \"z_level\",\n    \"time\",\n]\n\n\ndef load_NAME_III(filename):\n    \"\"\"\n    Loads the Met Office's NAME III grid output files returning headers, column\n    definitions and data arrays as 3 separate lists.\n\n    \"\"\"\n\n    # Loading a file gives a generator of lines which can be progressed using\n    # the next() function. This will come in handy as we wish to progress\n    # through the file line by line.\n    with open(filename) as file_handle:\n        # Define a dictionary which can hold the header metadata for this file.\n        headers = {}\n\n        # Skip the NAME header of the file which looks something like\n        # 'NAME III (version X.X.X)'.\n        next(file_handle)\n\n        # Read the next 16 lines of header information, putting the form\n        # \"header name:    header value\" into a dictionary.\n        for _ in range(16):\n            header_name, header_value = next(file_handle).split(\":\")\n\n            # Strip off any spurious space characters in the header name and\n            # value.\n            header_name = header_name.strip()\n            header_value = header_value.strip()\n\n            # Cast some headers into floats or integers if they match a given\n            # header name.\n            if header_name in FLOAT_HEADERS:\n                header_value = float(header_value)\n            elif header_name in INT_HEADERS:\n                header_value = int(header_value)\n            elif header_name in DATE_HEADERS:\n                # convert the time to python datetimes\n                header_value = datetime.datetime.strptime(\n                    header_value, UTC_format\n                )\n\n            headers[header_name] = header_value\n\n        # Skip the next blank line in the file.\n        next(file_handle)\n\n        # Read the next 7 lines of column definitions.\n        column_headings = {}\n        for column_header_name in COLUMN_NAMES:\n            column_headings[column_header_name] = [\n                col.strip() for col in next(file_handle).split(\",\")\n            ][:-1]\n\n        # Convert the time to python datetimes.\n        new_time_column_header = []\n        for i, t in enumerate(column_headings[\"time\"]):\n            # The first 4 columns aren't time at all, so don't convert them to\n            # datetimes.\n            if i >= 4:\n                t = datetime.datetime.strptime(t, UTC_format)\n            new_time_column_header.append(t)\n        column_headings[\"time\"] = new_time_column_header\n\n        # Skip the blank line after the column headers.\n        next(file_handle)\n\n        # Make a list of data arrays to hold the data for each column.\n        data_shape = (headers[\"Y grid size\"], headers[\"X grid size\"])\n        data_arrays = [\n            np.zeros(data_shape, dtype=np.float32)\n            for i in range(headers[\"Number of fields\"])\n        ]\n\n        # Iterate over the remaining lines which represent the data in a column\n        # form.\n        for line in file_handle:\n            # Split the line by comma, removing the last empty column caused by\n            # the trailing comma.\n            vals = line.split(\",\")[:-1]\n\n            # Cast the x and y grid positions to floats and convert them to\n            # zero based indices (the numbers are 1 based grid positions where\n            # 0.5 represents half a grid point.)\n            x = int(float(vals[0]) - 1.5)\n            y = int(float(vals[1]) - 1.5)\n\n            # Populate the data arrays (i.e. all columns but the leading 4).\n            for i, data_array in enumerate(data_arrays):\n                data_array[y, x] = float(vals[i + 4])\n\n    return headers, column_headings, data_arrays\n\n\ndef NAME_to_cube(filenames, callback):\n    \"\"\"\n    Returns a generator of cubes given a list of filenames and a callback.\n    \"\"\"\n\n    for filename in filenames:\n        header, column_headings, data_arrays = load_NAME_III(filename)\n\n        for i, data_array in enumerate(data_arrays):\n            # turn the dictionary of column headers with a list of header\n            # information for each field into a dictionary of headers for just\n            # this field. Ignore the first 4 columns of grid position (data was\n            # located with the data array).\n            field_headings = dict(\n                (k, v[i + 4]) for k, v in column_headings.items()\n            )\n\n            # make an cube\n            cube = iris.cube.Cube(data_array)\n\n            # define the name and unit\n            name = \"%s %s\" % (\n                field_headings[\"species\"],\n                field_headings[\"quantity\"],\n            )\n            name = name.upper().replace(\" \", \"_\")\n            cube.rename(name)\n            # Some units are badly encoded in the file, fix this by putting a\n            # space in between. (if gs is not found, then the string will be\n            # returned unchanged)\n            cube.units = field_headings[\"unit\"].replace(\"gs\", \"g s\")\n\n            # define and add the singular coordinates of the field (flight\n            # level, time etc.)\n            cube.add_aux_coord(\n                icoords.AuxCoord(\n                    field_headings[\"z_level\"],\n                    long_name=\"flight_level\",\n                    units=\"1\",\n                )\n            )\n\n            # define the time unit and use it to serialise the datetime for the\n            # time coordinate\n            time_unit = Unit(\"hours since epoch\", calendar=CALENDAR_STANDARD)\n            time_coord = icoords.AuxCoord(\n                time_unit.date2num(field_headings[\"time\"]),\n                standard_name=\"time\",\n                units=time_unit,\n            )\n            cube.add_aux_coord(time_coord)\n\n            # build a coordinate system which can be referenced by latitude and\n            # longitude coordinates\n            lat_lon_coord_system = icoord_systems.GeogCS(6371229)\n\n            # build regular latitude and longitude coordinates which have\n            # bounds\n            start = header[\"X grid origin\"] + header[\"X grid resolution\"]\n            step = header[\"X grid resolution\"]\n            count = header[\"X grid size\"]\n            pts = start + np.arange(count, dtype=np.float32) * step\n            lon_coord = icoords.DimCoord(\n                pts,\n                standard_name=\"longitude\",\n                units=\"degrees\",\n                coord_system=lat_lon_coord_system,\n            )\n            lon_coord.guess_bounds()\n\n            start = header[\"Y grid origin\"] + header[\"Y grid resolution\"]\n            step = header[\"Y grid resolution\"]\n            count = header[\"Y grid size\"]\n            pts = start + np.arange(count, dtype=np.float32) * step\n            lat_coord = icoords.DimCoord(\n                pts,\n                standard_name=\"latitude\",\n                units=\"degrees\",\n                coord_system=lat_lon_coord_system,\n            )\n            lat_coord.guess_bounds()\n\n            # add the latitude and longitude coordinates to the cube, with\n            # mappings to data dimensions\n            cube.add_dim_coord(lat_coord, 0)\n            cube.add_dim_coord(lon_coord, 1)\n\n            # implement standard iris callback capability. Although callbacks\n            # are not used in this example, the standard mechanism for a custom\n            # loader to implement a callback is shown:\n            cube = iris.io.run_callback(\n                callback, cube, [header, field_headings, data_array], filename\n            )\n\n            # yield the cube created (the loop will continue when the next()\n            # element is requested)\n            yield cube\n\n\n# Create a format_picker specification of the NAME file format giving it a\n# priority greater than the built in NAME loader.\n_NAME_III_spec = format_picker.FormatSpecification(\n    \"Name III\",\n    format_picker.LeadingLine(),\n    lambda line: line.startswith(b\"NAME III\"),\n    NAME_to_cube,\n    priority=6,\n)\n\n# Register the NAME loader with iris\niris.fileformats.FORMAT_AGENT.add_spec(_NAME_III_spec)\n\n\n# ---------------------------------------------\n# |          Using the new loader             |\n# ---------------------------------------------\n\n\ndef main():\n    fname = iris.sample_data_path(\"NAME_output.txt\")\n\n    boundary_volc_ash_constraint = iris.Constraint(\n        \"VOLCANIC_ASH_AIR_CONCENTRATION\", flight_level=\"From FL000 - FL200\"\n    )\n\n    # Callback shown as None to illustrate where a cube-level callback function\n    # would be used if required\n    cube = iris.load_cube(fname, boundary_volc_ash_constraint, callback=None)\n\n    # draw contour levels for the data (the top level is just a catch-all)\n    levels = (0.0002, 0.002, 0.004, 1e10)\n    cs = iplt.contourf(\n        cube,\n        levels=levels,\n        colors=(\"#80ffff\", \"#939598\", \"#e00404\"),\n    )\n\n    # draw a black outline at the lowest contour to highlight affected areas\n    iplt.contour(cube, levels=(levels[0], 100), colors=\"black\")\n\n    # set an extent and a background image for the map\n    ax = plt.gca()\n    ax.set_extent((-90, 20, 20, 75))\n    ax.stock_img(\"ne_shaded\")\n\n    # make a legend, with custom labels, for the coloured contour set\n    artists, _ = cs.legend_elements()\n    labels = [\n        r\"$%s < x \\leq %s$\" % (levels[0], levels[1]),\n        r\"$%s < x \\leq %s$\" % (levels[1], levels[2]),\n        r\"$x > %s$\" % levels[2],\n    ]\n    ax.legend(\n        artists, labels, title=\"Ash concentration / g m-3\", loc=\"upper left\"\n    )\n\n    time = cube.coord(\"time\")\n    time_date = time.units.num2date(time.points[0]).strftime(UTC_format)\n    plt.title(\"Volcanic ash concentration forecast\\nvalid at %s\" % time_date)\n\n    iplt.show()\n\n\nif __name__ == \"__main__\":\n    main()"
      ]
    }
  ],
  "metadata": {
    "kernelspec": {
      "display_name": "Python 3",
      "language": "python",
      "name": "python3"
    },
    "language_info": {
      "codemirror_mode": {
        "name": "ipython",
        "version": 3
      },
      "file_extension": ".py",
      "mimetype": "text/x-python",
      "name": "python",
      "nbconvert_exporter": "python",
      "pygments_lexer": "ipython3",
      "version": "3.10.6"
    }
  },
  "nbformat": 4,
  "nbformat_minor": 0
}