{ "cells": [ { "cell_type": "markdown", "id": "8e308cd3-7f5a-4b62-bd2d-027850282c00", "metadata": { "editable": true, "slideshow": { "slide_type": "" }, "tags": [] }, "source": [ "## Writing GRIB data to Zarr" ] }, { "cell_type": "code", "execution_count": 1, "id": "62b00621-67cd-46b0-81ef-16278a6eee18", "metadata": { "editable": true, "slideshow": { "slide_type": "" }, "tags": [] }, "outputs": [ { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "8d75e667605a4e4eb22967f6a3d6e9c6", "version_major": 2, "version_minor": 0 }, "text/plain": [ "pl.grib: 0%| | 0.00/48.8k [00:00\n", "#T_ca541 th {\n", " text-align: left;\n", "}\n", "#T_ca541_row0_col0, #T_ca541_row0_col1, #T_ca541_row0_col2, #T_ca541_row0_col3, #T_ca541_row0_col4, #T_ca541_row0_col5, #T_ca541_row0_col6, #T_ca541_row0_col7, #T_ca541_row0_col8, #T_ca541_row1_col0, #T_ca541_row1_col1, #T_ca541_row1_col2, #T_ca541_row1_col3, #T_ca541_row1_col4, #T_ca541_row1_col5, #T_ca541_row1_col6, #T_ca541_row1_col7, #T_ca541_row1_col8 {\n", " text-align: left;\n", "}\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
  leveldatetimestepparamIdclassstreamtypeexperimentVersionNumber
shortNametypeOfLevel         
risobaricInhPa700,50020240603,202406040,12000,6157odoperfc0001
tisobaricInhPa700,50020240603,202406040,12000,6130odoperfc0001
\n" ], "text/plain": [ "" ] }, "execution_count": 2, "metadata": {}, "output_type": "execute_result" } ], "source": [ "ds.describe()" ] }, { "cell_type": "markdown", "id": "2ab4d979-7c02-42e4-9b52-8ec12e76853b", "metadata": { "editable": true, "slideshow": { "slide_type": "" }, "tags": [] }, "source": [ "#### Using to_target() on the data object" ] }, { "cell_type": "raw", "id": "60ee891e-f0cd-426c-8f26-8155e9c25381", "metadata": { "editable": true, "raw_mimetype": "text/restructuredtext", "slideshow": { "slide_type": "" }, "tags": [] }, "source": [ "We use :func:`to_target` to write the GRIB fieldlist/field into a Zarr store. First, the data is converted to Xarray, then :py:func:`xarray.Dataset.to_zarr` is called to generate the Zarr store. The options for these two steps are passed via the ``earthkit_to_xarray_kwargs`` and ``xarray_to_zarr_kwargs`` keyword arguments, respectively." ] }, { "cell_type": "code", "execution_count": 3, "id": "e3ff25d0-bce4-4cfc-bdd0-93ca0864d08d", "metadata": { "editable": true, "slideshow": { "slide_type": "" }, "tags": [] }, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "/opt/homebrew/Caskroom/miniforge/base/envs/dev/lib/python3.11/site-packages/zarr/api/asynchronous.py:205: UserWarning: Consolidated metadata is currently not part in the Zarr format 3 specification. It may not be supported by other zarr implementations and may change in the future.\n", " warnings.warn(\n" ] } ], "source": [ "# with these options each field will be a separate chunk\n", "ds.to_target(\"zarr\", \n", " earthkit_to_xarray_kwargs={\"chunks\": {\"forecast_reference_time\": 1, \n", " \"step\": 1, \n", " \"level\": 1}},\n", " xarray_to_zarr_kwargs={\"store\": \"_pl.zarr\", \"mode\": \"w\"})" ] }, { "cell_type": "code", "execution_count": 4, "id": "3ffafb60-c412-4560-9bea-089143bcf85d", "metadata": { "editable": true, "slideshow": { "slide_type": "" }, "tags": [] }, "outputs": [ { "data": { "text/html": [ "
/\n",
       "├── forecast_reference_time (4,) int64\n",
       "├── latitude (19,) float64\n",
       "├── level (2,) int64\n",
       "├── longitude (36,) float64\n",
       "├── r (4, 2, 2, 19, 36) float64\n",
       "├── step (2,) int64\n",
       "└── t (4, 2, 2, 19, 36) float64\n",
       "
\n" ], "text/plain": [ "\u001b[1m/\u001b[0m\n", "├── \u001b[1mforecast_reference_time\u001b[0m (4,) int64\n", "├── \u001b[1mlatitude\u001b[0m (19,) float64\n", "├── \u001b[1mlevel\u001b[0m (2,) int64\n", "├── \u001b[1mlongitude\u001b[0m (36,) float64\n", "├── \u001b[1mr\u001b[0m (4, 2, 2, 19, 36) float64\n", "├── \u001b[1mstep\u001b[0m (2,) int64\n", "└── \u001b[1mt\u001b[0m (4, 2, 2, 19, 36) float64\n" ] }, "execution_count": 4, "metadata": {}, "output_type": "execute_result" } ], "source": [ "import zarr \n", "root = zarr.group(\"_pl.zarr\")\n", "root.tree()" ] }, { "cell_type": "code", "execution_count": 5, "id": "515bc071-d45f-48aa-abea-0cc688f4eebc", "metadata": { "editable": true, "slideshow": { "slide_type": "" }, "tags": [] }, "outputs": [ { "data": { "text/plain": [ "Type : Array\n", "Zarr format : 3\n", "Data type : DataType.float64\n", "Shape : (4, 2, 2, 19, 36)\n", "Chunk shape : (1, 1, 1, 19, 36)\n", "Order : C\n", "Read-only : False\n", "Store type : LocalStore\n", "Filters : ()\n", "Serializer : BytesCodec(endian=)\n", "Compressors : (ZstdCodec(level=0, checksum=False),)\n", "No. bytes : 87552 (85.5K)" ] }, "execution_count": 5, "metadata": {}, "output_type": "execute_result" } ], "source": [ "root[\"t\"].info" ] }, { "cell_type": "markdown", "id": "ef19bc33-fcc7-4b5a-83b0-ee59da4179f0", "metadata": { "editable": true, "slideshow": { "slide_type": "" }, "tags": [] }, "source": [ "The Zarr store can be loaded into Xarray to check its contents." ] }, { "cell_type": "code", "execution_count": 6, "id": "706d4467-64b8-46bf-894d-346088208fa2", "metadata": { "editable": true, "slideshow": { "slide_type": "" }, "tags": [] }, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "/var/folders/93/w0p869rx17q98wxk83gn9ys40000gn/T/ipykernel_45349/754541422.py:2: FutureWarning: In a future version of xarray decode_timedelta will default to False rather than None. 
To silence this warning, set decode_timedelta to True, False, or a 'CFTimedeltaCoder' instance.\n", " xarray.open_dataset(\"_pl.zarr\")\n" ] }, { "data": { "text/html": [ "
\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "
<xarray.Dataset> Size: 176kB\n",
       "Dimensions:                  (step: 2, longitude: 36,\n",
       "                              forecast_reference_time: 4, latitude: 19, level: 2)\n",
       "Coordinates:\n",
       "  * step                     (step) timedelta64[ns] 16B 00:00:00 06:00:00\n",
       "  * longitude                (longitude) float64 288B 0.0 10.0 ... 340.0 350.0\n",
       "  * forecast_reference_time  (forecast_reference_time) datetime64[ns] 32B 202...\n",
       "  * latitude                 (latitude) float64 152B 90.0 80.0 ... -80.0 -90.0\n",
       "  * level                    (level) int64 16B 500 700\n",
       "Data variables:\n",
       "    r                        (forecast_reference_time, step, level, latitude, longitude) float64 88kB ...\n",
       "    t                        (forecast_reference_time, step, level, latitude, longitude) float64 88kB ...\n",
       "Attributes:\n",
       "    class:        od\n",
       "    stream:       oper\n",
       "    levtype:      pl\n",
       "    type:         fc\n",
       "    expver:       0001\n",
       "    date:         20240603\n",
       "    time:         0\n",
       "    domain:       g\n",
       "    number:       0\n",
       "    Conventions:  CF-1.8\n",
       "    institution:  ECMWF
" ], "text/plain": [ " Size: 176kB\n", "Dimensions: (step: 2, longitude: 36,\n", " forecast_reference_time: 4, latitude: 19, level: 2)\n", "Coordinates:\n", " * step (step) timedelta64[ns] 16B 00:00:00 06:00:00\n", " * longitude (longitude) float64 288B 0.0 10.0 ... 340.0 350.0\n", " * forecast_reference_time (forecast_reference_time) datetime64[ns] 32B 202...\n", " * latitude (latitude) float64 152B 90.0 80.0 ... -80.0 -90.0\n", " * level (level) int64 16B 500 700\n", "Data variables:\n", " r (forecast_reference_time, step, level, latitude, longitude) float64 88kB ...\n", " t (forecast_reference_time, step, level, latitude, longitude) float64 88kB ...\n", "Attributes:\n", " class: od\n", " stream: oper\n", " levtype: pl\n", " type: fc\n", " expver: 0001\n", " date: 20240603\n", " time: 0\n", " domain: g\n", " number: 0\n", " Conventions: CF-1.8\n", " institution: ECMWF" ] }, "execution_count": 6, "metadata": {}, "output_type": "execute_result" } ], "source": [ "import xarray\n", "xarray.open_dataset(\"_pl.zarr\")" ] }, { "cell_type": "code", "execution_count": null, "id": "8479a12e-e907-43de-a3a6-aefb8cbfa754", "metadata": { "editable": true, "slideshow": { "slide_type": "" }, "tags": [] }, "outputs": [], "source": [] } ], "metadata": { "kernelspec": { "display_name": "dev", "language": "python", "name": "dev" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.11.12" } }, "nbformat": 4, "nbformat_minor": 5 }