{ "cells": [ { "cell_type": "markdown", "id": "d02b3258-df28-41cb-921d-f365b1dd96f7", "metadata": { "editable": true, "slideshow": { "slide_type": "" }, "tags": [] }, "source": [ "## Reading data from URLs" ] }, { "cell_type": "markdown", "id": "2c9d2ae0-aa05-4426-b8de-0d49495abee4", "metadata": {}, "source": [ "### Using individual URLs" ] }, { "cell_type": "raw", "id": "965e6efa-56cd-43b7-8501-a384f7baf38c", "metadata": { "editable": true, "raw_mimetype": "text/restructuredtext", "slideshow": { "slide_type": "" }, "tags": [] }, "source": [ "We can read individual files from URLs with :ref:`from_source() `:" ] }, { "cell_type": "code", "execution_count": 1, "id": "9e07cfc0-6a41-4865-aefc-1d352d70fe4a", "metadata": { "editable": true, "slideshow": { "slide_type": "" }, "tags": [] }, "outputs": [ { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "6c598cd160e44444bd146ba04289d0d2", "version_major": 2, "version_minor": 0 }, "text/plain": [ "test.grib: 0%| | 0.00/1.03k [00:00\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
centreshortNametypeOfLevelleveldataDatedataTimestepRangedataTypenumbergridType
0ecmf2tsurface02020051312000an0regular_ll
1ecmfmslsurface02020051312000an0regular_ll
\n", "" ], "text/plain": [ " centre shortName typeOfLevel level dataDate dataTime stepRange dataType \\\n", "0 ecmf 2t surface 0 20200513 1200 0 an \n", "1 ecmf msl surface 0 20200513 1200 0 an \n", "\n", " number gridType \n", "0 0 regular_ll \n", "1 0 regular_ll " ] }, "execution_count": 2, "metadata": {}, "output_type": "execute_result" } ], "source": [ "fs.ls()" ] }, { "cell_type": "markdown", "id": "91bccd51-f1c8-4dd8-bfd3-c6bc027f4878", "metadata": {}, "source": [ "Tar and zip archives can also be loaded from a URL:" ] }, { "cell_type": "code", "execution_count": 3, "id": "f1a423ae-7003-4185-b0c4-292f01cebb70", "metadata": {}, "outputs": [ { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "8f99bb60504740a5986d09e854c1bce7", "version_major": 2, "version_minor": 0 }, "text/plain": [ "test_gribs.tar: 0%| | 0.00/463k [00:00\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
centreshortNametypeOfLevelleveldataDatedataTimestepRangedataTypenumbergridType
0ecmf2tsurface02020051312000an0regular_ll
1ecmfmslsurface02020051312000an0regular_ll
2ecmftisobaricInhPa5002007010112000an0regular_ll
3ecmfzisobaricInhPa5002007010112000an0regular_ll
4ecmftisobaricInhPa8502007010112000an0regular_ll
5ecmfzisobaricInhPa8502007010112000an0regular_ll
\n", "" ], "text/plain": [ " centre shortName typeOfLevel level dataDate dataTime stepRange \\\n", "0 ecmf 2t surface 0 20200513 1200 0 \n", "1 ecmf msl surface 0 20200513 1200 0 \n", "2 ecmf t isobaricInhPa 500 20070101 1200 0 \n", "3 ecmf z isobaricInhPa 500 20070101 1200 0 \n", "4 ecmf t isobaricInhPa 850 20070101 1200 0 \n", "5 ecmf z isobaricInhPa 850 20070101 1200 0 \n", "\n", " dataType number gridType \n", "0 an 0 regular_ll \n", "1 an 0 regular_ll \n", "2 an 0 regular_ll \n", "3 an 0 regular_ll \n", "4 an 0 regular_ll \n", "5 an 0 regular_ll " ] }, "execution_count": 4, "metadata": {}, "output_type": "execute_result" } ], "source": [ "fs.ls()" ] }, { "cell_type": "markdown", "id": "118a94ff-5a8e-4f78-88f5-5b231216adb0", "metadata": {}, "source": [ "### Using multiple URLs" ] }, { "cell_type": "markdown", "id": "37a16e66-04b7-4316-b631-0a48c01f4269", "metadata": {}, "source": [ "We can access a list of URLs in one go. In the example below the first file contains 2 fields while the second one contains 4 fields." 
] }, { "cell_type": "code", "execution_count": 5, "id": "f03c1cd4-2a74-4756-8a43-5ae5c6e53a0f", "metadata": {}, "outputs": [ { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "b2f113935f6a4307af763c78cc521ea2", "version_major": 2, "version_minor": 0 }, "text/plain": [ ": 0%| | 0.00/511k [00:00\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
centreshortNametypeOfLevelleveldataDatedataTimestepRangedataTypenumbergridType
0ecmf2tsurface02020051312000an0regular_ll
1ecmfmslsurface02020051312000an0regular_ll
2ecmftisobaricInhPa5002007010112000an0regular_ll
3ecmfzisobaricInhPa5002007010112000an0regular_ll
4ecmftisobaricInhPa8502007010112000an0regular_ll
5ecmfzisobaricInhPa8502007010112000an0regular_ll
\n", "" ], "text/plain": [ " centre shortName typeOfLevel level dataDate dataTime stepRange \\\n", "0 ecmf 2t surface 0 20200513 1200 0 \n", "1 ecmf msl surface 0 20200513 1200 0 \n", "2 ecmf t isobaricInhPa 500 20070101 1200 0 \n", "3 ecmf z isobaricInhPa 500 20070101 1200 0 \n", "4 ecmf t isobaricInhPa 850 20070101 1200 0 \n", "5 ecmf z isobaricInhPa 850 20070101 1200 0 \n", "\n", " dataType number gridType \n", "0 an 0 regular_ll \n", "1 an 0 regular_ll \n", "2 an 0 regular_ll \n", "3 an 0 regular_ll \n", "4 an 0 regular_ll \n", "5 an 0 regular_ll " ] }, "execution_count": 5, "metadata": {}, "output_type": "execute_result" } ], "source": [ "fs = ekd.from_source(\"url\", \n", " [\"https://sites.ecmwf.int/repository/earthkit-data/examples/test.grib\",\n", " \"https://sites.ecmwf.int/repository/earthkit-data/examples/test4.grib\"])\n", "fs.ls()" ] }, { "cell_type": "markdown", "id": "585e7e29-9e50-49cb-96af-13d4d725de6a", "metadata": {}, "source": [ "### Using URL patterns" ] }, { "cell_type": "raw", "id": "a5bde9a5-c859-4615-b73d-0fe16609de5a", "metadata": { "editable": true, "raw_mimetype": "text/restructuredtext", "slideshow": { "slide_type": "" }, "tags": [] }, "source": [ "URLs can also be specified by using :ref:`url-patterns `. 
In the example below when pattern \"id\" is substituted it will match two files: test4.grib and test6.grib:" ] }, { "cell_type": "code", "execution_count": 6, "id": "246e985b-f16e-4012-a0b5-fced45044017", "metadata": { "editable": true, "slideshow": { "slide_type": "" }, "tags": [] }, "outputs": [ { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "2cd769cc0f414658bd473d902a89e4dd", "version_major": 2, "version_minor": 0 }, "text/plain": [ " 0%| | 0/2 [00:00\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
centreshortNametypeOfLevelleveldataDatedataTimestepRangedataTypenumbergridType
0ecmftisobaricInhPa5002007010112000an0regular_ll
1ecmfzisobaricInhPa5002007010112000an0regular_ll
2ecmftisobaricInhPa8502007010112000an0regular_ll
3ecmfzisobaricInhPa8502007010112000an0regular_ll
4ecmftisobaricInhPa10002018080112000an0regular_ll
5ecmfuisobaricInhPa10002018080112000an0regular_ll
6ecmfvisobaricInhPa10002018080112000an0regular_ll
7ecmftisobaricInhPa8502018080112000an0regular_ll
8ecmfuisobaricInhPa8502018080112000an0regular_ll
9ecmfvisobaricInhPa8502018080112000an0regular_ll
\n", "" ], "text/plain": [ " centre shortName typeOfLevel level dataDate dataTime stepRange \\\n", "0 ecmf t isobaricInhPa 500 20070101 1200 0 \n", "1 ecmf z isobaricInhPa 500 20070101 1200 0 \n", "2 ecmf t isobaricInhPa 850 20070101 1200 0 \n", "3 ecmf z isobaricInhPa 850 20070101 1200 0 \n", "4 ecmf t isobaricInhPa 1000 20180801 1200 0 \n", "5 ecmf u isobaricInhPa 1000 20180801 1200 0 \n", "6 ecmf v isobaricInhPa 1000 20180801 1200 0 \n", "7 ecmf t isobaricInhPa 850 20180801 1200 0 \n", "8 ecmf u isobaricInhPa 850 20180801 1200 0 \n", "9 ecmf v isobaricInhPa 850 20180801 1200 0 \n", "\n", " dataType number gridType \n", "0 an 0 regular_ll \n", "1 an 0 regular_ll \n", "2 an 0 regular_ll \n", "3 an 0 regular_ll \n", "4 an 0 regular_ll \n", "5 an 0 regular_ll \n", "6 an 0 regular_ll \n", "7 an 0 regular_ll \n", "8 an 0 regular_ll \n", "9 an 0 regular_ll " ] }, "execution_count": 6, "metadata": {}, "output_type": "execute_result" } ], "source": [ "fs = ekd.from_source(\"url-pattern\", \n", " \"https://sites.ecmwf.int/repository/earthkit-data/examples/test{id}.grib\",\n", " {\"id\": [4, 6]})\n", "fs.ls()" ] }, { "cell_type": "markdown", "id": "a0420b9d-3d9a-4357-9044-3dec40951b80", "metadata": {}, "source": [ "We can specify a format for each pattern. In this example \"my_date\" is the pattern name and \":date(%Y-%m-%d)\" specifies the format:" ] }, { "cell_type": "code", "execution_count": 7, "id": "7d06c47a-7c15-4f38-95c0-89d4cee811ed", "metadata": {}, "outputs": [ { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "ee595274a5c040268833fbe018f56f90", "version_major": 2, "version_minor": 0 }, "text/plain": [ " 0%| | 0/2 [00:00\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
centreshortNametypeOfLevelleveldataDatedataTimestepRangedataTypenumbergridType
0ecmf2tsurface02020051312000an0regular_ll
1ecmfmslsurface02020051312000an0regular_ll
\n", "" ], "text/plain": [ " centre shortName typeOfLevel level dataDate dataTime stepRange dataType \\\n", "0 ecmf 2t surface 0 20200513 1200 0 an \n", "1 ecmf msl surface 0 20200513 1200 0 an \n", "\n", " number gridType \n", "0 0 regular_ll \n", "1 0 regular_ll " ] }, "execution_count": 7, "metadata": {}, "output_type": "execute_result" } ], "source": [ "import datetime \n", "\n", "fs = ekd.from_source(\n", " \"url-pattern\", \n", " \"https://sites.ecmwf.int/repository/earthkit-data/test-data/test_{my_date:date(%Y-%m-%d)}_{name}.grib\",\n", " {\"my_date\": datetime.datetime(2020,5,13), \"name\": [\"t2\",\"msl\"]})\n", "fs.ls()\n" ] }, { "cell_type": "code", "execution_count": null, "id": "eefb68f5-a714-44c2-98e2-71d9352036dd", "metadata": {}, "outputs": [], "source": [] } ], "metadata": { "kernelspec": { "display_name": "dev", "language": "python", "name": "dev" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.11.12" } }, "nbformat": 4, "nbformat_minor": 5 }