{ "cells": [ { "cell_type": "markdown", "id": "98d9d0d8", "metadata": { "tags": [ "papermill-error-cell-tag" ] }, "source": [ "An Exception was encountered at 'In [1]'." ] }, { "cell_type": "markdown", "id": "43986fc8", "metadata": { "editable": true, "papermill": { "duration": 0.009847, "end_time": "2025-11-25T01:42:46.437553", "exception": false, "start_time": "2025-11-25T01:42:46.427706", "status": "completed" }, "slideshow": { "slide_type": "" }, "tags": [] }, "source": [ "# ROF global monthly, annual, seasonal flows analysis \n", "\n", "Use the following datasets\n", "\n", "1. reach-D19 gauge link ascii\n", "2. D19 flow site geopackage\n", "3. D19 discharge netCDF\n", "4. history netCD including river discharge\n", "\n", "[1. Setupt](#setup)\n", "\n", "[2. Loading data](#load_data)\n", "\n", "- Read monthly history files from archive. \n", "- Reference data: monthly discharge estimates at 922 big river mouths from Dai et al. 2019 data (D19)\n", "\n", "[3. Large 24 river analysis](#24_large_rivers)\n", "\n", "- Plotting time seriese (annual, seasonal cycle).\n", "- if D19 referece data is available, scatter plots at Large 24 selected rivers against D19 referece data\n", "\n", "[4. Large 50 rivers analysis](#50_large_rivers)\n", "\n", "- Annual flow summary table at large 50 selected rivers.\n", "- if D19 referece data is available, Scatter plot of annual flow against D19 reference data.\n", "\n", "[5. 922 rivers analysis](#922_rivers)\n", "\n", "- run only if reference flow is available\n", "- error statistics (%bias, rmse, correlation) at all 922 river sites.\n", "- plot error statistic on the global map\n", "- plot boxplots including case(s) for each error metric" ] }, { "cell_type": "markdown", "id": "aae1c9e1", "metadata": { "tags": [ "papermill-error-cell-tag" ] }, "source": [ "Execution using papermill encountered an exception here and stopped:" ] }, { "cell_type": "code", "execution_count": 1, "id": "11b508f0", "metadata": { "editable": true, "execution": { "iopub.execute_input": "2025-11-25T01:42:46.457945Z", "iopub.status.busy": "2025-11-25T01:42:46.457698Z", "iopub.status.idle": "2025-11-25T01:42:56.298505Z", "shell.execute_reply": "2025-11-25T01:42:56.297705Z" }, "papermill": { "duration": 9.852122, "end_time": "2025-11-25T01:42:56.299370", "exception": true, "start_time": "2025-11-25T01:42:46.447248", "status": "failed" }, "slideshow": { "slide_type": "" }, "tags": [ "hide-input" ] }, "outputs": [ { "ename": "ModuleNotFoundError", "evalue": "No module named 'cupid_utils'", "output_type": "error", "traceback": [ "\u001b[31m---------------------------------------------------------------------------\u001b[39m", "\u001b[31mModuleNotFoundError\u001b[39m Traceback (most recent call last)", "\u001b[36mCell\u001b[39m\u001b[36m \u001b[39m\u001b[32mIn[1]\u001b[39m\u001b[32m, line 16\u001b[39m\n\u001b[32m 13\u001b[39m \u001b[38;5;28;01mimport\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[34;01mcartopy\u001b[39;00m\u001b[34;01m.\u001b[39;00m\u001b[34;01mcrs\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[38;5;28;01mas\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[34;01mccrs\u001b[39;00m\n\u001b[32m 14\u001b[39m \u001b[38;5;28;01mimport\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[34;01mcartopy\u001b[39;00m\u001b[34;01m.\u001b[39;00m\u001b[34;01mfeature\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[38;5;28;01mas\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[34;01mcfeature\u001b[39;00m\n\u001b[32m---> \u001b[39m\u001b[32m16\u001b[39m \u001b[38;5;28;01mimport\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[34;01mcupid_utils\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[38;5;28;01mas\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[34;01mcu\u001b[39;00m \u001b[38;5;66;03m# go buffs!\u001b[39;00m\n\u001b[32m 17\u001b[39m \u001b[38;5;28;01mfrom\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[34;01mcupid_utils\u001b[39;00m\u001b[34;01m.\u001b[39;00m\u001b[34;01mrof\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[38;5;28;01mimport\u001b[39;00m colors\n\u001b[32m 18\u001b[39m \u001b[38;5;28;01mfrom\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[34;01mcupid_utils\u001b[39;00m\u001b[34;01m.\u001b[39;00m\u001b[34;01mrof\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[38;5;28;01mimport\u001b[39;00m metrics \u001b[38;5;28;01mas\u001b[39;00m metric\n", "\u001b[31mModuleNotFoundError\u001b[39m: No module named 'cupid_utils'" ] } ], "source": [ "%matplotlib inline\n", "\n", "import os, sys\n", "import glob\n", "import matplotlib.pyplot as plt\n", "import matplotlib as mpl\n", "from matplotlib.ticker import FormatStrFormatter\n", "from sklearn.linear_model import LinearRegression\n", "import numpy as np\n", "import pandas as pd\n", "import geopandas as gpd\n", "import xarray as xr\n", "import cartopy.crs as ccrs\n", "import cartopy.feature as cfeature\n", "\n", "import cupid_utils as cu # go buffs!\n", "from cupid_utils.rof import colors\n", "from cupid_utils.rof import metrics as metric\n", "from cupid_utils.rof import load_yaml\n", "from cupid_utils.rof import no_time_variable\n", "\n", "rivers_50m = cfeature.NaturalEarthFeature(\"physical\", \"rivers_lake_centerlines\", \"50m\")\n", "land = cfeature.LAND\n", "\n", "print(\"\\nThe Python version: %s.%s.%s\" % sys.version_info[:3])\n", "print(xr.__name__, xr.__version__)\n", "print(pd.__name__, pd.__version__)\n", "print(gpd.__name__, gpd.__version__)" ] }, { "cell_type": "markdown", "id": "49dc57e4", "metadata": { "papermill": { "duration": null, "end_time": null, "exception": null, "start_time": null, "status": "pending" }, "tags": [] }, "source": [ "-------------------------\n", "## 1. Setup " ] }, { "cell_type": "code", "execution_count": null, "id": "f0b97ce6", "metadata": { "editable": true, "papermill": { "duration": null, "end_time": null, "exception": null, "start_time": null, "status": "pending" }, "slideshow": { "slide_type": "" }, "tags": [ "parameters", "hide-input" ] }, "outputs": [], "source": [ "# Parameter Defaults\n", "# parameters are set in CUPiD's config.yml file\n", "# when running interactively, manually set the parameters below\n", "\n", "# global parameters\n", "CESM_output_dir = \"\" # e.g., \"/glade/campaign/cesm/development/cross-wg/diagnostic_framework/CESM_output_for_testing\"\n", "case_name = None # case name: e.g., \"b.e30_beta02.BLT1850.ne30_t232.104\"\n", "base_case_name = None # base case name: e.g., \"b.e23_alpha17f.BLT1850.ne30_t232.092\"\n", "base_case_output_dir = None\n", "start_date = \"\" # simulation starting date: e.g., \"0001-01-01\"\n", "end_date = \"\" # simulation ending date: \"0100-01-01\"\n", "base_start_date = \"\" # base simulation starting date: \"0001-01-01\"\n", "base_end_date = \"\" # base simulation ending date: \"0100-01-01\"\n", "serial = True # use dask LocalCluster\n", "lc_kwargs = {}\n", "product = \"./\"\n", "\n", "# rof parameters\n", "hist_str = \"\" # Used for hist file tag\n", "analysis_name = \"\" # Used for Figure png names\n", "climo_nyears = 10 # number of years to compute the climatology\n", "grid_name = \"f09_f09_mosart\" # ROF grid name used in case\n", "base_grid_name = (\n", " grid_name # spcify ROF grid name for base_case in config.yml if different than case\n", ")\n", "figureSave = False" ] }, { "cell_type": "code", "execution_count": null, "id": "31125a1d", "metadata": { "papermill": { "duration": null, "end_time": null, "exception": null, "start_time": null, "status": "pending" }, "tags": [ "injected-parameters" ] }, "outputs": [], "source": [ "# Parameters\n", "case_name = \"b.e30_alpha07c_cesm.B1850C_LTso.ne30_t232_wgx3.232\"\n", "base_case_name = \"b.e30_alpha07c_cesm.B1850C_LTso.ne30_t232_wgx3.228\"\n", "case_nickname = \"BLT1850_232\"\n", "base_case_nickname = \"BLT1850_228\"\n", "CESM_output_dir = \"/glade/campaign/cesm/development/cross-wg/diagnostic_framework/CESM_output_for_testing\"\n", "start_date = \"0001-01-01\"\n", "end_date = \"0021-01-01\"\n", "climo_start_year = 11\n", "climo_end_year = 21\n", "base_start_date = \"0001-01-01\"\n", "base_end_date = \"0045-01-01\"\n", "base_climo_start_year = 36\n", "base_climo_end_year = 45\n", "obs_data_dir = (\n", " \"/glade/campaign/cesm/development/cross-wg/diagnostic_framework/CUPiD_obs_data\"\n", ")\n", "ts_dir = None\n", "lc_kwargs = {\"threads_per_worker\": 1}\n", "serial = False\n", "hist_str = \"h0a\"\n", "analysis_name = \"\"\n", "grid_name = \"f09_f09_mosart\"\n", "climo_nyears = 10\n", "figureSave = False\n", "subset_kwargs = {}\n", "product = \"/glade/work/richling/cupid-test-2/CUPiD/examples/key_metrics/computed_notebooks//rof/global_discharge_gauge_compare_obs.ipynb\"\n" ] }, { "cell_type": "code", "execution_count": null, "id": "93613811", "metadata": { "papermill": { "duration": null, "end_time": null, "exception": null, "start_time": null, "status": "pending" }, "tags": [ "hide-input" ] }, "outputs": [], "source": [ "if base_case_output_dir is None:\n", " base_case_output_dir = CESM_output_dir\n", "\n", "out_dir = os.path.join(os.path.dirname(product), \"rof_nb_output\")\n", "os.makedirs(out_dir, exist_ok=True)" ] }, { "cell_type": "code", "execution_count": null, "id": "3b8233de", "metadata": { "editable": true, "papermill": { "duration": null, "end_time": null, "exception": null, "start_time": null, "status": "pending" }, "slideshow": { "slide_type": "" }, "tags": [ "hide-input" ] }, "outputs": [], "source": [ "# ROF additional setup\n", "setup = load_yaml(os.path.join(cu.__path__[0], \"..\", \"setup\", \"setup.yaml\"))\n", "\n", "ancillary_dir = setup[\n", " \"ancillary_dir\"\n", "] # ancillary directory including ROF domain, river network data etc.\n", "ref_flow_dir = setup[\"ref_flow_dir\"] # including observed or reference flow data\n", "case_meta = setup[\"case_meta\"] # Case metadata\n", "reach_gpkg = setup[\"reach_gpkg\"] # river reach geopackage meta\n", "\n", "if not analysis_name:\n", " analysis_name = case_name\n", "if not base_grid_name:\n", " base_grid_name = grid_name\n", "\n", "case_dic = {\n", " case_name: {\n", " \"grid\": grid_name,\n", " \"sim_period\": slice(f\"{start_date}\", f\"{end_date}\"),\n", " \"climo_nyrs\": min(climo_nyears, int(end_date[:4]) - int(start_date[:4]) + 1),\n", " \"output_dir\": CESM_output_dir,\n", " }\n", "}\n", "if base_case_name is not None:\n", " case_dic[base_case_name] = {\n", " \"grid\": base_grid_name,\n", " \"sim_period\": slice(f\"{base_start_date}\", f\"{base_end_date}\"),\n", " \"climo_nyrs\": min(\n", " climo_nyears, int(base_end_date[:4]) - int(base_start_date[:4]) + 1\n", " ),\n", " \"output_dir\": base_case_output_dir,\n", " }" ] }, { "cell_type": "markdown", "id": "4c0a5980", "metadata": { "papermill": { "duration": null, "end_time": null, "exception": null, "start_time": null, "status": "pending" }, "tags": [] }, "source": [ "### Dasks" ] }, { "cell_type": "code", "execution_count": null, "id": "9ea2783f", "metadata": { "editable": true, "papermill": { "duration": null, "end_time": null, "exception": null, "start_time": null, "status": "pending" }, "slideshow": { "slide_type": "" }, "tags": [ "hide-input" ] }, "outputs": [], "source": [ "# When running interactively, cupid_run should be set to False\n", "cupid_run = True\n", "\n", "client = None\n", "if cupid_run:\n", " # Spin up cluster (if running in parallel)\n", " if not serial:\n", " from dask.distributed import Client, LocalCluster\n", "\n", " cluster = LocalCluster(**lc_kwargs)\n", " client = Client(cluster)\n", "else:\n", " if not serial:\n", " from dask.distributed import Client\n", " from dask_jobqueue import PBSCluster\n", "\n", " cluster = PBSCluster(\n", " cores=1,\n", " processes=1,\n", " memory=\"50GB\",\n", " queue=\"casper\",\n", " walltime=\"01:00:00\",\n", " )\n", " cluster.scale(jobs=10)\n", " client = Client(cluster)\n", "client" ] }, { "cell_type": "markdown", "id": "9207c029", "metadata": { "papermill": { "duration": null, "end_time": null, "exception": null, "start_time": null, "status": "pending" }, "tags": [] }, "source": [ "## 2. Loading data " ] }, { "cell_type": "markdown", "id": "9bef2706", "metadata": { "papermill": { "duration": null, "end_time": null, "exception": null, "start_time": null, "status": "pending" }, "tags": [] }, "source": [ "### 2.1. Monthly/annual flow netCDFs\n", "- month_data (xr dataset) - read from archive\n", "- year_data (xr dataset) - computed from monthly\n", "- seas_data (xr dataset) - computed from monthly" ] }, { "cell_type": "code", "execution_count": null, "id": "32584621", "metadata": { "editable": true, "papermill": { "duration": null, "end_time": null, "exception": null, "start_time": null, "status": "pending" }, "slideshow": { "slide_type": "" }, "tags": [ "hide-input" ] }, "outputs": [], "source": [ "%%time\n", "\n", "reachID = {}\n", "month_data = {}\n", "year_data = {}\n", "seas_data = {}\n", "for case, meta in case_dic.items():\n", " in_dire = os.path.join(meta[\"output_dir\"], case, \"rof/hist\")\n", " model = case_meta[meta[\"grid\"]][\"model\"]\n", " domain = case_meta[meta[\"grid\"]][\"domain_nc\"]\n", " var_list = case_meta[meta[\"grid\"]][\"vars_read\"]\n", "\n", " def preprocess(ds):\n", " return ds[var_list]\n", "\n", " year_list = [\n", " \"{:04d}\".format(yr)\n", " for yr in np.arange(\n", " int(meta[\"sim_period\"].start[0:4]), int(meta[\"sim_period\"].stop[0:4]) + 1\n", " )\n", " ]\n", "\n", " nc_list = []\n", " for nc_path in sorted(glob.glob(f\"{in_dire}/{case}.{model}.{hist_str}.????-*.nc\")):\n", " for yr in year_list:\n", " if yr in os.path.basename(nc_path):\n", " nc_list.append(nc_path)\n", "\n", " # load data\n", " ds = xr.open_mfdataset(\n", " nc_list,\n", " data_vars=\"minimal\",\n", " parallel=True,\n", " preprocess=preprocess,\n", " ).sel(time=meta[\"sim_period\"])\n", "\n", " # monthly\n", " month_data[case] = ds.isel(time=slice(-meta[\"climo_nyrs\"] * 12, None)).load()\n", " # annual\n", " year_data[case] = (\n", " ds.isel(time=slice(-meta[\"climo_nyrs\"] * 12, None))\n", " .resample(time=\"YS\")\n", " .mean(dim=\"time\")\n", " .load()\n", " )\n", " # seasonal (compute here instead of reading for conisistent analysis period)\n", " seas_data[case] = (\n", " ds.isel(time=slice(-meta[\"climo_nyrs\"] * 12, None))\n", " .groupby(\"time.month\")\n", " .mean(\"time\")\n", " .load()\n", " )\n", " vars_no_time = no_time_variable(month_data[case])\n", " if vars_no_time:\n", " seas_data[case][vars_no_time] = seas_data[case][vars_no_time].isel(\n", " month=0, drop=True\n", " )\n", " mon_time = month_data[case].time.values\n", " if domain == \"None\":\n", " reachID[case] = month_data[case][\"reachID\"].values\n", " else:\n", " reachID[case] = xr.open_dataset(f\"{ancillary_dir}/{domain}\")[\"reachID\"].values\n", " print(f\"Finished loading {case}\")" ] }, { "cell_type": "markdown", "id": "cc8ed6f9", "metadata": { "papermill": { "duration": null, "end_time": null, "exception": null, "start_time": null, "status": "pending" }, "tags": [] }, "source": [ "### 2.2 Large river ID and name ascii\n", "- big_river_50: dictionary {_site_id_:_river name_}\n", "- big_river_24: dictionary {_site_id_:_river name_}" ] }, { "cell_type": "code", "execution_count": null, "id": "ab79440d", "metadata": { "editable": true, "papermill": { "duration": null, "end_time": null, "exception": null, "start_time": null, "status": "pending" }, "slideshow": { "slide_type": "" }, "tags": [ "hide-input" ] }, "outputs": [], "source": [ "df = pd.read_csv(\n", " os.path.join(cu.__path__[0], \"..\", \"setup\", \"large_river_50.txt\"),\n", " index_col=\"river_name\",\n", ")\n", "big_river_50 = {key: values[\"site_id\"] for key, values in df.iterrows()}\n", "big_river_24 = {\n", " key: values[\"site_id\"] for ix, (key, values) in enumerate(df.iterrows()) if ix < 24\n", "} # The first 24 is used for plots" ] }, { "cell_type": "markdown", "id": "f8f4ccce", "metadata": { "papermill": { "duration": null, "end_time": null, "exception": null, "start_time": null, "status": "pending" }, "tags": [] }, "source": [ "### 2.3. reach-D19 gauge link csv\n", "- gauge_reach_lnk (dataframe)" ] }, { "cell_type": "code", "execution_count": null, "id": "9d5d0885", "metadata": { "editable": true, "papermill": { "duration": null, "end_time": null, "exception": null, "start_time": null, "status": "pending" }, "slideshow": { "slide_type": "" }, "tags": [ "hide-input" ] }, "outputs": [], "source": [ "gauge_reach_lnk = {}\n", "for case, meta in case_dic.items():\n", " gauge_reach_lnk[case] = pd.read_csv(\n", " \"%s/D09/D09_925.%s.asc\" % (ref_flow_dir, case_meta[meta[\"grid\"]][\"network\"])\n", " )" ] }, { "cell_type": "markdown", "id": "1eb889af", "metadata": { "papermill": { "duration": null, "end_time": null, "exception": null, "start_time": null, "status": "pending" }, "tags": [] }, "source": [ "### 2.4 D19 flow site shapefile\n", "- gauge_shp (dataframe)" ] }, { "cell_type": "code", "execution_count": null, "id": "2ca46aa3", "metadata": { "editable": true, "papermill": { "duration": null, "end_time": null, "exception": null, "start_time": null, "status": "pending" }, "slideshow": { "slide_type": "" }, "tags": [ "hide-input" ] }, "outputs": [], "source": [ "%%time\n", "\n", "gauge_shp = gpd.read_file(\n", " os.path.join(ref_flow_dir, \"D09\", \"geospatial\", \"D09_925.gpkg\")\n", ")\n", "gauge_shp = gauge_shp[gauge_shp[\"id\"] != 9999999]" ] }, { "cell_type": "markdown", "id": "cbe6a395", "metadata": { "papermill": { "duration": null, "end_time": null, "exception": null, "start_time": null, "status": "pending" }, "tags": [] }, "source": [ "### 2.5 D19 discharge data\n", "- ds_q_obs_mon (xr datasets)\n", "- ds_q_obs_yr (xr datasets)\n", "- dr_q_obs_seasonal (xr datasets)" ] }, { "cell_type": "code", "execution_count": null, "id": "3952b872", "metadata": { "editable": true, "papermill": { "duration": null, "end_time": null, "exception": null, "start_time": null, "status": "pending" }, "slideshow": { "slide_type": "" }, "tags": [ "hide-input" ] }, "outputs": [], "source": [ "%%time\n", "\n", "# read monthly data\n", "ds_q = xr.open_dataset(\n", " \"%s/D09/coastal-stns-Vol-monthly.updated-May2019.mod.nc\" % (ref_flow_dir),\n", " decode_times=False,\n", ")\n", "ds_q[\"time\"] = xr.cftime_range(\n", " start=\"1900-01-01\", end=\"2018-12-01\", freq=\"MS\", calendar=\"standard\"\n", ")\n", "\n", "# monthly - if time_period is outside observation period, use the entire obs period\n", "obs_available = True\n", "if ds_q[\"time\"].sel(time=slice(str(mon_time[0]), str(mon_time[-1]))).values.size == 0:\n", " obs_available = False\n", " ds_q_obs_mon = ds_q[\"FLOW\"]\n", "else:\n", " ds_q_obs_mon = ds_q[\"FLOW\"].sel(time=slice(str(mon_time[0]), str(mon_time[-1])))\n", "# compute annual flow from monthly\n", "ds_q_obs_yr = ds_q_obs_mon.resample(time=\"YE\").mean(dim=\"time\")\n", "# compute annual cycle at monthly scale\n", "dr_q_obs_seasonal = ds_q_obs_mon.groupby(\"time.month\").mean(\"time\")\n", "# annual mean, max, and min flow during time_period\n", "ds_q_obs_yr_min = ds_q_obs_yr.min(\"time\")\n", "ds_q_obs_yr_max = ds_q_obs_yr.max(\"time\")\n", "ds_q_obs_yr_mean = ds_q_obs_yr.mean(\"time\")" ] }, { "cell_type": "markdown", "id": "7203a502", "metadata": { "papermill": { "duration": null, "end_time": null, "exception": null, "start_time": null, "status": "pending" }, "tags": [] }, "source": [ "### 2.6 Get indices in observation and simulation for gauge name (processing)\n", "- gauge_plot (dictionary)" ] }, { "cell_type": "code", "execution_count": null, "id": "aca1ad68", "metadata": { "editable": true, "papermill": { "duration": null, "end_time": null, "exception": null, "start_time": null, "status": "pending" }, "slideshow": { "slide_type": "" }, "tags": [ "hide-input" ] }, "outputs": [], "source": [ "gauge_plot = {}\n", "for gname, site_id in big_river_50.items():\n", " gauge_plot[gname] = {}\n", " for case in case_dic.keys():\n", " gauge_ix = [\n", " i for i, gid in enumerate(ds_q.id.values) if gid == site_id\n", " ] # go through obs Dataset and get index matching to river (gauge) name\n", " gauge_id = ds_q.id.values[gauge_ix][0] ## guage ID\n", " seg_id = (\n", " gauge_reach_lnk[case]\n", " .loc[gauge_reach_lnk[case][\"gauge_id\"] == gauge_id][\"route_id\"]\n", " .values\n", " ) # matching reach ID in river network\n", " seg_ix = np.argwhere(\n", " reachID[case] == seg_id\n", " ) # matching reach index in river network\n", " if len(seg_ix) == 0:\n", " seg_ix = -999\n", " else:\n", " seg_ix = seg_ix[0]\n", " gauge_plot[gname][case] = [gauge_ix, seg_ix, seg_id]" ] }, { "cell_type": "markdown", "id": "38963b81", "metadata": { "papermill": { "duration": null, "end_time": null, "exception": null, "start_time": null, "status": "pending" }, "tags": [] }, "source": [ "------\n", "## 3. Analysis for 24 large rivers " ] }, { "cell_type": "markdown", "id": "813c86e0", "metadata": { "papermill": { "duration": null, "end_time": null, "exception": null, "start_time": null, "status": "pending" }, "tags": [] }, "source": [ "### 3.1 Annual flow series" ] }, { "cell_type": "code", "execution_count": null, "id": "0ead9053", "metadata": { "editable": true, "papermill": { "duration": null, "end_time": null, "exception": null, "start_time": null, "status": "pending" }, "slideshow": { "slide_type": "" }, "tags": [ "hide-input" ] }, "outputs": [], "source": [ "%%time\n", "\n", "fig, axes = plt.subplots(8, 3, figsize=(7.25, 11.5))\n", "plt.subplots_adjust(\n", " top=0.975, bottom=0.065, right=0.98, left=0.10, hspace=0.225, wspace=0.250\n", ") # create some space below the plots by increasing the bottom-value\n", "\n", "for ix, river_name in enumerate(big_river_24.keys()):\n", " row = ix // 3\n", " col = ix % 3\n", " for case, meta in case_dic.items():\n", " net_idx = gauge_plot[river_name][case][1]\n", " gaug_idx = gauge_plot[river_name][case][0]\n", "\n", " q_name = case_meta[meta[\"grid\"]][\"flow_name\"]\n", "\n", " if len(net_idx) == 1:\n", " year_data[case][q_name][:, net_idx].plot(\n", " ax=axes[row, col], linestyle=\"-\", lw=0.75, label=case\n", " )\n", " elif len(net_idx) == 2: # means 2d grid\n", " year_data[case][q_name][:, net_idx[0], net_idx[1]].plot(\n", " ax=axes[row, col], linestyle=\"-\", lw=0.75, label=case\n", " )\n", " if obs_available:\n", " ds_q_obs_yr.loc[:, gaug_idx].plot(\n", " ax=axes[row, col],\n", " linestyle=\"None\",\n", " marker=\"o\",\n", " markersize=3,\n", " c=\"k\",\n", " label=\"D19\",\n", " )\n", " else:\n", " qob_mean = ds_q_obs_yr_mean.loc[gaug_idx]\n", " qob_max = ds_q_obs_yr_max.loc[gaug_idx]\n", " qob_min = ds_q_obs_yr_min.loc[gaug_idx]\n", " axes[row, col].axhline(\n", " y=qob_mean, color=\"k\", linestyle=\"--\", lw=0.5, label=\"D19 mean (1900-2018)\"\n", " )\n", " axes[row, col].axhspan(\n", " qob_min[0],\n", " qob_max[0],\n", " color=\"gray\",\n", " alpha=0.1,\n", " label=\"D19 min-max (1900-2018)\",\n", " )\n", "\n", " axes[row, col].set_title(\"%d %s\" % (ix + 1, river_name), fontsize=8)\n", "\n", " axes[row, col].set_xlabel(\"\")\n", " if row < 7:\n", " axes[row, col].set_xticklabels(\"\")\n", " if col == 0:\n", " axes[row, col].set_ylabel(\"Annual flow [m$^3$/s]\", fontsize=8)\n", " else:\n", " axes[row, col].set_ylabel(\"\")\n", " axes[row, col].tick_params(\"both\", labelsize=\"xx-small\")\n", "\n", "# Legend- make space below the plot-raise bottom. there will be an label below the second last (bottom middle) ax, thanks to the bbox_to_anchor=(x, y) with a negative y-value.\n", "axes.flatten()[-2].legend(\n", " loc=\"upper center\",\n", " bbox_to_anchor=(0.125, -0.35, 0.75, 0.1),\n", " ncol=5,\n", " fontsize=\"x-small\",\n", ")\n", "\n", "if figureSave:\n", " plt.savefig(f\"{out_dir}/NB1_Fig1_big_river_annual_{analysis_name}.png\", dpi=200)" ] }, { "cell_type": "markdown", "id": "7e495343", "metadata": { "papermill": { "duration": null, "end_time": null, "exception": null, "start_time": null, "status": "pending" }, "tags": [] }, "source": [ "### 3.2. Annual cycle at monthly step" ] }, { "cell_type": "code", "execution_count": null, "id": "5cc2df46", "metadata": { "editable": true, "papermill": { "duration": null, "end_time": null, "exception": null, "start_time": null, "status": "pending" }, "slideshow": { "slide_type": "" }, "tags": [ "hide-input" ] }, "outputs": [], "source": [ "%%time\n", "\n", "fig, axes = plt.subplots(8, 3, figsize=(7.25, 11.5))\n", "plt.subplots_adjust(\n", " top=0.975, bottom=0.065, right=0.98, left=0.10, hspace=0.225, wspace=0.250\n", ") # create some space below the plots by increasing the bottom-value\n", "\n", "for ix, river_name in enumerate(big_river_24.keys()):\n", " row = ix // 3\n", " col = ix % 3\n", " for case, meta in case_dic.items():\n", " net_idx = gauge_plot[river_name][case][1]\n", " gaug_idx = gauge_plot[river_name][case][0]\n", "\n", " q_name = case_meta[meta[\"grid\"]][\"flow_name\"]\n", "\n", " if len(net_idx) == 1: # means vector\n", " seas_data[case][q_name][:, net_idx].plot(\n", " ax=axes[row, col], linestyle=\"-\", lw=0.75, label=case\n", " )\n", " elif len(net_idx) == 2: # means 2d grid\n", " seas_data[case][q_name][:, net_idx[0], net_idx[1]].plot(\n", " ax=axes[row, col], linestyle=\"-\", lw=1.0, label=case\n", " )\n", "\n", " if obs_available:\n", " dr_q_obs_seasonal.loc[:, gaug_idx].plot(\n", " ax=axes[row, col],\n", " linestyle=\":\",\n", " lw=0.5,\n", " marker=\"o\",\n", " markersize=2,\n", " c=\"k\",\n", " label=\"D19\",\n", " )\n", "\n", " axes[row, col].set_title(\"%d %s\" % (ix + 1, river_name), fontsize=8)\n", " axes[row, col].set_xlabel(\"\")\n", " if row < 7:\n", " axes[row, col].set_xticklabels(\"\")\n", " if col == 0:\n", " axes[row, col].set_ylabel(\"Mon. flow [m$^3$/s]\", fontsize=8)\n", " else:\n", " axes[row, col].set_ylabel(\"\")\n", " axes[row, col].tick_params(\"both\", labelsize=\"xx-small\")\n", "\n", "# Legend- make space below the plot-raise bottom. there will be an label below the second last (bottom middle) ax, thanks to the bbox_to_anchor=(x, y) with a negative y-value.\n", "axes.flatten()[-2].legend(\n", " loc=\"upper center\",\n", " bbox_to_anchor=(0.10, -0.30, 0.75, 0.1),\n", " ncol=5,\n", " fontsize=\"x-small\",\n", ")\n", "\n", "if figureSave:\n", " plt.savefig(f\"{out_dir}/NB1_Fig2_big_river_season_{analysis_name}.png\", dpi=200)" ] }, { "cell_type": "markdown", "id": "9dc6be1a", "metadata": { "papermill": { "duration": null, "end_time": null, "exception": null, "start_time": null, "status": "pending" }, "tags": [] }, "source": [ "### 3.3. scatter plots of monthly flow - obs vs sim" ] }, { "cell_type": "code", "execution_count": null, "id": "9541a4a6", "metadata": { "editable": true, "papermill": { "duration": null, "end_time": null, "exception": null, "start_time": null, "status": "pending" }, "slideshow": { "slide_type": "" }, "tags": [ "hide-input" ] }, "outputs": [], "source": [ "%%time\n", "\n", "if obs_available:\n", " # Monthly flow scatter plot\n", " fig, axes = plt.subplots(8, 3, figsize=(7.50, 15.00))\n", " plt.subplots_adjust(\n", " top=0.995, bottom=0.075, right=0.995, left=0.1, wspace=0.25, hspace=0.25\n", " )\n", "\n", " for ix, river_name in enumerate(big_river_24.keys()):\n", " row = ix // 3\n", " col = ix % 3\n", " axes[row, col].yaxis.set_major_formatter(FormatStrFormatter(\"%.0f\"))\n", "\n", " for jx, (case, meta) in enumerate(case_dic.items()):\n", "\n", " net_idx = gauge_plot[river_name][case][1]\n", " gaug_idx = gauge_plot[river_name][case][0]\n", "\n", " q_name = case_meta[meta[\"grid\"]][\"flow_name\"]\n", "\n", " if len(net_idx) == 1: # means vector\n", " ds_sim = month_data[case][q_name][:, net_idx]\n", " elif len(net_idx) == 2: # means 2d grid\n", " ds_sim = month_data[case][q_name][:, net_idx[0], net_idx[1]].squeeze()\n", "\n", " ds_obs = ds_q_obs_mon.sel(time=meta[\"time_period\"]).loc[:, gaug_idx]\n", "\n", " axes[row, col].plot(\n", " ds_obs, ds_sim, \"o\", label=case, markersize=4.0, alpha=0.4\n", " )\n", " if jx == 0:\n", " max_val = np.max(ds_obs)\n", " min_val = np.min(ds_obs)\n", " else:\n", " max_val = np.max([np.max(ds_sim), max_val])\n", " min_val = np.min([np.min(ds_sim), min_val])\n", "\n", " axes[row, col].plot(\n", " [min_val * 0.98, max_val * 1.02],\n", " [min_val * 0.98, max_val * 1.02],\n", " \":k\",\n", " linewidth=0.5,\n", " )\n", "\n", " axes[row, col].annotate(\n", " \"%d %s\" % (ix + 1, river_name),\n", " xy=(0.05, 0.875),\n", " xycoords=\"axes fraction\",\n", " fontsize=8,\n", " bbox=dict(facecolor=\"white\", edgecolor=\"None\", alpha=0.8),\n", " )\n", " if row == 7 and col == 1:\n", " axes[row, col].set_xlabel(\n", " \"Monthly reference discharge [m$^3$/s]\", fontsize=9\n", " )\n", " else:\n", " axes[row, col].set_xlabel(\"\")\n", "\n", " if col == 0 and row == 5:\n", " axes[row, col].set_ylabel(\n", " \"Monthly simulated discharge [m$^3$/s]\", fontsize=10\n", " )\n", " else:\n", " axes[row, col].set_ylabel(\"\")\n", " axes[row, col].tick_params(\"both\", labelsize=\"xx-small\")\n", "\n", " axes.flatten()[-2].legend(\n", " loc=\"upper center\",\n", " bbox_to_anchor=(0.10, -0.40, 0.75, 0.1),\n", " ncol=5,\n", " fontsize=\"x-small\",\n", " )\n", "\n", " if figureSave:\n", " plt.savefig(\n", " f\"{out_dir}/NB1_Fig3_big_river_month_scatter_{analysis_name}.png\", dpi=200\n", " )" ] }, { "cell_type": "markdown", "id": "99f5ba40", "metadata": { "papermill": { "duration": null, "end_time": null, "exception": null, "start_time": null, "status": "pending" }, "tags": [] }, "source": [ "### 3.4. scatter plots of annual flow" ] }, { "cell_type": "code", "execution_count": null, "id": "b5ee5533", "metadata": { "editable": true, "papermill": { "duration": null, "end_time": null, "exception": null, "start_time": null, "status": "pending" }, "slideshow": { "slide_type": "" }, "tags": [ "hide-input" ] }, "outputs": [], "source": [ "%%time\n", "if obs_available:\n", " # annual flow scatter plot\n", " fig, axes = plt.subplots(8, 3, figsize=(7.50, 15.00))\n", " plt.subplots_adjust(\n", " top=0.995, bottom=0.075, right=0.995, left=0.1, wspace=0.25, hspace=0.25\n", " )\n", "\n", " for ix, river_name in enumerate(big_river_24.keys()):\n", " row = ix // 3\n", " col = ix % 3\n", " axes[row, col].yaxis.set_major_formatter(FormatStrFormatter(\"%.0f\"))\n", "\n", " for jx, (case, meta) in enumerate(case_dic.items()):\n", " net_idx = gauge_plot[river_name][case][1]\n", " gaug_idx = gauge_plot[river_name][case][0]\n", "\n", " q_name = case_meta[meta[\"grid\"]][\"flow_name\"]\n", "\n", " if len(net_idx) == 1: # means vector\n", " ds_sim = year_data[case][q_name][:, net_idx]\n", " elif len(net_idx) == 2: # means 2d grid\n", " ds_sim = year_data[case][q_name][:, net_idx[0], net_idx[1]]\n", "\n", " ds_obs = ds_q_obs_yr.sel(time=meta[\"time_period\"]).loc[:, gaug_idx]\n", "\n", " axes[row, col].plot(\n", " ds_obs, ds_sim, \"o\", label=case, markersize=4.0, alpha=0.4\n", " )\n", " if jx == 0:\n", " max_val = np.max(ds_obs)\n", " min_val = np.min(ds_obs)\n", " else:\n", " max_val = np.max([np.max(ds_sim), max_val])\n", " min_val = np.min([np.min(ds_sim), min_val])\n", "\n", " axes[row, col].plot(\n", " [min_val * 0.98, max_val * 1.02],\n", " [min_val * 0.98, max_val * 1.02],\n", " \":k\",\n", " linewidth=0.5,\n", " )\n", "\n", " axes[row, col].annotate(\n", " \"%d %s\" % (ix + 1, river_name),\n", " xy=(0.05, 0.875),\n", " xycoords=\"axes fraction\",\n", " fontsize=8,\n", " bbox=dict(facecolor=\"white\", edgecolor=\"None\", alpha=0.8),\n", " )\n", " if row == 7 and col == 1:\n", " axes[row, col].set_xlabel(\n", " \"Monthly reference discharge [m$^3$/s]\", fontsize=9\n", " )\n", " else:\n", " axes[row, col].set_xlabel(\"\")\n", "\n", " if col == 0 and row == 5:\n", " axes[row, col].set_ylabel(\n", " \"Monthly simulated discharge [m$^3$/s]\", fontsize=10\n", " )\n", " else:\n", " axes[row, col].set_ylabel(\"\")\n", " axes[row, col].tick_params(\"both\", labelsize=\"xx-small\")\n", "\n", " axes.flatten()[-2].legend(\n", " loc=\"upper center\",\n", " bbox_to_anchor=(0.10, -0.40, 0.75, 0.1),\n", " ncol=5,\n", " fontsize=\"x-small\",\n", " )\n", "\n", " if figureSave:\n", " plt.savefig(\n", " f\"{out_dir}/NB1_Fig4_big_river_annual_scatter_{analysis_name}.png\", dpi=200\n", " )" ] }, { "cell_type": "markdown", "id": "d2eac551", "metadata": { "papermill": { "duration": null, "end_time": null, "exception": null, "start_time": null, "status": "pending" }, "tags": [] }, "source": [ "## 4. Anaysis for Large 50 rivers " ] }, { "cell_type": "markdown", "id": "a08e80ec", "metadata": { "papermill": { "duration": null, "end_time": null, "exception": null, "start_time": null, "status": "pending" }, "tags": [] }, "source": [ "### 4.1 Summary tables" ] }, { "cell_type": "code", "execution_count": null, "id": "eeec623e", "metadata": { "editable": true, "papermill": { "duration": null, "end_time": null, "exception": null, "start_time": null, "status": "pending" }, "slideshow": { "slide_type": "" }, "tags": [ "hide-input" ] }, "outputs": [], "source": [ "%%time\n", "\n", "table_file = os.path.join(out_dir, f\"large50rivers_{analysis_name}.txt\")\n", "with open(table_file, \"w\") as f:\n", " # some meta\n", " f.write(\"# obs: Dai et al.(2019)\\n\")\n", " f.write(\"# vol: km^3/yr\\n\")\n", " f.write(\"# area: km^2\\n\")\n", "\n", " # headers\n", " f.write(\"No, river_name,\")\n", " f.write(\"{0: >15}_vol,\".format(\"obs\"))\n", " for jx, (case, meta) in enumerate(case_dic.items()):\n", " f.write(\"{0: >15}_vol,\".format(meta[\"grid\"]))\n", " f.write(\"{0: >15}_area\".format(\"obs\"))\n", " for jx, (case, meta) in enumerate(case_dic.items()):\n", " f.write(\",\")\n", " f.write(\"{0: >15}_area\".format(meta[\"grid\"]))\n", " f.write(\"\\n\")\n", "\n", " # data for each river\n", " for ix, river_name in enumerate(big_river_50.keys()):\n", "\n", " f.write(\"%02d,\" % (ix + 1))\n", " f.write(\"{0: >20}\".format(river_name))\n", "\n", " for jx, (case, meta) in enumerate(case_dic.items()):\n", " f.write(\",\")\n", " net_idx = gauge_plot[river_name][case][1]\n", " gaug_idx = gauge_plot[river_name][case][0]\n", "\n", " q_name = case_meta[meta[\"grid\"]][\"flow_name\"]\n", "\n", " if len(net_idx) == 1: # means vector\n", " qsim = (\n", " np.nanmean(year_data[case][q_name][:, net_idx].values)\n", " * 60\n", " * 60\n", " * 24\n", " * 365\n", " / 10**9\n", " )\n", " elif len(net_idx) == 2: # means 2d grid\n", " qsim = (\n", " np.nanmean(\n", " year_data[case][q_name][:, net_idx[0], net_idx[1]].values\n", " )\n", " * 60\n", " * 60\n", " * 24\n", " * 365\n", " / 10**9\n", " )\n", "\n", " if jx == 0:\n", " qobs = (\n", " ds_q_obs_yr_mean.loc[gaug_idx].values[0]\n", " * 60\n", " * 60\n", " * 24\n", " * 365\n", " / 10**9\n", " )\n", " f.write(\"{:19.1f},\".format(qobs))\n", "\n", " f.write(\"{:19.1f}\".format(qsim))\n", "\n", " for jx, (case, _) in enumerate(case_dic.items()):\n", " f.write(\",\")\n", " gaug_idx = gauge_plot[river_name][case][0]\n", "\n", " # just get gauge_id from qs_q for now\n", " gauge_id = ds_q[\"id\"].loc[gaug_idx].values[0]\n", " network_area = gauge_reach_lnk[case][\n", " gauge_reach_lnk[case][\"gauge_id\"] == gauge_id\n", " ][\"route_area\"].values[0]\n", "\n", " if jx == 0:\n", " area = ds_q[\"area_stn\"].loc[gaug_idx].values[0]\n", " f.write(\"{:20.0f},\".format(area))\n", "\n", " f.write(\"{:20.0f}\".format(network_area))\n", "\n", " f.write(\"\\n\")" ] }, { "cell_type": "markdown", "id": "fe4d9dfe", "metadata": { "papermill": { "duration": null, "end_time": null, "exception": null, "start_time": null, "status": "pending" }, "tags": [] }, "source": [ "### 4.2. scatter plot of annual mean flow" ] }, { "cell_type": "code", "execution_count": null, "id": "279b5eaf", "metadata": { "editable": true, "papermill": { "duration": null, "end_time": null, "exception": null, "start_time": null, "status": "pending" }, "slideshow": { "slide_type": "" }, "tags": [ "hide-input" ] }, "outputs": [], "source": [ "if obs_available:\n", "\n", " df_yr_vol = pd.read_csv(\n", " table_file,\n", " skiprows=3,\n", " index_col=[\"No\"],\n", " skipinitialspace=True,\n", " )\n", "\n", " fig, axes = plt.subplots(1, figsize=(5.50, 5.50))\n", " regressor = LinearRegression()\n", " bias_text = \"\"\n", "\n", " for jx, (case, meta) in enumerate(case_dic.items()):\n", " # compute linear regression\n", " df_reg = df_yr_vol[[\"obs_vol\", f\"{meta['grid']}_vol\"]].dropna()\n", " regressor.fit(df_reg[[\"obs_vol\"]], df_reg[f\"{meta['grid']}_vol\"])\n", " y_pred = regressor.predict(df_reg[[\"obs_vol\"]])\n", "\n", " # compute bias over 50 sites\n", " diff = (df_yr_vol[f\"{meta['grid']}_vol\"] - df_yr_vol[\"obs_vol\"]).mean(\n", " axis=0, skipna=True\n", " )\n", "\n", " df_yr_vol.plot(\n", " ax=axes,\n", " kind=\"scatter\",\n", " x=\"obs_vol\",\n", " y=f\"{meta['grid']}_vol\",\n", " s=30,\n", " alpha=0.6,\n", " label=meta[\"grid\"],\n", " )\n", " # plt.plot(df_reg['obs_vol'], y_pred, color=color)\n", " bias_text = bias_text + f\"\\n{meta['grid']}: {diff:.1f} \"\n", "\n", " plt.text(\n", " 0.65, 0.30, \"bias [km3/yr]\", transform=axes.transAxes, verticalalignment=\"top\"\n", " )\n", " plt.text(\n", " 0.65, 0.30, f\"{bias_text} \", transform=axes.transAxes, verticalalignment=\"top\"\n", " )\n", "\n", " plt.axline((0, 0), slope=1, linestyle=\"--\", color=\"black\")\n", " axes.set_xscale(\"log\")\n", " axes.set_yscale(\"log\")\n", " axes.set_xlabel(\"reference flow\")\n", " axes.set_ylabel(\"Simulated flow\")\n", " axes.set_title(\"River Flow at stations [km^3/yr]\")\n", "\n", " if figureSave:\n", " plt.savefig(\n", " f\"{out_dir}/NB1_Fig5_50big_river_annual_flow_scatter_{analysis_name}.png\",\n", " dpi=200,\n", " )" ] }, { "cell_type": "markdown", "id": "fe5b4834", "metadata": { "papermill": { "duration": null, "end_time": null, "exception": null, "start_time": null, "status": "pending" }, "tags": [] }, "source": [ "------\n", "## 5. Anaysis for all 922 sites " ] }, { "cell_type": "markdown", "id": "a8785758", "metadata": { "papermill": { "duration": null, "end_time": null, "exception": null, "start_time": null, "status": "pending" }, "tags": [] }, "source": [ "### 5.1 Compute metris at all the sites (no plots nor tables)" ] }, { "cell_type": "code", "execution_count": null, "id": "147e11f7", "metadata": { "editable": true, "papermill": { "duration": null, "end_time": null, "exception": null, "start_time": null, "status": "pending" }, "slideshow": { "slide_type": "" }, "tags": [ "hide-input" ] }, "outputs": [], "source": [ "%%time\n", "if obs_available:\n", " # Merge gauge_reach lnk (dataframe) into gauge shapefile\n", " gauge_shp1 = {}\n", " for case, df in gauge_reach_lnk.items():\n", " # df = df.loc[(df['flag'] == 0)]\n", " df1 = df.drop(columns=[\"riv_name\"])\n", " gauge_shp1[case] = pd.merge(\n", " gauge_shp, df1, how=\"inner\", left_on=\"id\", right_on=\"gauge_id\"\n", " )\n", "\n", " # compute %bias, correlation, and RMSE at each site based on monthly\n", " mon_pbias = {}\n", " mon_corr = {}\n", " mon_rmse = {}\n", "\n", " for case, meta in case_dic.items():\n", " bias = np.full(len(gauge_shp1[case]), np.nan, dtype=float)\n", " corr = np.full(len(gauge_shp1[case]), np.nan, dtype=float)\n", " rmse = np.full(len(gauge_shp1[case]), np.nan, dtype=float)\n", "\n", " q_name = case_meta[meta[\"grid\"]][\"flow_name\"]\n", "\n", " for ix, row in gauge_shp1[case].iterrows():\n", " q_obs = np.full(\n", " meta[\"climo_nyrs\"] * 12, np.nan, dtype=float\n", " ) # dummy q_sim that will be replaced by actual data if exist\n", " q_sim = np.full(\n", " meta[\"climo_nyrs\"] * 12, np.nan, dtype=float\n", " ) # dummy q_sim that will be replaced by actual data if exist\n", "\n", " route_id = row[\"route_id\"]\n", " gauge_id = row[\"gauge_id\"]\n", "\n", " gauge_ix = np.argwhere(ds_q.id.values == gauge_id)\n", " if len(gauge_ix) == 1:\n", " gauge_ix = gauge_ix[0]\n", " q_obs = ds_q_obs_mon[:, gauge_ix].squeeze().values\n", "\n", " route_ix = np.argwhere(reachID[case] == route_id)\n", " if (\n", " len(route_ix) == 1\n", " ): # meaning there is flow site in network and simulation exist at this site\n", " route_ix = route_ix[0]\n", " if len(route_ix) == 1: # means vector\n", " q_sim = month_data[case][q_name][:, route_ix].squeeze().values\n", " elif len(route_ix) == 2: # means 2d grid\n", " q_sim = (\n", " month_data[case][q_name][:, route_ix[0], route_ix[1]]\n", " .squeeze()\n", " .values\n", " )\n", "\n", " # compute %bias, correlation, RMSE\n", " bias[ix] = metric.pbias(q_sim, q_obs)\n", " corr[ix] = metric.corr(q_sim, q_obs)\n", " rmse[ix] = metric.rmse(qsim, qobs)\n", "\n", " mon_pbias[case] = bias\n", " mon_corr[case] = corr\n", " mon_rmse[case] = rmse\n", "\n", " gauge_shp1[case][f\"bias_{meta['grid']}\"] = bias\n", " gauge_shp1[case][f\"corr_{meta['grid']}\"] = corr\n", " gauge_shp1[case][f\"rmse_{meta['grid']}\"] = rmse" ] }, { "cell_type": "markdown", "id": "b0e0bfc5", "metadata": { "papermill": { "duration": null, "end_time": null, "exception": null, "start_time": null, "status": "pending" }, "tags": [] }, "source": [ "### 5.2. Spatial metric map " ] }, { "cell_type": "code", "execution_count": null, "id": "c0163ac0", "metadata": { "editable": true, "papermill": { "duration": null, "end_time": null, "exception": null, "start_time": null, "status": "pending" }, "slideshow": { "slide_type": "" }, "tags": [ "hide-input" ] }, "outputs": [], "source": [ "%%time\n", "if obs_available:\n", " # some local plot setups\n", " cbar_kwrgs = {\n", " \"bias\": {\n", " \"shrink\": 0.9,\n", " \"pad\": 0.02,\n", " \"orientation\": \"horizontal\",\n", " \"extend\": \"both\",\n", " },\n", " \"corr\": {\n", " \"shrink\": 0.9,\n", " \"pad\": 0.02,\n", " \"orientation\": \"horizontal\",\n", " \"extend\": \"min\",\n", " },\n", " \"rmse\": {\n", " \"shrink\": 0.9,\n", " \"pad\": 0.02,\n", " \"orientation\": \"horizontal\",\n", " \"extend\": \"max\",\n", " },\n", " }\n", "\n", " meta = {\n", " \"bias\": {\n", " \"name\": \"%bias\",\n", " \"vmin\": -100,\n", " \"vmax\": 100,\n", " \"cm\": colors.cmap_RedGrayBlue,\n", " },\n", " \"corr\": {\"name\": \"correlation\", \"vmin\": 0.2, \"vmax\": 1, \"cm\": colors.cmap_corr},\n", " \"rmse\": {\"name\": \"RMSE\", \"vmin\": 0, \"vmax\": 1000, \"cm\": mpl.cm.turbo},\n", " }\n", "\n", " for error_metric in [\"rmse\", \"bias\", \"corr\"]:\n", " for case, meta in case_dic.items():\n", " fig, ax = plt.subplots(\n", " 1, figsize=(7.5, 4.0), subplot_kw={\"projection\": ccrs.PlateCarree()}\n", " )\n", "\n", " ax.add_feature(\n", " rivers_50m, facecolor=\"None\", edgecolor=\"b\", lw=0.5, alpha=0.3\n", " )\n", " ax.add_feature(land, facecolor=\"white\", edgecolor=\"grey\")\n", "\n", " gauge_shp1[case].plot(\n", " ax=ax,\n", " column=f\"{error_metric}_{meta['grid']}\",\n", " markersize=10,\n", " cmap=meta[error_metric][\"cm\"],\n", " vmin=meta[error_metric][\"vmin\"],\n", " vmax=meta[error_metric][\"vmax\"],\n", " )\n", "\n", " ax.set_title(\"%s %s\" % (case, meta[error_metric][\"name\"]))\n", "\n", " points = ax.collections[-1]\n", " plt.colorbar(points, ax=ax, **cbar_kwrgs[error_metric])\n", "\n", " plt.tight_layout()\n", "\n", " if figureSave:\n", " plt.savefig(\n", " f\"{out_dir}/NB1_Fig6_{error_metric}_{case}_map.png\", dpi=200\n", " )" ] }, { "cell_type": "markdown", "id": "d09df7cc", "metadata": { "papermill": { "duration": null, "end_time": null, "exception": null, "start_time": null, "status": "pending" }, "tags": [] }, "source": [ "### 5.4 Boxplots of Error metrics (RMSE, %bias, and correlation coefficient)\n", "Boxplot distribution is based on metrics sampled at 922 sites.\n", "\n", "The box extends from the Q1 to Q3 quartile values of the data, with a line at the median (Q2). \n", "The whiskers extend from the edges of box to show the range of the data. \n", "By default, they extend no more than 1.5 * IQR (IQR = Q3 - Q1) from the edges of the box, ending at the farthest data point within that interval. \n", "Outliers are plotted as separate dots." ] }, { "cell_type": "code", "execution_count": null, "id": "512ff9cd", "metadata": { "editable": true, "papermill": { "duration": null, "end_time": null, "exception": null, "start_time": null, "status": "pending" }, "slideshow": { "slide_type": "" }, "tags": [ "hide-input" ] }, "outputs": [], "source": [ "if obs_available:\n", " boxprops = {\"linestyle\": \"-\", \"linewidth\": 1.5, \"color\": \"blue\"}\n", " medianprops = {\"linestyle\": \"-\", \"linewidth\": 1.5, \"color\": \"red\"}\n", "\n", " for error_metric in [\"rmse\", \"bias\", \"corr\"]:\n", " column_stat = []\n", " gauge_shp_all_case = gauge_shp.copy(deep=True)\n", " for case, meta in case_dic.items():\n", " gauge_shp_all_case = gauge_shp_all_case.merge(\n", " gauge_shp1[case][[\"id\", f\"{error_metric}_{meta['grid']}\"]],\n", " left_on=\"id\",\n", " right_on=\"id\",\n", " )\n", " column_stat.append(f\"{error_metric}_{meta['grid']}\")\n", " fig, ax = plt.subplots(1, figsize=(6.5, 4))\n", " gauge_shp_all_case.boxplot(\n", " ax=ax,\n", " column=column_stat,\n", " boxprops=boxprops,\n", " medianprops=medianprops,\n", " sym=\".\",\n", " )\n", "\n", " xticklabels = [label[len(error_metric) + 1 :] for label in column_stat]\n", " ax.set_xticklabels(xticklabels)\n", "\n", " if error_metric == \"rmse\":\n", " ax.set_ylim([0, 1000])\n", " ax.set_title(\"RMSE [m3/s]\")\n", " elif error_metric == \"bias\":\n", " ax.set_ylim([-150, 250])\n", " ax.set_title(\"%bias [%]\")\n", " elif error_metric == \"corr\":\n", " ax.set_ylim([-0.2, 1])\n", " ax.set_title(\"correlation\")\n", "\n", " if figureSave:\n", " plt.savefig(f\"{out_dir}/NB1_Fig7_{error_metric}_boxplot.png\", dpi=150)" ] }, { "cell_type": "code", "execution_count": null, "id": "50c45b9c", "metadata": { "editable": true, "papermill": { "duration": null, "end_time": null, "exception": null, "start_time": null, "status": "pending" }, "slideshow": { "slide_type": "" }, "tags": [ "hide-input" ] }, "outputs": [], "source": [ "if client and not cupid_run:\n", " client.shutdown()" ] } ], "metadata": { "kernelspec": { "display_name": "cupid-analysis", "language": "python", "name": "cupid-analysis" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.11.4" }, "papermill": { "duration": 11.5344, "end_time": "2025-11-25T01:42:57.037106", "exception": true, "input_path": "/glade/derecho/scratch/richling/tmp/tmp6czgsu6m.ipynb", "output_path": "/glade/work/richling/cupid-test-2/CUPiD/examples/key_metrics/computed_notebooks/rof/global_discharge_gauge_compare_obs.ipynb", "parameters": { "CESM_output_dir": "/glade/campaign/cesm/development/cross-wg/diagnostic_framework/CESM_output_for_testing", "analysis_name": "", "base_case_name": "b.e30_alpha07c_cesm.B1850C_LTso.ne30_t232_wgx3.228", "base_case_nickname": "BLT1850_228", "base_climo_end_year": 45, "base_climo_start_year": 36, "base_end_date": "0045-01-01", "base_start_date": "0001-01-01", "case_name": "b.e30_alpha07c_cesm.B1850C_LTso.ne30_t232_wgx3.232", "case_nickname": "BLT1850_232", "climo_end_year": 21, "climo_nyears": 10, "climo_start_year": 11, "end_date": "0021-01-01", "figureSave": false, "grid_name": "f09_f09_mosart", "hist_str": "h0a", "lc_kwargs": { "threads_per_worker": 1 }, "obs_data_dir": "/glade/campaign/cesm/development/cross-wg/diagnostic_framework/CUPiD_obs_data", "product": "/glade/work/richling/cupid-test-2/CUPiD/examples/key_metrics/computed_notebooks//rof/global_discharge_gauge_compare_obs.ipynb", "serial": false, "start_date": "0001-01-01", "subset_kwargs": {}, "ts_dir": null }, "start_time": "2025-11-25T01:42:45.502706" } }, "nbformat": 4, "nbformat_minor": 5 }