{
 "cells": [
  {
   "cell_type": "markdown",
   "id": "a4eebff4",
   "metadata": {},
   "source": [
    "### What this script does\n",
    "\n",
    "- Opens LV0/LV1 cloud‐radar NetCDF files and prints dataset metadata (dims, vars, attrs).\n",
    "\n",
    "- Scans a chosen day folder, loads time series (SurfTemp, RH, WS, LWP, SurfPres, Rain), and makes per-file plots + an all-day plot.\n",
    "\n",
    "- Loads vertical profiles (TProf, AHProf, RHProf) with their altitude grids (TAlts, HAlts) and renders per-file profile images and a combined day view.\n",
    "\n",
    "- Builds a per-timestamp profile table (temperature/ humidity profiles + surface P/T), converts radar time to UTC.\n",
    "\n",
    "- Computes pressure profiles by hydrostatic integration from surface pressure, saturation vapor pressure, vapor pressure, specific humidity, and virtual potential temperature (θv) profiles.\n",
    "\n",
    "- Resamples all profiles to 10-minute averages and saves to a Parquet dataset.\n",
    "\n",
    "- Extracts cloud-base height (CBH) or Rain time series from LV1 files, plots, resamples Rain to 10-min, and saves to CSV.\n",
    "\n",
    "- Provides a few presentation plots of a selected 10-min profile (Temperature, θv, RH, qv vs altitude).\n",
    "\n",
    "#### Lines you should change:\n",
    "\n",
    "- Single LV0 file you inspect: file_path = r\"C:\\path\\to\\your\\Cloud_radar\\2024-05\\2024-05-03\\240503_000002_P00_ZEN.LV0.NC\"\n",
    "\n",
    "- Folder holding LV1 files for a day: folder_path = r\"C:\\path\\to\\your\\Cloud_radar\\2024-05\\2024-05-03\"\n",
    "\n",
    "- Base + month + day blocks used in several sections (CBH, Rain, vertical profiles): \n",
    "    - base_dir_cr = r\"C:\\path\\to\\your\\Cloud_radar\"\n",
    "    - month       = \"2024-05\"       <- change to target month\n",
    "    - day         = \"2024-05-02\"    <- change to target day\n",
    "    - day_folder_path = os.path.join(base_dir_cr, month, day)\n",
    "\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "61cefd57",
   "metadata": {},
   "outputs": [],
   "source": [
    "import netCDF4 as nc\n",
    "import pandas as pd\n",
    "import numpy as np\n",
    "import matplotlib.pyplot as plt\n",
    "import os\n",
    "from datetime import datetime, timedelta\n",
    "from netCDF4 import Dataset\n",
    "from reportlab.lib.pagesizes import letter\n",
    "from reportlab.pdfgen import canvas\n",
    "from reportlab.lib.styles import getSampleStyleSheet, ParagraphStyle\n",
    "from reportlab.platypus import SimpleDocTemplate, Paragraph, Spacer\n",
    "import matplotlib.dates as mdates\n",
    "\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "155197c5",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Constants\n",
    "C_p = 1005  # Specific heat capacity of dry air at constant pressure (J/kg/K)\n",
    "g = 9.81    # Gravitational acceleration (m/s^2)\n",
    "Ttrip = 273.16  # Triple point temperature in Kelvin\n",
    "Rd=287.04\n",
    "Rv=461.5\n",
    "epsilon=(Rv/Rd)-1"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "541bc008",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Function to calculate pressure at heights\n",
    "def calculate_pressure_at_heights(df):\n",
    "    calculated_pressures = []  # To store pressure values for all times and altitudes\n",
    "    \n",
    "    # Loop through each row in the dataframe, accessing each profile (altitudes and temperature)\n",
    "    for idx, row in df.iterrows():\n",
    "        # Extract the altitude and temperature profiles for this timestamp\n",
    "        temperature_altitudes = row['temperature_altitudes']\n",
    "        temperature_profile = row['temperature_profile']\n",
    "        surface_pressure = row['surf_pres']  # Surface pressure for this timestamp\n",
    "        \n",
    "        # Ensure that the altitudes are sorted, just in case\n",
    "        sorted_indices = np.argsort(temperature_altitudes)\n",
    "        sorted_altitudes = np.array(temperature_altitudes)[sorted_indices]\n",
    "        sorted_temperatures = np.array(temperature_profile)[sorted_indices]\n",
    "        \n",
    "        # Start with the surface pressure (P0)\n",
    "        P0 = surface_pressure  # Surface pressure (hPa)\n",
    "        pressures = [P0]  # First pressure is the surface pressure\n",
    "\n",
    "        # Perform second-order (Trapezoidal Rule) integration for each altitude step\n",
    "        for i in range(1, len(sorted_altitudes)):\n",
    "            z1 = sorted_altitudes[i - 1]\n",
    "            z2 = sorted_altitudes[i]\n",
    "            T1 = sorted_temperatures[i - 1]\n",
    "            T2 = sorted_temperatures[i]\n",
    "            P1 = pressures[-1]  # Previous pressure value\n",
    "            \n",
    "            delta_z = z2 - z1  # Altitude difference\n",
    "            \n",
    "            # First order estimate of P2 based on P1\n",
    "            P2_guess = P1 * np.exp(-g * delta_z / (Rd * T1))\n",
    "            \n",
    "            # Trapezoidal rule correction for P2\n",
    "            delta_P = 0.5 * (-(g * P1 / (Rd * T1)) + -(g * P2_guess / (Rd * T2))) * delta_z\n",
    "            P2 = P1 + delta_P\n",
    "            pressures.append(P2)\n",
    "        \n",
    "        # Append the calculated pressures for this profile\n",
    "        calculated_pressures.append(pressures)\n",
    "    \n",
    "    return calculated_pressures\n",
    "\n",
    "# Function to calculate saturation vapor pressure\n",
    "def calculate_saturation_vapor_pressure(T):\n",
    "    es = 610.78 * np.exp(17.2694 * (T - Ttrip) / (T - 35.86))\n",
    "    return es\n",
    "\n",
    "# Function to calculate specific humidity (qv)\n",
    "def calculate_specific_humidity(ev, p):\n",
    "    return ev * 1000 / (p * 100 + (((Rv / Rd) - 1) * (p * 100 - ev)))\n",
    "\n",
    "\n",
    "# Function to calculate potential temperature (theta) based on current pressure level and surface pressure\n",
    "def calculate_potential_temperature(T, P_i, P_0):\n",
    "    \"\"\"\n",
    "    Calculate potential temperature (theta) given temperature T (K) and pressures at level P_i (hPa) and surface pressure P_0 (hPa).\n",
    "    \"\"\"\n",
    "    return T * (P_0 / P_i) ** (Rd / C_p)\n",
    "\n",
    "# Function to calculate theta_v for each altitude level\n",
    "def calculate_theta_v_alternative_by_altitude(row):\n",
    "    theta_v_profile = []  # Store results for this timestamp\n",
    "    temperature_profile = row['temperature_profile']  # Temperature profile for this timestamp\n",
    "    calculated_pressures = row['calculated_pressures']  # Pressures for this timestamp\n",
    "    specific_humidity = row['specific_humidity']  # Specific humidity for this timestamp\n",
    "    \n",
    "    for i in range(len(temperature_profile)):\n",
    "        T = temperature_profile[i]  # Temperature at current altitude (K)\n",
    "        P_i = calculated_pressures[i]  # Pressure at current altitude (hPa)\n",
    "        qv = specific_humidity[i]  # Specific humidity at current altitude (kg/kg)\n",
    "        \n",
    "        if i == 0:  # Surface level\n",
    "            P_0 = row['surf_pres']  # Surface pressure (hPa)\n",
    "            theta = T  # Potential temperature at the surface is just T\n",
    "        else:\n",
    "            P_0 = row['surf_pres']  # Surface pressure remains constant\n",
    "            theta = calculate_potential_temperature(T, P_i, P_0)  # Potential temperature at altitude\n",
    "        \n",
    "        # Calculate virtual potential temperature (theta_v) for this altitude\n",
    "        theta_v_alt = theta * (1 + (epsilon * qv/1000))  # qv is in kg/kg\n",
    "        \n",
    "        # Append the calculated theta_v to the profile list\n",
    "        theta_v_profile.append(theta_v_alt)\n",
    "    \n",
    "    return theta_v_profile"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "930017a9",
   "metadata": {},
   "outputs": [],
   "source": [
    "#Edit before running!!\n",
    "# Open the NetCDF file\n",
    "#Single LV0 file you inspect: \n",
    "file_path = r\"C:\\path\\to\\your\\Cloud_radar\\2024-05\\2024-05-03\\240503_000002_P00_ZEN.LV0.NC\" #or other day\n",
    "dataset = nc.Dataset(file_path, mode='r')\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "0693e20f",
   "metadata": {
    "scrolled": true
   },
   "outputs": [],
   "source": [
    "# Print dataset information\n",
    "print(\"NetCDF Dataset Information:\")\n",
    "print(dataset)\n",
    "\n",
    "# Print global attributes\n",
    "print(\"\\nGlobal Attributes:\")\n",
    "for attr_name in dataset.ncattrs():\n",
    "    print(f\"{attr_name}: {getattr(dataset, attr_name)}\")\n",
    "\n",
    "# Print dimensions\n",
    "print(\"\\nDimensions:\")\n",
    "for dim_name, dim in dataset.dimensions.items():\n",
    "    print(f\"{dim_name}: {len(dim)}\")\n",
    "\n",
    "# Print variables and their attributes\n",
    "print(\"\\nVariables:\")\n",
    "for var_name, var in dataset.variables.items():\n",
    "    print(f\"\\nVariable Name: {var_name}\")\n",
    "    print(f\"Dimensions: {var.dimensions}\")\n",
    "    print(f\"Shape: {var.shape}\")\n",
    "    print(f\"Data Type: {var.dtype}\")\n",
    "    for attr_name in var.ncattrs():\n",
    "        print(f\"    {attr_name}: {getattr(var, attr_name)}\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "75c006f9",
   "metadata": {
    "scrolled": false
   },
   "outputs": [],
   "source": [
    "#Edit this before running!!!\n",
    "# Path to the folder containing the data files\n",
    "#Folder holding LV1 files for a day: \n",
    "folder_path = r\"C:\\path\\to\\your\\Cloud_radar\\2024-05\\2024-05-03\" #or other day\n",
    "file_list = [f for f in os.listdir(folder_path) if f.endswith(\"LV1.nc\")]\n",
    "\n",
    "print(\"Files found in the folder:\")\n",
    "print(len(file_list))\n",
    "# Loop through each file\n",
    "for file_name in file_list:\n",
    "    # Open the NetCDF file\n",
    "    file_path = os.path.join(folder_path, file_name)\n",
    "    dataset = Dataset(file_path)\n",
    "\n",
    "    # Read necessary variables\n",
    "    surf_temp_data = dataset.variables['SurfTemp'][:]\n",
    "    rh_data = dataset.variables['SurfRelHum'][:]\n",
    "    surf_ws_data = dataset.variables['SurfWS'][:]\n",
    "    lwp_data = dataset.variables['LWP'][:]\n",
    "    surf_pres_data = dataset.variables['SurfPres'][:]\n",
    "    rain_rate_data = dataset.variables['Rain'][:]\n",
    "    \n",
    "    # Get time data\n",
    "    time_data = dataset.variables['Time'][:]\n",
    "    timems_data = dataset.variables['Timems'][:]\n",
    "    start_time = datetime(2001, 1, 1, 0, 0, 0)\n",
    "    time = [start_time + timedelta(seconds=int(time_data[i]), milliseconds=int(timems_data[i])) for i in range(len(time_data))]\n",
    "    \n",
    "    # Plot variables\n",
    "    plt.figure(figsize=(12, 8))\n",
    "    \n",
    "    plt.subplot(3, 2, 1)\n",
    "    plt.plot(time, surf_temp_data, color='b')\n",
    "    plt.title('Surface Temperature')\n",
    "    plt.xlabel('Time')\n",
    "    plt.ylabel('Temperature (K)')\n",
    "    \n",
    "    plt.subplot(3, 2, 2)\n",
    "    plt.plot(time, rh_data, color='r')\n",
    "    plt.title('Relative Humidity')\n",
    "    plt.xlabel('Time')\n",
    "    plt.ylabel('Relative Humidity (%)')\n",
    "    \n",
    "    plt.subplot(3, 2, 3)\n",
    "    plt.plot(time, surf_ws_data, color='g')\n",
    "    plt.title('Surface Wind Speed')\n",
    "    plt.xlabel('Time')\n",
    "    plt.ylabel('Wind Speed (m/s)')\n",
    "    \n",
    "    plt.subplot(3, 2, 4)\n",
    "    plt.plot(time, lwp_data, color='m')\n",
    "    plt.title('Liquid Water Path')\n",
    "    plt.xlabel('Time')\n",
    "    plt.ylabel('LWP (g/m^2)')\n",
    "    \n",
    "    plt.subplot(3, 2, 5)\n",
    "    plt.plot(time, surf_pres_data, color='c')\n",
    "    plt.title('Surface Pressure')\n",
    "    plt.xlabel('Time')\n",
    "    plt.ylabel('Pressure (hPa)')\n",
    "    \n",
    "    plt.subplot(3, 2, 6)\n",
    "    plt.plot(time, rain_rate_data, color='y')\n",
    "    plt.title('Rain Rate')\n",
    "    plt.xlabel('Time')\n",
    "    plt.ylabel('Rain Rate (mm/hr)')\n",
    "    \n",
    "    plt.tight_layout()\n",
    "    \n",
    "    # Save plot\n",
    "    plot_name = os.path.splitext(file_name)[0] + '_plot.png'\n",
    "    plt.savefig(os.path.join(folder_path, plot_name))\n",
    "    plt.close()\n",
    "    \n",
    "  "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "a07089d6",
   "metadata": {},
   "outputs": [],
   "source": [
    "print(dataset['TAlts'][:])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "cf535169",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Initialize lists to store data\n",
    "all_surf_temp_data = []\n",
    "all_rh_data = []\n",
    "all_surf_ws_data = []\n",
    "all_lwp_data = []\n",
    "all_surf_pres_data = []\n",
    "all_rain_rate_data = []\n",
    "all_time = []\n",
    "# Initialize lists to store data for profiles\n",
    "all_TProf_data = []\n",
    "all_AHProf_data = []\n",
    "all_RHProf_data = []\n",
    "all_profile_time_utc = []\n",
    "\n",
    "# Loop through each file\n",
    "for file_name in file_list:\n",
    "    # Open the NetCDF file\n",
    "    file_path = os.path.join(folder_path, file_name)\n",
    "    dataset = Dataset(file_path)\n",
    "\n",
    "    # Read necessary variables\n",
    "    surf_temp_data = dataset.variables['SurfTemp'][:]\n",
    "    rh_data = dataset.variables['SurfRelHum'][:]\n",
    "    surf_ws_data = dataset.variables['SurfWS'][:]\n",
    "    lwp_data = dataset.variables['LWP'][:]\n",
    "    surf_pres_data = dataset.variables['SurfPres'][:]\n",
    "    rain_rate_data = dataset.variables['Rain'][:]\n",
    "\n",
    "    # Get time data\n",
    "    time_data = dataset.variables['Time'][:]\n",
    "    timems_data = dataset.variables['Timems'][:]\n",
    "    start_time = datetime(2001, 1, 1, 0, 0, 0)\n",
    "    time = [start_time + timedelta(seconds=int(time_data[i]), milliseconds=int(timems_data[i])) for i in range(len(time_data))]\n",
    "\n",
    "    # Append data to lists\n",
    "    all_surf_temp_data.extend(surf_temp_data)\n",
    "    all_rh_data.extend(rh_data)\n",
    "    all_surf_ws_data.extend(surf_ws_data)\n",
    "    all_lwp_data.extend(lwp_data)\n",
    "    all_surf_pres_data.extend(surf_pres_data)\n",
    "    all_rain_rate_data.extend(rain_rate_data)\n",
    "    all_time.extend(time)\n",
    "    #Read profile variables\n",
    "    # Read profile variables\n",
    "    TProf_data = dataset.variables['TProf'][:]\n",
    "    AHProf_data = dataset.variables['AHProf'][:]\n",
    "    RHProf_data = dataset.variables['RHProf'][:]\n",
    "    profile_time_data = dataset.variables['Time'][:]\n",
    "\n",
    "    # Convert profile time data to readable UTC format\n",
    "    profile_start_time = datetime(2001, 1, 1, 0, 0, 0)\n",
    "    profile_time_utc = [profile_start_time + timedelta(seconds=int(t)) for t in profile_time_data]\n",
    "    # Append profile data to lists\n",
    "    all_TProf_data.append(TProf_data)\n",
    "    all_profile_time_utc.extend(profile_time_utc)\n",
    "    all_AHProf_data.append(AHProf_data)\n",
    "    all_RHProf_data.append(RHProf_data)\n",
    "    # Get altitude data\n",
    "    TAlt_data = dataset.variables['TAlts'][:]\n",
    "    HAlt_data = dataset.variables['HAlts'][:]\n",
    "\n",
    "    # Plot the profiles\n",
    "    fig, axs = plt.subplots(3, 1, figsize=(15, 15))\n",
    "\n",
    "    # Temperature Profile\n",
    "    cax1 = axs[0].imshow(TProf_data.T, extent=[np.min(profile_time_utc), np.max(profile_time_utc), np.min(TAlt_data), np.max(TAlt_data)], aspect='auto', origin='lower')\n",
    "    axs[0].set_title('Temperature Profile')\n",
    "    axs[0].set_xlabel('Time (UTC)')\n",
    "    axs[0].set_ylabel('Altitude (m)')\n",
    "    plt.colorbar(cax1, ax=axs[0], label='Temperature (K)')\n",
    "\n",
    "    # Absolute Humidity Profile\n",
    "    cax2 = axs[1].imshow(AHProf_data.T, extent=[np.min(profile_time_utc), np.max(profile_time_utc), np.min(HAlt_data), np.max(HAlt_data)], aspect='auto', origin='lower')\n",
    "    axs[1].set_title('Absolute Humidity Profile')\n",
    "    axs[1].set_xlabel('Time (UTC)')\n",
    "    axs[1].set_ylabel('Altitude (m)')\n",
    "    plt.colorbar(cax2, ax=axs[1], label='Absolute Humidity (g/m^3)')\n",
    "\n",
    "    # Relative Humidity Profile\n",
    "    cax3 = axs[2].imshow(RHProf_data.T, extent=[np.min(profile_time_utc), np.max(profile_time_utc), np.min(HAlt_data), np.max(HAlt_data)], aspect='auto', origin='lower')\n",
    "    axs[2].set_title('Relative Humidity Profile')\n",
    "    axs[2].set_xlabel('Time (UTC)')\n",
    "    axs[2].set_ylabel('Altitude (m)')\n",
    "    plt.colorbar(cax3, ax=axs[2], label='Relative Humidity (%)')\n",
    "\n",
    "    # Adjust layout\n",
    "    plt.tight_layout()\n",
    "    # Save plot\n",
    "    profile_plot_name = os.path.splitext(file_name)[0] + '_profile_plot.png'\n",
    "    plt.savefig(os.path.join(folder_path, profile_plot_name))\n",
    "    plt.close()\n",
    "\n",
    "print(\"Plots created for all files.\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "51abb34c",
   "metadata": {
    "scrolled": true
   },
   "outputs": [],
   "source": [
    "\n",
    "# Plot variables for the entire day\n",
    "plt.figure(figsize=(12, 8))\n",
    "\n",
    "plt.subplot(3, 2, 1)\n",
    "plt.plot(all_time, all_surf_temp_data, color='b')\n",
    "plt.title('Surface Temperature')\n",
    "plt.xlabel('Time')\n",
    "plt.ylabel('Temperature (K)')\n",
    "plt.xticks(rotation=45)\n",
    "\n",
    "plt.subplot(3, 2, 2)\n",
    "plt.plot(all_time, all_rh_data, color='r')\n",
    "plt.title('Relative Humidity')\n",
    "plt.xlabel('Time')\n",
    "plt.ylabel('Relative Humidity (%)')\n",
    "plt.xticks(rotation=45)\n",
    "\n",
    "plt.subplot(3, 2, 3)\n",
    "plt.plot(all_time, all_surf_ws_data, color='g')\n",
    "plt.title('Surface Wind Speed')\n",
    "plt.xlabel('Time')\n",
    "plt.ylabel('Wind Speed (m/s)')\n",
    "plt.xticks(rotation=45)\n",
    "\n",
    "plt.subplot(3, 2, 4)\n",
    "plt.plot(all_time, all_lwp_data, color='m')\n",
    "plt.title('Liquid Water Path')\n",
    "plt.xlabel('Time')\n",
    "plt.ylabel('LWP (g/m^2)')\n",
    "plt.xticks(rotation=45)\n",
    "\n",
    "plt.subplot(3, 2, 5)\n",
    "plt.plot(all_time, all_surf_pres_data, color='c')\n",
    "plt.title('Surface Pressure')\n",
    "plt.xlabel('Time')\n",
    "plt.ylabel('Pressure (hPa)')\n",
    "plt.xticks(rotation=45)\n",
    "\n",
    "plt.subplot(3, 2, 6)\n",
    "plt.plot(all_time, all_rain_rate_data, color='y')\n",
    "plt.title('Rain Rate')\n",
    "plt.xlabel('Time')\n",
    "plt.ylabel('Rain Rate (mm/hr)')\n",
    "plt.xticks(rotation=45)\n",
    "\n",
    "plt.tight_layout()\n",
    "\n",
    "# Save plot\n",
    "plot_name = 'daily_plots.png'\n",
    "plt.savefig(os.path.join(folder_path, plot_name))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "369ad25d",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Combine all profile data for the whole day\n",
    "all_TProf_data = np.concatenate(all_TProf_data, axis=0)\n",
    "all_AHProf_data = np.concatenate(all_AHProf_data, axis=0)\n",
    "all_RHProf_data = np.concatenate(all_RHProf_data, axis=0)\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "37069fa7",
   "metadata": {},
   "outputs": [],
   "source": [
    "\n",
    "# Plot combined profiles for the whole day\n",
    "fig, axs = plt.subplots(3, 1, figsize=(15, 15))\n",
    "\n",
    "# Combined Temperature Profile\n",
    "cax1 = axs[0].imshow(all_TProf_data.T, extent=[np.min(all_profile_time_utc), np.max(all_profile_time_utc), np.min(TAlt_data), np.max(TAlt_data)], aspect='auto', origin='lower')\n",
    "axs[0].set_title('Combined Temperature Profile for the Whole Day')\n",
    "axs[0].set_xlabel('Time (UTC)')\n",
    "axs[0].set_ylabel('Altitude (m)')\n",
    "plt.colorbar(cax1, ax=axs[0], label='Temperature (K)')\n",
    "\n",
    "# Combined Absolute Humidity Profile\n",
    "cax2 = axs[1].imshow(all_AHProf_data.T, extent=[np.min(all_profile_time_utc), np.max(all_profile_time_utc), np.min(HAlt_data), np.max(HAlt_data)], aspect='auto', origin='lower')\n",
    "axs[1].set_title('Combined Absolute Humidity Profile for the Whole Day')\n",
    "axs[1].set_xlabel('Time (UTC)')\n",
    "axs[1].set_ylabel('Altitude (m)')\n",
    "plt.colorbar(cax2, ax=axs[1], label='Absolute Humidity (g/m^3)')\n",
    "\n",
    "# Combined Relative Humidity Profile\n",
    "cax3 = axs[2].imshow(all_RHProf_data.T, extent=[np.min(all_profile_time_utc), np.max(all_profile_time_utc), np.min(HAlt_data), np.max(HAlt_data)], aspect='auto', origin='lower')\n",
    "axs[2].set_title('Combined Relative Humidity Profile for the Whole Day')\n",
    "axs[2].set_xlabel('Time (UTC)')\n",
    "axs[2].set_ylabel('Altitude (m)')\n",
    "plt.colorbar(cax3, ax=axs[2], label='Relative Humidity (%)')\n",
    "\n",
    "# Adjust layout\n",
    "plt.tight_layout()\n",
    "# Save combined profile plot\n",
    "combined_profile_plot_name = 'combined_daily_profile_plot.png'\n",
    "plt.savefig(os.path.join(folder_path, combined_profile_plot_name))\n",
    "plt.close()"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "212ac60f",
   "metadata": {},
   "source": [
    "### Explore cloud base"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "50680736",
   "metadata": {},
   "outputs": [],
   "source": [
    "'''\n",
    "#Edit before running!!\n",
    "# Base directory containing monthly folders\n",
    "base_dir_cr = r\"C:\\path\\to\\your\\Cloud_radar\"\n",
    "month       = \"2024-05\"       # <- change to target month\n",
    "day         = \"2024-05-02\"    # <- change to target day\n",
    "day_folder_path = os.path.join(base_dir_cr, month, day)\n",
    "\n",
    "\n",
    "# Construct the directory path for the specific date\n",
    "day_folder_path = os.path.join(base_dir_cr, month, day)\n",
    "\n",
    "# Print the constructed path for debugging\n",
    "print(f\"Constructed day folder path: {day_folder_path}\")\n",
    "\n",
    "# Define the specific file name you want to focus on\n",
    "file_name = '240503_000002_P00_ZEN.LV0.nc'  # Replace with the actual file name if known\n",
    "file_path = os.path.join(day_folder_path, file_name)\n",
    "\n",
    "# Create a PDF file\n",
    "pdf_path = 'LV0_NC_Variables_Report.pdf'\n",
    "doc = SimpleDocTemplate(pdf_path, pagesize=letter)\n",
    "story = []\n",
    "\n",
    "# Define styles\n",
    "styles = getSampleStyleSheet()\n",
    "title_style = styles['Title']\n",
    "normal_style = styles['Normal']\n",
    "\n",
    "# Check if the file exists\n",
    "if not os.path.exists(file_path):\n",
    "    print(f\"File {file_path} does not exist.\")\n",
    "else:\n",
    "    print(f\"File {file_path} exists. Generating report...\")\n",
    "\n",
    "    # Open the NetCDF file\n",
    "    dataset = Dataset(file_path, 'r')\n",
    "\n",
    "    # Add file name as title\n",
    "    story.append(Paragraph(f'File: {file_name}', title_style))\n",
    "    story.append(Spacer(1, 12))\n",
    "\n",
    "    # Add variable information\n",
    "    story.append(Paragraph('Variables and Attributes:', title_style))\n",
    "    story.append(Spacer(1, 12))\n",
    "    \n",
    "    for var_name, var in dataset.variables.items():\n",
    "        var_info = f\"Variable Name: {var_name}\\n\"\n",
    "        var_info += f\"   Dimensions: {var.dimensions}\\n\"\n",
    "        var_info += f\"   Shape: {var.shape}\\n\"\n",
    "        var_info += f\"   Data Type: {var.dtype}\\n\"\n",
    "        \n",
    "        # Add attributes\n",
    "        for attr_name in var.ncattrs():\n",
    "            var_info += f\"    {attr_name}: {getattr(var, attr_name)}\\n\"\n",
    "        \n",
    "        var_info += \"\\n\"\n",
    "        \n",
    "        story.append(Paragraph(var_info, normal_style))\n",
    "\n",
    "    # Close the dataset\n",
    "    dataset.close()\n",
    "\n",
    "    # Build the PDF\n",
    "    doc.build(story)\n",
    "\n",
    "   # print(f\"PDF saved to {pdf_path}\")\n",
    "'''"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "051161f5",
   "metadata": {
    "scrolled": true
   },
   "outputs": [],
   "source": [
    "'''\n",
    "# Initialize a list to store the CBH data\n",
    "cbh_data = []\n",
    "\n",
    "# Check if the directory exists\n",
    "if not os.path.exists(day_folder_path):\n",
    "    print(f\"Directory {day_folder_path} does not exist.\")\n",
    "else:\n",
    "    print(f\"Directory {day_folder_path} exists. Searching for LV1.nc files...\")\n",
    "\n",
    "    # List all files in the specific date folder for debugging\n",
    "    all_files = os.listdir(day_folder_path)\n",
    "    print(f\"All files in {day_folder_path}: {all_files}\")\n",
    "\n",
    "    # Search for LV1.nc files in the specific date folder\n",
    "    for file_name in all_files:\n",
    "        if file_name.lower().endswith('lv1.nc'):\n",
    "            file_path = os.path.join(day_folder_path, file_name)\n",
    "            print(f\"Found file: {file_path}\")  # Debug: Print the file path\n",
    "            \n",
    "            # Open the NetCDF file\n",
    "            dataset = Dataset(file_path, 'r')\n",
    "            \n",
    "            # Extract the CBH variable and the time variable\n",
    "            cbh = dataset.variables['CBH'][:]\n",
    "            time_data = dataset.variables['Time'][:]\n",
    "            timems_data = dataset.variables['Timems'][:]\n",
    "            start_time = datetime(2001, 1, 1, 0, 0, 0)\n",
    "            time = [start_time + timedelta(seconds=int(time_data[i]), milliseconds=int(timems_data[i])) for i in range(len(time_data))]\n",
    "            \n",
    "            # Create a DataFrame for this file's data\n",
    "            df = pd.DataFrame({'Timestamp': time, 'CBH': cbh})\n",
    "            \n",
    "            # Append the DataFrame to the list\n",
    "            cbh_data.append(df)\n",
    "            \n",
    "            # Close the dataset\n",
    "            dataset.close()\n",
    "\n",
    "# Combine all DataFrames into a single DataFrame if any data was collected\n",
    "if cbh_data:\n",
    "    df_cbh_combined = pd.concat(cbh_data, ignore_index=True)\n",
    "    \n",
    "    # Print the combined DataFrame\n",
    "    print(df_cbh_combined)\n",
    "    \n",
    "    # Optionally, save the combined DataFrame to a CSV or Parquet file\n",
    "    #output_csv_path = os.path.join(base_dir_cr, 'CBH_Data_2024-05-03.csv')\n",
    "    #df_cbh_combined.to_csv(output_csv_path, index=False)\n",
    "    #print(f\"Combined data saved to {output_csv_path}\")\n",
    "else:\n",
    "    print(\"No LV1.nc files were found in the directory.\")\n",
    "'''"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "c16ad0d2",
   "metadata": {},
   "source": [
    "### Load Rain "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "b32f3d85",
   "metadata": {
    "scrolled": true
   },
   "outputs": [],
   "source": [
    "#Edit before running!!\n",
    "# Base directory containing monthly folders\n",
    "base_dir_cr = r\"C:\\path\\to\\your\\Cloud_radar\"\n",
    "month       = \"2024-05\"       # <- change to target month\n",
    "day         = \"2024-05-23\"    # <- change to target day\n",
    "\n",
    "# Construct the directory path for the specific date\n",
    "day_folder_path = os.path.join(base_dir_cr, month, day)\n",
    "\n",
    "# Initialize a list to store the Rain data\n",
    "rain_data_list = []\n",
    "\n",
    "# Check if the directory exists\n",
    "if not os.path.exists(day_folder_path):\n",
    "    print(f\"Directory {day_folder_path} does not exist.\")\n",
    "else:\n",
    "    print(f\"Directory {day_folder_path} exists. Searching for LV1.nc files...\")\n",
    "\n",
    "    # List all files in the specific date folder for debugging\n",
    "    all_files = os.listdir(day_folder_path)\n",
    "    print(f\"All files in {day_folder_path}: {all_files}\")\n",
    "\n",
    "    # Search for LV1.nc files in the specific date folder\n",
    "    for file_name in all_files:\n",
    "        if file_name.lower().endswith('lv1.nc'):\n",
    "            file_path = os.path.join(day_folder_path, file_name)\n",
    "            print(f\"Found file: {file_path}\")  # Debug: Print the file path\n",
    "            \n",
    "            # Open the NetCDF file\n",
    "            dataset = Dataset(file_path, 'r')\n",
    "            \n",
    "            # Extract the Rain variable and the time variable\n",
    "            rain_data = dataset.variables['Rain'][:]\n",
    "            time_data = dataset.variables['Time'][:]\n",
    "            timems_data = dataset.variables['Timems'][:]\n",
    "            start_time = datetime(2001, 1, 1, 0, 0, 0)\n",
    "            time = [start_time + timedelta(seconds=int(time_data[i]), milliseconds=int(timems_data[i])) for i in range(len(time_data))]\n",
    "            \n",
    "            # Create a DataFrame for this file's data\n",
    "            df = pd.DataFrame({'TIMESTAMP': time, 'Rain': rain_data})\n",
    "            \n",
    "            # Append the DataFrame to the list\n",
    "            rain_data_list.append(df)\n",
    "            \n",
    "            # Close the dataset\n",
    "            dataset.close()\n",
    "\n",
    "## Combine all DataFrames into a single DataFrame if any data was collected\n",
    "if rain_data_list:\n",
    "    df_rain_combined = pd.concat(rain_data_list, ignore_index=True)\n",
    "\n",
    "    # Print the combined DataFrame (optional)\n",
    "    print(df_rain_combined)\n",
    "\n",
    "    # Plot: Rain Rate vs Time (clean & presentation-ready)\n",
    "   # plt.figure(figsize=(8,6))\n",
    "    #plt.plot(df_rain_combined['TIMESTAMP'], df_rain_combined['Rain'], \n",
    "           #  label='Rain Rate', linewidth=2, color='teal')\n",
    "\n",
    "    # Axis labels and title\n",
    "    #plt.xlabel('Time', fontsize=20)\n",
    "    #plt.ylabel('Rain Rate (mm/h)', fontsize=20)\n",
    "    #plt.title('Rain Rate vs Time', fontsize=20, weight='bold')\n",
    "\n",
    "    # Ticks and grid\n",
    "    #plt.xticks(fontsize=18, rotation=45)\n",
    "    #plt.yticks(fontsize=18)\n",
    "    #plt.grid(True, linestyle='--', alpha=0.7)\n",
    "\n",
    "    # Create a larger figure\n",
    "    fig, ax = plt.subplots(figsize=(8, 6))\n",
    "        # 2) Thicken all spines (axis borders)\n",
    "    for spine in ax.spines.values():\n",
    "        spine.set_linewidth(1.5)\n",
    "    # 1) Plot LWP_Corrected with a bolder line\n",
    "    ax.plot(df_rain_combined['TIMESTAMP'], df_rain_combined['Rain'], \n",
    "                 label='Rain Rate', linewidth=2, color='blue')\n",
    "    # 3) Tick parameters for major & minor ticks\n",
    "    ax.tick_params(axis='both', which='major', labelsize=12, width=1.5, length=6)\n",
    "    ax.tick_params(axis='both', which='minor', width=1.0, length=4)\n",
    "\n",
    "    # 4) Format x‐axis to show time in HH:MM\n",
    "    date_format = mdates.DateFormatter('%H:%M')\n",
    "    ax.xaxis.set_major_formatter(date_format)\n",
    "    ax.xaxis.set_major_locator(mdates.HourLocator(interval=2))\n",
    "    ax.xaxis.set_minor_locator(mdates.MinuteLocator(interval=30))\n",
    "\n",
    "    # 5) Labels and title with bold font\n",
    "    ax.set_title('Rain Rate vs Time', fontsize=18, fontweight='bold')\n",
    "    ax.set_xlabel('Time', fontsize=16, fontweight='bold')\n",
    "    ax.set_ylabel('Rain Rate (mm/h)', fontsize=16, fontweight='bold')\n",
    "\n",
    "    # 6) Y‐axis tick font size\n",
    "    ax.tick_params(axis='y', labelsize=12)\n",
    "\n",
    "    # 7) Grid styling\n",
    "    ax.grid(True, which='major', linestyle='--', linewidth=0.8, alpha=0.7)\n",
    "    ax.grid(True, which='minor', linestyle=':', linewidth=0.5, alpha=0.5)\n",
    "\n",
    "    # 8) Rotate x‐tick labels\n",
    "    plt.xticks(rotation=45)\n",
    "\n",
    "    # 9) (Optional) Legend inside plot\n",
    "    legend = ax.legend(fontsize=14, frameon=True)\n",
    "    legend.get_frame().set_linewidth(1.5)\n",
    "\n",
    "    # 10) Tight layout and save\n",
    "    plt.tight_layout()\n",
    "\n",
    "    # Legend and layout\n",
    "    #plt.legend(fontsize=12)\n",
    "   # plt.tight_layout()\n",
    "    file_path = os.path.join(day_folder_path, 'RR_report.png')\n",
    "    plt.savefig(file_path, dpi=300)\n",
    "    plt.show()\n",
    "else:\n",
    "    print(\"No rain data available.\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "4e84fc45",
   "metadata": {
    "scrolled": false
   },
   "outputs": [],
   "source": [
    "# Resample the data to 10-minute averages\n",
    "df_rain_combined.set_index('TIMESTAMP', inplace=True)\n",
    "df_rain_resampled = df_rain_combined.resample('10T').mean().reset_index()\n",
    "    \n",
    "    # Print the resampled DataFrame\n",
    "print(df_rain_resampled)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "de4d8c14",
   "metadata": {},
   "outputs": [],
   "source": [
    "\n",
    "# Save the resampled DataFrame to a CSV file in the specific date folder\n",
    "output_csv_path = os.path.join(day_folder_path, 'Rain_10min_Averages.csv')\n",
    "df_rain_resampled.to_csv(output_csv_path, index=False)\n",
    "print(f\"10-minute averages saved to {output_csv_path}\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "83c1caad",
   "metadata": {},
   "outputs": [],
   "source": [
    "plt.figure(figsize=(12, 6))  # Wider figure for better visibility\n",
    "\n",
    "# Plot with thicker line and improved font\n",
    "plt.plot(df_rain_resampled['TIMESTAMP'], df_rain_resampled['Rain'], \n",
    "         label='Rain Rate', linewidth=2, color='steelblue')\n",
    "\n",
    "# Axis labels with larger font\n",
    "plt.xlabel('Time', fontsize=14)\n",
    "plt.ylabel('Rain Rate (mm/h)', fontsize=14)\n",
    "plt.title('10-Minute Average Rain Rate vs Time', fontsize=16, weight='bold')\n",
    "\n",
    "# Tick parameters\n",
    "plt.xticks(fontsize=12, rotation=45)  # Rotate for time series clarity\n",
    "plt.yticks(fontsize=12)\n",
    "\n",
    "# Format x-axis if needed (e.g., showing HH:MM)\n",
    "# plt.gca().xaxis.set_major_formatter(mdates.DateFormatter('%H:%M'))\n",
    "\n",
    "# Add grid, legend, and tighter layout\n",
    "plt.grid(True, linestyle='--', alpha=0.7)\n",
    "plt.legend(fontsize=12)\n",
    "plt.tight_layout()\n",
    "\n",
    "plt.show()"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "62c027e9",
   "metadata": {},
   "source": [
    "### Vertical Profiles"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "e280d03b",
   "metadata": {
    "scrolled": true
   },
   "outputs": [],
   "source": [
    "\n",
    "#Edit before running!!\n",
    "# Base directory containing monthly folders\n",
    "base_dir_cr = r\"C:\\path\\to\\your\\Cloud_radar\"\n",
    "month       = \"2024-05\"       # <- change to target month\n",
    "day         = \"2024-05-02\"    # <- change to target day\n",
    "day_folder_path = os.path.join(base_dir_cr, month, day)\n",
    "\n",
    "# Initialize lists to store the data for each timestamp\n",
    "profiles_with_time = []\n",
    "\n",
    "# Variables to hold altitude data\n",
    "temperature_altitudes = None\n",
    "humidity_altitudes = None\n",
    "\n",
    "# Check if the directory exists\n",
    "if not os.path.exists(day_folder_path):\n",
    "    print(f\"Directory {day_folder_path} does not exist.\")\n",
    "else:\n",
    "    print(f\"Directory {day_folder_path} exists. Searching for LV1.nc files...\")\n",
    "\n",
    "    # List all files in the specific date folder for debugging\n",
    "    all_files = os.listdir(day_folder_path)\n",
    "    print(f\"All files in {day_folder_path}: {all_files}\")\n",
    "\n",
    "    # Search for LV1.nc files in the specific date folder\n",
    "    for file_name in all_files:\n",
    "        if file_name.lower().endswith('lv1.nc'):\n",
    "            file_path = os.path.join(day_folder_path, file_name)\n",
    "            print(f\"Found file: {file_path}\")  # Debug: Print the file path\n",
    "            \n",
    "            try:\n",
    "                # Try to open the NetCDF file\n",
    "                dataset = Dataset(file_path, 'r')\n",
    "\n",
    "                # Extract altitude data (only once)\n",
    "                if temperature_altitudes is None:\n",
    "                    temperature_altitudes = dataset.variables['TAlts'][:]  # Altitude for temperature\n",
    "                    humidity_altitudes = dataset.variables['HAlts'][:]  # Altitude for humidity\n",
    "\n",
    "                # Extract profiles and surface data\n",
    "                temperature_data = dataset.variables['TProf'][:]  # Temperature profiles\n",
    "                abs_humidity_data = dataset.variables['AHProf'][:]  # Absolute humidity profiles\n",
    "                rel_humidity_data = dataset.variables['RHProf'][:]  # Relative humidity profiles\n",
    "                surf_temp_data = dataset.variables['SurfTemp'][:]  # Surface temperature\n",
    "                surf_pres_data = dataset.variables['SurfPres'][:]  # Surface pressure\n",
    "                time_data = dataset.variables['Time'][:]\n",
    "                timems_data = dataset.variables['Timems'][:]\n",
    "\n",
    "                # Convert the time data\n",
    "                start_time = datetime(2001, 1, 1, 0, 0, 0)\n",
    "                times = [start_time + timedelta(seconds=int(time_data[i]), milliseconds=int(timems_data[i])) for i in range(len(time_data))]\n",
    "\n",
    "                # Append the data as dictionaries for each time step\n",
    "                for i, time in enumerate(times):\n",
    "                    profiles_with_time.append({\n",
    "                        'timestamp': time,\n",
    "                        'temperature_altitudes': temperature_altitudes,\n",
    "                        'temperature_profile': temperature_data[i, :],\n",
    "                        'humidity_altitudes': humidity_altitudes,\n",
    "                        'abs_humidity_profile': abs_humidity_data[i, :],\n",
    "                        'rel_humidity_profile': rel_humidity_data[i, :],\n",
    "                        'surf_temp': surf_temp_data[i],  # Surface temperature for this timestamp\n",
    "                        'surf_pres': surf_pres_data[i]   # Surface pressure for this timestamp\n",
    "                    })\n",
    "\n",
    "                # Close the dataset\n",
    "                dataset.close()\n",
    "\n",
    "            except OSError as e:\n",
    "                print(f\"Error reading file {file_path}: {e}\")\n",
    "\n",
    "# Create a DataFrame from the list of profiles\n",
    "df = pd.DataFrame(profiles_with_time)\n",
    "\n",
    "# Set the timestamp as the index\n",
    "#df['timestamp'] = pd.to_datetime(df['timestamp'])\n",
    "\n",
    "#df.set_index('timestamp', inplace=True)\n",
    "\n",
    "# Print the first few rows to verify the structure\n",
    "print(df.head())\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "6d42a76b",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Apply the pressure calculation for every timestamp and add it to the DataFrame\n",
    "df['calculated_pressures'] = calculate_pressure_at_heights(df)\n",
    "\n",
    "# Calculate saturation vapor pressure for each temperature profile\n",
    "df['saturation_vapor_pressure'] = df['temperature_profile'].apply(\n",
    "    lambda temp_profile: [calculate_saturation_vapor_pressure(temp) for temp in temp_profile]\n",
    ")\n",
    "\n",
    "# Calculate vapor pressure using relative humidity and saturation vapor pressure\n",
    "df['vapor_pressure'] = df.apply(\n",
    "    lambda row: [rh * es / 100 for rh, es in zip(row['rel_humidity_profile'], row['saturation_vapor_pressure'])],\n",
    "    axis=1\n",
    ")\n",
    "\n",
    "# Calculate specific humidity for every altitude and time step\n",
    "df['specific_humidity'] = df.apply(\n",
    "    lambda row: [calculate_specific_humidity(ev, p) for ev, p in zip(row['vapor_pressure'], row['calculated_pressures'])],\n",
    "    axis=1\n",
    ")\n",
    "\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "bec7b5b0",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Apply the theta_v calculation to each row in the DataFrame\n",
    "df['theta_v'] = df.apply(calculate_theta_v_alternative_by_altitude, axis=1)\n",
    "print(df.head())"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "8d8abe47",
   "metadata": {
    "scrolled": true
   },
   "outputs": [],
   "source": [
    "# Function to compute the mean profile for arrays\n",
    "def compute_mean_profile(profiles):\n",
    "    if len(profiles) == 0:\n",
    "        return []\n",
    "    profiles_array = np.array(profiles)  # Convert to numpy array\n",
    "    return np.mean(profiles_array, axis=0).tolist()  # Convert back to list for JSON-friendly output\n",
    "\n",
    "# Resample and average function\n",
    "def resample_and_average(df, interval='10T'):\n",
    "    resampled = df.resample(interval).agg({\n",
    "        'temperature_altitudes': 'first',  # Assuming this does not change often\n",
    "        'temperature_profile': lambda x: compute_mean_profile(list(x)),\n",
    "        'humidity_altitudes': 'first',  # Same assumption\n",
    "        'abs_humidity_profile': lambda x: compute_mean_profile(list(x)),\n",
    "        'rel_humidity_profile': lambda x: compute_mean_profile(list(x)),\n",
    "        'surf_temp': 'mean',  # Average surface temperature\n",
    "        'surf_pres': 'mean',  # Average surface pressure\n",
    "        'calculated_pressures': lambda x: compute_mean_profile(list(x)),\n",
    "        'saturation_vapor_pressure': lambda x: compute_mean_profile(list(x)),\n",
    "        'vapor_pressure': lambda x: compute_mean_profile(list(x)),\n",
    "        'specific_humidity': lambda x: compute_mean_profile(list(x)),\n",
    "        'theta_v': lambda x: compute_mean_profile(list(x)),\n",
    "    })\n",
    "    return resampled\n",
    "\n",
    "df['timestamp'] = pd.to_datetime(df['timestamp'])\n",
    "# Set 'timestamp' as the index\n",
    "df.set_index('timestamp', inplace=True)\n",
    "\n",
    "\n",
    "# Apply the resampling function\n",
    "df_10min_avg = resample_and_average(df)  # Adjust interval as needed\n",
    "\n",
    "# Reset index if you want 'timestamp' back as a column\n",
    "df_10min_avg.reset_index(inplace=True)\n",
    "\n",
    "# Display the first few rows of the averaged DataFrame\n",
    "print(df_10min_avg.head())\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "b4f6fa9d",
   "metadata": {},
   "outputs": [],
   "source": [
    "\n",
    "# 1. Build the output‐path inside the same folder\n",
    "parquet_path = os.path.join(day_folder_path, \"cloud_radar_vertical_dataset_10min.parquet\")\n",
    "\n",
    "# 2. Save `df` to Parquet\n",
    "df_10min_avg.to_parquet(parquet_path, engine=\"pyarrow\", index=False)\n",
    "\n",
    "print(f\"Saved DataFrame to: {parquet_path}\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "e6ff7055",
   "metadata": {
    "scrolled": true
   },
   "outputs": [],
   "source": [
    "\n",
    "# Assuming df_10min_avg is your averaged DataFrame and it's structured correctly.\n",
    "# Select the 10th entry in the averaged DataFrame\n",
    "example_data = df_10min_avg.iloc[79]  # Indexing starts at 0, so 9 is the 10th entry\n",
    "print(example_data)\n",
    "# Create subplots in one row\n",
    "fig, axs = plt.subplots(1, 4, figsize=(20, 5), sharey=True)\n",
    "\n",
    "# Extracting the altitude array (assuming it's the same for all parameters)\n",
    "altitude = np.array(example_data['temperature_altitudes'])  # Adjusted for altitude data\n",
    "\n",
    "# Plot Temperature\n",
    "axs[0].scatter(example_data['temperature_profile'], altitude, label='Temperature (K)', color='red')\n",
    "axs[0].set_title('Temperature (K)', fontsize=14)\n",
    "axs[0].set_xlabel('Value')\n",
    "axs[0].grid()\n",
    "\n",
    "# Plot Theta_v\n",
    "axs[1].scatter(example_data['theta_v'], altitude, label='Theta_v (K)', color='orange')\n",
    "axs[1].set_title('Theta_v (K)', fontsize=14)\n",
    "axs[1].set_xlabel('Value')\n",
    "axs[1].grid()\n",
    "\n",
    "# Plot Relative Humidity (RH)\n",
    "axs[2].scatter(example_data['rel_humidity_profile'], altitude, label='Relative Humidity (%)', color='blue')\n",
    "axs[2].set_title('Relative Humidity (%)', fontsize=14)\n",
    "axs[2].set_xlabel('Value')\n",
    "axs[2].grid()\n",
    "\n",
    "# Plot Specific Humidity (qv)\n",
    "axs[3].scatter(example_data['specific_humidity'], altitude, label='Specific Humidity (g/kg)', color='green')\n",
    "axs[3].set_title('Specific Humidity (g/kg)', fontsize=14)\n",
    "axs[3].set_xlabel('Value')\n",
    "axs[3].grid()\n",
    "\n",
    "# Set shared y-axis limits\n",
    "axs[0].set_ylabel('Altitude (m)')\n",
    "\n",
    "plt.tight_layout()\n",
    "\n",
    "# Add a horizontal line at 2 km\n",
    "for ax in axs:\n",
    "    ax.axhline(y=2000, color='red', linestyle='--')\n",
    "    ax.set_ylabel('Altitude (m)')\n",
    "#plt.ylim(0,2000)\n",
    "plt.suptitle(f'Profile at Time:  {example_data[\"timestamp\"]}', fontsize=16, y=1.02)  # Using example_data.name for the timestamp\n",
    "plt.show()\n"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.11.7"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}