{
 "cells": [
  {
   "cell_type": "markdown",
   "id": "e485bb0a",
   "metadata": {},
   "source": [
    "### What this script does\n",
    "\n",
    "- Loads corrected LWP (MWR) and combined SWR/LWR parquet files.\n",
    "\n",
    "- Computes solar position and clear-sky GHI (pvlib/Ineichen), merges with data.\n",
    "\n",
    "- Builds plots of SW↓ vs apparent zenith, colors by LWP, plus binned summaries.\n",
    "\n",
    "- Computes CSI and filters by LWP < 300 g/m².\n",
    "\n",
    "- (Optional) Loads Cabauw dataset, adds solar geometry/clear-sky GHI, and compares Warmenhuizen vs Cabauw curves by LWP bins.\n",
    "\n",
    "#### Edit before running\n",
    "1) Your local data folders:\n",
    "- BASE_DIR_MWR = r\"C:\\path\\to\\your\\Microwave_radiometer\"\n",
    "- BASE_FOLDER  = r\"C:\\path\\to\\your\\radiometer\"\n",
    "\n",
    "2) Parquet filenames (change only if yours differ)\n",
    "- FILENAME_LWP_CORRECTED = \"Corrected_LWP_Data.parquet\"\n",
    "- FILENAME_SWR = \"Combined_SWR_Data.parquet\"\n",
    "- FILENAME_LWR = \"Combined_LWR_Data.parquet\"    # (loaded if you decide to use it)\n",
    "\n",
    "3) Site coordinates (used by pvlib)\n",
    "- LATITUDE  = 52.6324\n",
    "- LONGITUDE = 4.7534\n",
    "- ALTITUDE  = 0\n",
    "\n",
    "4) Optional: Cabauw dataset path (enable only if you have this file):\n",
    "\n",
    "   cabauw_parquet_path = r\"C:\\path\\to\\your\\Cabauw\\cabauw_merged_data_April_June_July.parquet\"\n",
    "\n",
    "5) Plot/filters you may want to tweak"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "c5be916f",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Imports\n",
    "import os\n",
    "import pandas as pd\n",
    "import numpy as np\n",
    "import matplotlib.pyplot as plt\n",
    "import matplotlib.colors as mcolors\n",
    "import pvlib\n",
    "import matplotlib as mpl\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "ab8f8217",
   "metadata": {},
   "outputs": [],
   "source": [
    "\n",
    "## Edit before running!!!\n",
    "# 1) Your local data folders:\n",
    "# Example:\n",
    "# BASE_DIR_MWR = r\"D:\\Thesis\\data\\Microwave_radiometer\"\n",
    "# BASE_FOLDER  = r\"D:\\Thesis\\data\\radiometer\"\n",
    "BASE_DIR_MWR = r\"C:\\path\\to\\your\\Microwave_radiometer\"\n",
    "BASE_FOLDER  = r\"C:\\path\\to\\your\\radiometer\"\n",
    "\n",
    "\n",
    "FILENAME_LWP_CORRECTED = 'Corrected_LWP_Data.parquet'\n",
    "FILENAME_SWR = 'Combined_SWR_Data.parquet'\n",
    "FILENAME_LWR = 'Combined_LWR_Data.parquet'\n",
    "# Constants\n",
    "LATITUDE = 52.6324  # Alkmaar, Netherlands\n",
    "LONGITUDE = 4.7534\n",
    "ALTITUDE = 0\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "8a5352d8",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Function to load a Parquet file\n",
    "def load_parquet_file(base_dir, filename):\n",
    "    file_path = os.path.join(base_dir, filename)\n",
    "    return pd.read_parquet(file_path)\n",
    "\n",
    "def calculate_solar_position_and_clear_sky(latitude, longitude, timestamps):\n",
    "    # Calculate solar position\n",
    "    solar_position = pvlib.solarposition.get_solarposition(timestamps, latitude, longitude)\n",
    "\n",
    "    # Create location object\n",
    "    location = pvlib.location.Location(latitude, longitude)\n",
    "\n",
    "    # Calculate clear-sky GHI using the Ineichen model\n",
    "    clearsky = location.get_clearsky(timestamps, model='ineichen')\n",
    "    \n",
    "    return solar_position, clearsky['ghi']\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "2a47b30d",
   "metadata": {
    "scrolled": false
   },
   "outputs": [],
   "source": [
    "def load_and_prepare_data():\n",
    "    \"\"\"Load and prepare the initial merged data.\"\"\"\n",
    "    # Load LWP corrected data\n",
    "    df_lwp_corrected = load_parquet_file(BASE_DIR_MWR, FILENAME_LWP_CORRECTED)\n",
    "    df_combined_swr = load_parquet_file(BASE_FOLDER, FILENAME_SWR)\n",
    "\n",
    "    # Ensure TIMESTAMP columns are in datetime format\n",
    "    df_lwp_corrected['TIMESTAMP'] = pd.to_datetime(df_lwp_corrected['TIMESTAMP'], errors='coerce')\n",
    "    df_combined_swr['TIMESTAMP'] = pd.to_datetime(df_combined_swr['TIMESTAMP'], errors='coerce')\n",
    "    \n",
    "    # Drop rows with NaT in TIMESTAMP\n",
    "    df_lwp_corrected.dropna(subset=['TIMESTAMP'], inplace=True)\n",
    "    df_combined_swr.dropna(subset=['TIMESTAMP'], inplace=True)\n",
    "    \n",
    "    # Filter SWR data\n",
    "    df_combined_swr = df_combined_swr[df_combined_swr['SR15D1Dn_Irr'] > 10].reset_index(drop=True)\n",
    "    \n",
    "   \n",
    "    #df_combined_swr['TIMESTAMP'] = df_combined_swr['TIMESTAMP'].dt.round('10T')\n",
    "    # Optional: average values over 10-minute intervals (if desired)\n",
    "    #df_combined_swr = df_combined_swr.groupby('TIMESTAMP').mean().reset_index()\n",
    "\n",
    "    # Calculate solar position and clear-sky GHI\n",
    "    timestamps = pd.DatetimeIndex(df_combined_swr['TIMESTAMP'])\n",
    "    solar_position, clear_sky_ghi = calculate_solar_position_and_clear_sky(LATITUDE, LONGITUDE, timestamps)\n",
    "\n",
    "    # Add calculated values to SWR DataFrame\n",
    "    df_combined_swr['apparent_zenith'] = solar_position['apparent_zenith'].values\n",
    "    df_combined_swr['clear_sky_ghi'] = clear_sky_ghi.values\n",
    "\n",
    "    # Merge with LWP corrected data on TIMESTAMP\n",
    "    merged_final_df = pd.merge(df_combined_swr, df_lwp_corrected, on='TIMESTAMP', how='inner')\n",
    "    \n",
    "    return df_combined_swr, merged_final_df\n",
    "\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "3a2ba87a",
   "metadata": {},
   "outputs": [],
   "source": [
    "def calculate_csi(df):\n",
    "    \"\"\"Calculate CSI and add it to the DataFrame.\"\"\"\n",
    "    df['CSI'] = df['SR15D1Dn_Irr'] / df['clear_sky_ghi']\n",
    "    return df\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "c1ad21b3",
   "metadata": {},
   "outputs": [],
   "source": [
    "def plot_data(df):\n",
    "    # Set font properties for presentation slides without bold text\n",
    "    plt.rcParams.update({\n",
    "    'font.size': 16,        # Increase the default font size\n",
    "    'axes.titlesize': 20,   # Larger title size\n",
    "    'axes.labelsize': 18,   # Larger label size\n",
    "    'legend.fontsize': 16,  # Increase legend font size\n",
    "    'xtick.labelsize': 14,  # Increase x-tick size\n",
    "    'ytick.labelsize': 14,  # Increase y-tick size\n",
    "        })\n",
    "\n",
    "    \"\"\"Plot the data for Shortwave Down Radiation vs. Solar Zenith Angle.\"\"\"\n",
    "    plt.figure(figsize=(10, 6))\n",
    "    plt.plot(df['apparent_zenith'], df['SR15D1Dn_Irr'], 'ko', markersize=0.1, alpha=0.1, label='Measured SWR')\n",
    "    plt.plot(df['apparent_zenith'], df['clear_sky_ghi'], 'ro', markersize=0.1, alpha=0.5, label='Clear-Sky GHI', zorder=5)\n",
    "    \n",
    "    plt.xlabel('Apparent Zenith Angle (degrees)',fontsize=16)\n",
    "    plt.ylabel('$SW_{\\downarrow}$ Radiation (W/m²)',fontsize=16)\n",
    "    plt.title('$SW_{\\downarrow}$ Radiation vs Solar Zenith Angle (March-June)',fontsize=18)\n",
    "    #plt.legend()\n",
    "    plt.grid(True, linestyle='--', linewidth=0.6, alpha=0.7)\n",
    "\n",
    "    plt.tight_layout(rect=[0, 0, 1, 0.96])  # Adjust rect to leave space for the title\n",
    "\n",
    "    plt.show()\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "6a6a6b7c",
   "metadata": {},
   "outputs": [],
   "source": [
    "def filter_clear_sky_days(df):\n",
    "    \"\"\"Filter the DataFrame for clear-sky conditions and low LWP.\"\"\"\n",
    "    # Filter for LWP_Corrected less than 50 and CSI values greater than 0.9\n",
    "    filtered_df = df[(df['LWP_Corrected'] < 300)].reset_index(drop=True)#& (df['CSI'] > 0.9)].reset_index(drop=True)\n",
    "    return filtered_df\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "6a6078ab",
   "metadata": {},
   "outputs": [],
   "source": [
    "def plot_zenith_vs_sw_down(df):\n",
    "    \"\"\"Plot Apparent Zenith vs. SW Down Radiation with color map based on LWP values.\"\"\"\n",
    "    plt.figure(figsize=(10, 6))\n",
    "    \n",
    "      # Define bins for LWP_Corrected from 0 to 300 with 60 bins\n",
    "    bins = np.linspace(0, 300, num=61)  # Create 60 bins from 0 to 300\n",
    "    #cmap = plt.cm.viridis  # Use a continuous colormap like viridis for better clarity\n",
    "    cmap = plt.cm.jet  # Use the jet colormap for coloring\n",
    "\n",
    "    # Use `cut` to assign bin indices based on the LWP_Corrected values\n",
    "    df['LWP_Binned'] = pd.cut(df['LWP_Corrected'], bins=bins, labels=False, include_lowest=True)\n",
    "\n",
    "    # Create scatter plot with binned colors\n",
    "    scatter = plt.scatter(\n",
    "        df['apparent_zenith'], \n",
    "        df['SR15D1Dn_Irr'], \n",
    "        c=df['LWP_Corrected'],  # Use the actual LWP values for coloring\n",
    "        cmap=cmap,\n",
    "        s=1,                   \n",
    "        alpha=0.5              \n",
    "    )\n",
    "    \n",
    "    # Add a color bar with clear labeling\n",
    "    cbar = plt.colorbar(scatter)\n",
    "    cbar.set_label('LWP Corrected (g/m²)', fontsize=12)\n",
    "    cbar.ax.tick_params(labelsize=10)  # Adjust tick size for readability\n",
    "    \n",
    "    \n",
    "    # Set labels and title\n",
    "    plt.xlabel('Apparent Zenith Angle (degrees)')\n",
    "    plt.ylabel('$SW_{\\downarrow}$ Radiation (W/m²)')\n",
    "    plt.title('$SW_{\\downarrow}$ Radiation vs Apparent Zenith (Colored by LWP)')\n",
    "    \n",
    "    plt.grid(True, linestyle='--', linewidth=0.6, alpha=0.7)\n",
    "\n",
    "    plt.tight_layout(rect=[0, 0, 1, 0.96])  # Adjust rect to leave space for the title\n",
    "\n",
    "    plt.show()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "99820476",
   "metadata": {
    "scrolled": true
   },
   "outputs": [],
   "source": [
    "def main():\n",
    "    global df_combined_swr, merged_final_df\n",
    "    # Load and prepare the data only once\n",
    "    df_combined_swr, merged_final_df = load_and_prepare_data()\n",
    "    \n",
    "    # Display the merged DataFrame without CSI\n",
    "    print(\"\\SWR combined with clear sky ghi:\")\n",
    "    print(df_combined_swr)\n",
    "    print(\"\\nMerged DataFrame without CSI:\")\n",
    "    print(merged_final_df.head())\n",
    "\n",
    "if __name__ == \"__main__\":\n",
    "    main()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "19f54d25",
   "metadata": {},
   "outputs": [],
   "source": [
    "plot_data(df_combined_swr)  # Plot the data\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "9fd06139",
   "metadata": {
    "scrolled": true
   },
   "outputs": [],
   "source": [
    "merged_final_df = calculate_csi(merged_final_df)  # Calculate CSI\n",
    "print(\"\\nMerged DataFrame with CSI:\")\n",
    "print(merged_final_df)\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "ae78e175",
   "metadata": {
    "scrolled": true
   },
   "outputs": [],
   "source": [
    "# Filter for clear-sky days and low LWP\n",
    "clear_sky_df = filter_clear_sky_days(merged_final_df)\n",
    "print(\"\\nMerged DataFrame filtered for LWP<300:\")\n",
    "\n",
    "print(clear_sky_df)\n",
    "plot_zenith_vs_sw_down(clear_sky_df)\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "2e271ad0",
   "metadata": {
    "scrolled": true
   },
   "outputs": [],
   "source": [
    "plot_zenith_vs_sw_down(merged_final_df)\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "1ab09473",
   "metadata": {},
   "outputs": [],
   "source": [
    "def plot_binned_zenith_vs_sw_down_colored_by_lwp(df, lwp_bin_size=30, zenith_bin_size=3):\n",
    "    \"\"\"Plot SW Down Radiation vs. Apparent Zenith with color map based on binned LWP.\"\"\"\n",
    "    \n",
    "    # Step 1: Bin LWP_Corrected in intervals of 5 and label the bins with their midpoint values\n",
    "    lwp_min, lwp_max = df['LWP_Corrected'].min(), df['LWP_Corrected'].max()\n",
    "    lwp_bins = np.arange(lwp_min, lwp_max + lwp_bin_size, lwp_bin_size)\n",
    "    df['LWP_Binned'] = pd.cut(df['LWP_Corrected'], bins=lwp_bins, labels=lwp_bins[:-1] + lwp_bin_size / 2, include_lowest=True)\n",
    "    \n",
    "    # Step 2: Bin apparent_zenith and calculate mean SW Down Radiation and mean LWP for each zenith bin\n",
    "    zenith_min, zenith_max = df['apparent_zenith'].min(), df['apparent_zenith'].max()\n",
    "    zenith_bins = np.arange(zenith_min, zenith_max + zenith_bin_size, zenith_bin_size)\n",
    "    df['Zenith_Binned'] = pd.cut(df['apparent_zenith'], bins=zenith_bins, labels=zenith_bins[:-1] + zenith_bin_size / 2, include_lowest=True)\n",
    "    \n",
    "    # Step 3: Group by Zenith_Binned and LWP_Binned and calculate the mean of SW Down Radiation and LWP\n",
    "    binned_data = df.groupby(['Zenith_Binned', 'LWP_Binned']).agg({\n",
    "        'SR15D1Dn_Irr': 'mean',\n",
    "        'LWP_Corrected': 'mean'\n",
    "    }).dropna().reset_index()\n",
    "\n",
    "    # Plot the data\n",
    "    plt.figure(figsize=(12, 7))\n",
    "    \n",
    "    # Use scatter plot to show SW Down Radiation with colors representing LWP levels\n",
    "    scatter = plt.scatter(\n",
    "        binned_data['Zenith_Binned'].astype(float),   # Convert to float for continuous axis\n",
    "        binned_data['SR15D1Dn_Irr'], \n",
    "        c=binned_data['LWP_Corrected'],               # Color by mean LWP_Corrected\n",
    "        cmap='jet',                                   # Jet colormap for a vibrant color range\n",
    "        s=50,                                         # Size of scatter points\n",
    "        edgecolor='k',                                # Outline to make points distinct\n",
    "        alpha=0.7\n",
    "    )\n",
    "    \n",
    "    # Add a color bar to indicate LWP values\n",
    "    cbar = plt.colorbar(scatter)\n",
    "    cbar.set_label('Mean LWP Corrected (g/m²)', fontsize=12)\n",
    "    cbar.ax.tick_params(labelsize=10)\n",
    "    \n",
    "    # Set labels and title\n",
    "    plt.xlabel('Binned Apparent Zenith Angle (degrees)')\n",
    "    plt.ylabel('Mean SW Down Radiation (W/m²)')\n",
    "    plt.title('Mean SW Down Radiation vs Binned Apparent Zenith (Colored by Mean LWP)')\n",
    "    plt.grid(True, linestyle='--', linewidth=0.5, alpha=0.7)\n",
    "    \n",
    "    plt.tight_layout()\n",
    "    plt.show()\n",
    "\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "b1b5920e",
   "metadata": {
    "scrolled": false
   },
   "outputs": [],
   "source": [
    "plot_binned_zenith_vs_sw_down_colored_by_lwp(clear_sky_df)\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "6b8be477",
   "metadata": {},
   "outputs": [],
   "source": [
    "def plot_sw_down_parametrized_vs_binned_theta_with_lwp_colorbar(df_lwp, S0=1361, rho_l=1000, re=10,lwp_bin_size=10, theta_bin_size=3):\n",
    "    \"\"\"Plot SW Down Radiation vs. Binned Apparent Zenith with LWP as colorbar.\"\"\"\n",
    "    \n",
    "    # Step 1: Calculate tau and A using the provided formulas\n",
    "    # Calculate tau using the formula: tau = (3/2) * (LWP / (rho_l * re))\n",
    "    df_lwp['tau'] = (3 / 2) * (df_lwp['LWP_Corrected'] * 1000 / (rho_l * re))  # Convert LWP to kg/m²\n",
    "    # Calculate cloud transmission A using the formula: A = tau / (6.8 + tau)\n",
    "    df_lwp['A'] = df_lwp['tau'] / (6.8 + df_lwp['tau'])  \n",
    "    \n",
    "    # Step 2: Extract solar zenith angle (theta) and calculate SW_toa (top of the atmosphere radiation)\n",
    "    df_lwp['theta_0'] = df_lwp['apparent_zenith']  # Assuming solar_position['apparent_zenith'] is available\n",
    "    df_lwp['SW_TOA'] = S0 * np.maximum(np.cos(np.radians(df_lwp['theta_0'])), 0)  # Calculate SW_toa\n",
    "    \n",
    "    # Step 3: Calculate SW Down Radiation (SW_dn = SW_toa * (1 - A))\n",
    "    df_lwp['SW_dn_parameterized'] = df_lwp['SW_TOA'] * (1 - df_lwp['A'])  # Use the formula for SW_down\n",
    "    \n",
    "    # Step 4: Bin apparent_zenith (theta) and calculate the mean of SW_dn and LWP for each bin\n",
    "    zenith_min, zenith_max = df_lwp['theta_0'].min(), df_lwp['theta_0'].max()\n",
    "    zenith_bins = np.arange(zenith_min, zenith_max + theta_bin_size, theta_bin_size)\n",
    "    df_lwp['Zenith_Binned'] = pd.cut(df_lwp['theta_0'], bins=zenith_bins, labels=zenith_bins[:-1] + theta_bin_size / 2, include_lowest=True)\n",
    "     # Step 1: Bin LWP_Corrected in intervals of 5 and label the bins with their midpoint values\n",
    "    lwp_min, lwp_max = df_lwp['LWP_Corrected'].min(), df_lwp['LWP_Corrected'].max()\n",
    "    lwp_bins = np.arange(lwp_min, lwp_max + lwp_bin_size, lwp_bin_size)\n",
    "    df_lwp['LWP_Binned'] = pd.cut(df_lwp['LWP_Corrected'], bins=lwp_bins, labels=lwp_bins[:-1] + lwp_bin_size / 2, include_lowest=True)\n",
    "    \n",
    "    # Step 5: Group by Zenith_Binned and calculate mean SW_dn and LWP for each zenith bin\n",
    "    # Step 3: Group by Zenith_Binned and LWP_Binned and calculate the mean of SW Down Radiation and LWP\n",
    "    binned_data = df_lwp.groupby(['Zenith_Binned', 'LWP_Binned']).agg({\n",
    "        'SW_dn_parameterized': 'mean',\n",
    "        'LWP_Corrected': 'mean'\n",
    "    }).dropna().reset_index()\n",
    "    \n",
    "    # Step 6: Plot the data\n",
    "    plt.figure(figsize=(12, 7))\n",
    "    \n",
    "    # Scatter plot with color map based on LWP_Corrected\n",
    "    scatter = plt.scatter(\n",
    "        binned_data['Zenith_Binned'].astype(float),  # Convert binned theta values to float for continuous axis\n",
    "        binned_data['SW_dn_parameterized'],                       # SW Down Radiation on the y-axis\n",
    "        c=binned_data['LWP_Corrected'],             # Color by LWP values\n",
    "        cmap='jet',                             # Use 'viridis' for perceptually uniform colormap\n",
    "        s=50,                                       # Size of scatter points\n",
    "        edgecolor='k',                              # Black edge for distinct points\n",
    "        alpha=0.7                                   # Transparency for overlapping points\n",
    "    )\n",
    "    \n",
    "    # Add colorbar for LWP values\n",
    "    cbar = plt.colorbar(scatter)\n",
    "    cbar.set_label('Mean LWP Corrected (g/m²)', fontsize=12)\n",
    "    cbar.ax.tick_params(labelsize=10)\n",
    "    \n",
    "    # Set labels and title\n",
    "    plt.xlabel('Binned Apparent Zenith Angle (degrees)', fontsize=14)\n",
    "    plt.ylabel('SW Down Radiation (W/m²)', fontsize=14)\n",
    "    plt.title('SW Down Radiation vs Binned Apparent Zenith (Colored by Mean LWP)', fontsize=16)\n",
    "    \n",
    "    # Optional: Add grid and axis limits\n",
    "    plt.grid(True, linestyle='--', linewidth=0.5, alpha=0.7)\n",
    "    plt.tight_layout()\n",
    "    plt.show()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "fff67b92",
   "metadata": {},
   "outputs": [],
   "source": [
    "plot_sw_down_parametrized_vs_binned_theta_with_lwp_colorbar(clear_sky_df)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "b17e3339",
   "metadata": {},
   "outputs": [],
   "source": [
    "def plot_binned_sw_down_vs_lwp_colored_by_zenith(df, lwp_bin_size=5, zenith_bin_size=3):\n",
    "    \"\"\"Plot SW Down Radiation vs. LWP with color map based on binned Zenith.\"\"\"\n",
    "    \n",
    "    # Step 1: Bin LWP in intervals of `lwp_bin_size` and label the bins with midpoint values\n",
    "    lwp_min, lwp_max = df['LWP_Corrected'].min(), df['LWP_Corrected'].max()\n",
    "    lwp_bins = np.arange(lwp_min, lwp_max + lwp_bin_size, lwp_bin_size)\n",
    "    df['LWP_Binned'] = pd.cut(df['LWP_Corrected'], bins=lwp_bins, labels=lwp_bins[:-1] + lwp_bin_size / 2, include_lowest=True)\n",
    "    \n",
    "    # Step 2: Bin apparent_zenith and calculate mean SW Down Radiation and mean Zenith for each zenith bin\n",
    "    zenith_min, zenith_max = df['apparent_zenith'].min(), df['apparent_zenith'].max()\n",
    "    zenith_bins = np.arange(zenith_min, zenith_max + zenith_bin_size, zenith_bin_size)\n",
    "    df['Zenith_Binned'] = pd.cut(df['apparent_zenith'], bins=zenith_bins, labels=zenith_bins[:-1] + zenith_bin_size / 2, include_lowest=True)\n",
    "    \n",
    "    # Step 3: Group by LWP_Binned and Zenith_Binned to calculate the mean of SW Down Radiation and Zenith\n",
    "    binned_data = df.groupby(['LWP_Binned', 'Zenith_Binned']).agg({\n",
    "        'SR15D1Dn_Irr': 'mean',\n",
    "        'apparent_zenith': 'mean'\n",
    "    }).dropna().reset_index()\n",
    "\n",
    "    # Plot the data\n",
    "    plt.figure(figsize=(12, 7))\n",
    "    \n",
    "    # Scatter plot to show SW Down Radiation vs. LWP with colors representing Zenith\n",
    "    scatter = plt.scatter(\n",
    "        binned_data['LWP_Binned'].astype(float),      # Convert to float for continuous axis\n",
    "        binned_data['SR15D1Dn_Irr'], \n",
    "        c=binned_data['apparent_zenith'],             # Color by mean apparent_zenith\n",
    "        cmap='jet',                                   # Use jet colormap\n",
    "        s=50,                                         # Size of scatter points\n",
    "        edgecolor='k',                                # Outline for clarity\n",
    "        alpha=0.7\n",
    "    )\n",
    "    \n",
    "    # Add color bar for Zenith values\n",
    "    cbar = plt.colorbar(scatter)\n",
    "    cbar.set_label('Mean Apparent Zenith Angle (degrees)', fontsize=12)\n",
    "    cbar.ax.tick_params(labelsize=10)\n",
    "    \n",
    "    # Set labels and title\n",
    "    plt.xlabel('Binned LWP (g/m²)')\n",
    "    plt.ylabel('Mean SW Down Radiation (W/m²)')\n",
    "    plt.title('Mean SW Down Radiation vs Binned LWP (Colored by Mean Zenith Angle)')\n",
    "    plt.grid(True, linestyle='--', linewidth=0.5, alpha=0.7)\n",
    "    \n",
    "    plt.tight_layout()\n",
    "    plt.show()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "f290388b",
   "metadata": {},
   "outputs": [],
   "source": [
    "plot_binned_sw_down_vs_lwp_colored_by_zenith(clear_sky_df)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "1bc7f8c1",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Edit before running!!! \n",
    "#Cabauw dataset path (enable only if you have this file)\n",
    "# Example: cabauw_parquet_path = r\"D:\\Thesis\\data\\Cabauw\\cabauw_merged_data_April_June_July.parquet\"\n",
    "cabauw_parquet_path = r\"C:\\path\\to\\your\\Cabauw\\cabauw_merged_data_April_June_July.parquet\"\n",
    "\n",
    "# Load Cabauw data\n",
    "cabauw_df = pd.read_parquet(cabauw_parquet_path)\n",
    "cabauw_df['TIMESTAMP'] = pd.to_datetime(cabauw_df['TIMESTAMP'], errors='coerce')\n",
    "\n",
    "# Step 3: Call your existing function\n",
    "timestamps = pd.DatetimeIndex(cabauw_df['TIMESTAMP'])\n",
    "solar_position, clear_sky_ghi = calculate_solar_position_and_clear_sky(51.971, 4.927, timestamps)\n",
    "\n",
    "# Step 4: Assign values back to DataFrame\n",
    "cabauw_df['apparent_zenith'] = solar_position['apparent_zenith'].values\n",
    "cabauw_df['clear_sky_ghi'] = clear_sky_ghi.values\n",
    "\n",
    "print(cabauw_df)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "39bf53f5",
   "metadata": {},
   "outputs": [],
   "source": [
    "def plot_swd_vs_theta_lines_only_lwp_under_300(warmenhuizen_df, cabauw_df, lwp_bin_size=50, theta_bin_size=3):\n",
    "    \"\"\"\n",
    "    Plot SW↓ vs apparent zenith angle for Warmenhuizen and Cabauw, grouped by LWP bins < 300 g/m².\n",
    "    Warmenhuizen = solid lines, Cabauw = dashed lines. Uses colorbar for LWP bins.\n",
    "    \"\"\"\n",
    "    # Filter for LWP < 300 g/m²\n",
    "    warmenhuizen_df = warmenhuizen_df[warmenhuizen_df['LWP_Corrected'] < 300]\n",
    "    cabauw_df = cabauw_df[cabauw_df['LWP (kg m-2)'] * 1000 < 300]\n",
    "\n",
    "    # Bin LWP\n",
    "    lwp_bins = np.arange(0, 301, lwp_bin_size)\n",
    "    lwp_labels = [f\"{int(b)}-{int(b + lwp_bin_size)}\" for b in lwp_bins[:-1]]\n",
    "\n",
    "    warmenhuizen_df['LWP_Binned'] = pd.cut(\n",
    "        warmenhuizen_df['LWP_Corrected'],\n",
    "        bins=lwp_bins,\n",
    "        labels=lwp_labels,\n",
    "        include_lowest=True\n",
    "    )\n",
    "\n",
    "    cabauw_df['LWP_Binned'] = pd.cut(\n",
    "        cabauw_df['LWP (kg m-2)'] * 1000,\n",
    "        bins=lwp_bins,\n",
    "        labels=lwp_labels,\n",
    "        include_lowest=True\n",
    "    )\n",
    "\n",
    "    # Bin zenith\n",
    "    zenith_bins = np.arange(0, 95, theta_bin_size)\n",
    "    warmenhuizen_df['Zenith_Binned'] = pd.cut(warmenhuizen_df['apparent_zenith'], bins=zenith_bins, labels=zenith_bins[:-1] + theta_bin_size / 2)\n",
    "    cabauw_df['Zenith_Binned'] = pd.cut(cabauw_df['apparent_zenith'], bins=zenith_bins, labels=zenith_bins[:-1] + theta_bin_size / 2)\n",
    "\n",
    "    # Plot setup\n",
    "    fig, ax = plt.subplots(figsize=(12, 7))\n",
    "    cmap = plt.cm.get_cmap('jet', len(lwp_labels))\n",
    "    norm = mpl.colors.Normalize(vmin=0, vmax=len(lwp_labels) - 1)\n",
    "\n",
    "    for i, lwp_label in enumerate(lwp_labels):\n",
    "        color = cmap(i)\n",
    "\n",
    "        # Warmenhuizen\n",
    "        wh_bin = warmenhuizen_df[warmenhuizen_df['LWP_Binned'] == lwp_label]\n",
    "        wh_grouped = wh_bin.groupby('Zenith_Binned', observed=True)['SR15D1Dn_Irr'].mean().dropna()\n",
    "        if not wh_grouped.empty:\n",
    "            ax.plot(wh_grouped.index.astype(float), wh_grouped.values,\n",
    "                   color=color, linestyle='-', label=f'{lwp_label} (WH)')\n",
    "\n",
    "        # Cabauw\n",
    "        cb_bin = cabauw_df[cabauw_df['LWP_Binned'] == lwp_label]\n",
    "        cb_grouped = cb_bin.groupby('Zenith_Binned', observed=True)['SWD'].mean().dropna()\n",
    "        if not cb_grouped.empty:\n",
    "             ax.plot(cb_grouped.index.astype(float), cb_grouped.values,\n",
    "                    color=color, linestyle='--', label=f'{lwp_label} (Cabauw)')\n",
    "\n",
    "    # Axis labels and grid\n",
    "    ax.set_xlabel('Apparent Zenith Angle (°)', fontsize=14)\n",
    "    ax.set_ylabel('SW↓ Radiation (W/m²)', fontsize=14)\n",
    "    ax.set_title('SW↓ vs Apparent Zenith (LWP < 300 g/m²)', fontsize=16)\n",
    "    #ax.grid(True, linestyle='--', alpha=0.7)\n",
    "\n",
    "    # Colorbar\n",
    "    sm = plt.cm.ScalarMappable(cmap=cmap, norm=norm)\n",
    "    sm.set_array([])\n",
    "    cbar = fig.colorbar(sm, ax=ax, ticks=np.arange(len(lwp_labels)))\n",
    "    cbar.ax.set_yticklabels(lwp_labels)\n",
    "    cbar.set_label('LWP Bin (g/m²)', fontsize=12)\n",
    "    # Custom legend for line styles\n",
    "    from matplotlib.lines import Line2D\n",
    "    custom_lines = [\n",
    "        Line2D([0], [0], color='black', linestyle='-', label='Warmenhuizen (solid)'),\n",
    "        Line2D([0], [0], color='black', linestyle='--', label='Cabauw (dashed)')\n",
    "    ]\n",
    "    ax.legend(handles=custom_lines, loc='upper right', fontsize=10, title=\"Site\")\n",
    "\n",
    "    plt.tight_layout()\n",
    "    plt.show()\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "393754eb",
   "metadata": {
    "scrolled": true
   },
   "outputs": [],
   "source": [
    "plot_swd_vs_theta_lines_only_lwp_under_300(clear_sky_df, cabauw_df)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "97115c6b",
   "metadata": {
    "scrolled": true
   },
   "outputs": [],
   "source": [
    "print(clear_sky_df)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "f388a227",
   "metadata": {},
   "outputs": [],
   "source": [
    "print(cabauw_df)"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.11.7"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}
