{
 "cells": [
  {
   "cell_type": "markdown",
   "id": "2a656de0",
   "metadata": {},
   "source": [
    "### What this script does-\n",
    "\n",
    "- Scans your mast/sonic and cloud radar (rain) folders by month, loads daily CSVs, and inner-joins them on TIMESTAMP.\n",
    "\n",
    "- Builds QC flags:\n",
    "\n",
    "    - Flag_Rain from rain rate,\n",
    "\n",
    "    - Flag_Temp from mast–sonic temperature difference,\n",
    "\n",
    "    - Flag_WS from mast–sonic horizontal wind speed difference.\n",
    "\n",
    "- Summarizes QC pass/fail vs rain and overall usable fraction.\n",
    "\n",
    "- Creates a rain/temperature-filtered dataset, computes radiometer surface temperature from IR20Up via Stefan–Boltzmann, and plots:\n",
    "\n",
    "    - time series of u/v/w components,\n",
    "\n",
    "    - stats for vertical wind (Average_Wind_Uz),\n",
    "\n",
    "    - daily albedo,\n",
    "\n",
    "    - instrument temperature comparisons & diurnal bias,\n",
    "\n",
    "    - mast vs sonic wind speed (including calm/windy day comparisons),\n",
    "\n",
    "    - direction comparisons (mast vs sonic) with circular metrics and wind-rose plots,\n",
    "\n",
    "    - mast vs KNMI wind direction hourly circular mean + circular error metrics and a rose of Δθ.\n",
    "\n",
    "#### Edit these lines before running\n",
    "1) Root folders for mast/sonic data and cloud radar rain data\n",
    "- BASE_MAST  = r\"C:\\path\\to\\Sonic\"        \n",
    "- BASE_RADAR = r\"C:\\path\\to\\Cloud_radar\"  \n",
    "\n",
    "2) Folder names under each root:\n",
    "   MONTHS = ['2024-03', '2024-04', '2024-05']   # add/remove as needed\n",
    "\n",
    "3) QC thresholds: Adjust if your instruments differ\n",
    "\n",
    "\n",
    "4) Optional filtering for plots\n",
    "\n",
    "5) KNMI reference file:\n",
    "   file_path = r\"C:\\path\\to\\KNMI\\uurgeg_235_2021-2030.txt\"  # update to your KNMI file location\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "97ff4b6d",
   "metadata": {},
   "outputs": [],
   "source": [
    "import os\n",
    "import pandas as pd\n",
    "import matplotlib.pyplot as plt\n",
    "import numpy as np\n",
    "import matplotlib.dates as mdates\n",
    "import matplotlib.patches as mpatches\n",
    "import matplotlib.lines as mlines"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "e7dc8161",
   "metadata": {},
   "outputs": [],
   "source": [
    "sigma=5.67e-8 #W*m^-2*K^-4\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "0dc3745c",
   "metadata": {},
   "outputs": [],
   "source": [
    "\n",
    "# 3) Helper to summarize QC vs rain\n",
    "def summarize_flag(big, flag_col, rain_flag_col):\n",
    "    N_rain      = (big[rain_flag_col]==1).sum()\n",
    "    N_dry       = (big[rain_flag_col]==0).sum()\n",
    "    N_rain_fail = ((big[rain_flag_col]==1) & (big[flag_col]==1)).sum()\n",
    "    N_dry_fail  = ((big[rain_flag_col]==0) & (big[flag_col]==1)).sum()\n",
    "    return {\n",
    "        'N_rain':      N_rain,\n",
    "        'rain_fail':   N_rain_fail,\n",
    "        'rain_pass':   N_rain - N_rain_fail,\n",
    "        'rain_fail_%': 100 * N_rain_fail / N_rain if N_rain else 0,\n",
    "        'N_dry':       N_dry,\n",
    "        'dry_fail':    N_dry_fail,\n",
    "        'dry_pass':    N_dry - N_dry_fail,\n",
    "        'dry_fail_%':  100 * N_dry_fail / N_dry if N_dry else 0,\n",
    "    }\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "06d3ae60",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Edit these lines before running!!1\n",
    "#1) Root folders for mast/sonic data and cloud radar rain data\n",
    "BASE_MAST  = r\"C:\\path\\to\\Sonic\"        \n",
    "BASE_RADAR = r\"C:\\path\\to\\Cloud_radar\" \n",
    "\n",
    "\n",
    "# 2) Months to scan\n",
    "MONTHS = ['2024-03', '2024-04', '2024-05']\n",
    "\n",
    "all_days = []\n",
    "\n",
    "for month in MONTHS:\n",
    "    mast_month_dir  = os.path.join(BASE_MAST, month)\n",
    "    radar_month_dir = os.path.join(BASE_RADAR, month)\n",
    "    if not os.path.isdir(mast_month_dir) or not os.path.isdir(radar_month_dir):\n",
    "        continue\n",
    "\n",
    "    # look for date-folders\n",
    "    for date_dir in os.listdir(mast_month_dir):\n",
    "        mast_path  = os.path.join(mast_month_dir, date_dir, 'merged_data_10min.csv')\n",
    "        radar_path = os.path.join(radar_month_dir, date_dir, 'Rain_10min_Averages.csv')\n",
    "        if not (os.path.isfile(mast_path) and os.path.isfile(radar_path)):\n",
    "            continue\n",
    "\n",
    "        # load\n",
    "        df_mast = pd.read_csv(mast_path,  parse_dates=['TIMESTAMP'])\n",
    "        df_rain = pd.read_csv(radar_path, parse_dates=['TIMESTAMP'])\n",
    "\n",
    "        \n",
    "        # then do a simple inner join\n",
    "        df = pd.merge(\n",
    "            df_mast,\n",
    "            df_rain[['TIMESTAMP','Rain']],  # only bring in the Rain column\n",
    "            on='TIMESTAMP',\n",
    "            how='inner'                     # keep only timestamps in both\n",
    "        )\n",
    "\n",
    "\n",
    "        # compute diffs & flags\n",
    "        df['Temp_Diff']      = df['Temperature_K_2.99'] - df['Average_Temperature_Corr']#\n",
    "        df['Flag_Temp']      = (df['Temp_Diff'].abs() > 1).astype(int)\n",
    "        df['Flag_Rain']      = (df['Rain'] > 0).astype(int)\n",
    "        df['wind_speed_sonic_hor']=np.sqrt(df['Average_Wind_Ux']**2+df['Average_Wind_Uy']**2)\n",
    "        df['Windspeed_Diff'] = df['WS_ms_D15014_Avg']-df['wind_speed_sonic_hor']\n",
    "        #df['Windspeed_Diff'] = df['WS_ms_D15014_Avg'] - df['Wind_Speed']\n",
    "        df['Flag_WS']        = (df['Windspeed_Diff'].abs() > 1).astype(int)\n",
    "        df['Flag_Rain_WS']   = df['Flag_Rain']  # same rain mask\n",
    "\n",
    "        all_days.append(df)\n",
    "\n",
    "# concatenate campaign\n",
    "big = pd.concat(all_days, ignore_index=True)\n",
    "\n",
    "\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "75d136d6",
   "metadata": {},
   "outputs": [],
   "source": [
    "print(big.columns)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "b4013c5a",
   "metadata": {},
   "outputs": [],
   "source": [
    "# summarize\n",
    "temp_stats = summarize_flag(big, 'Flag_Temp',    'Flag_Rain')\n",
    "ws_stats   = summarize_flag(big, 'Flag_WS',      'Flag_Rain_WS')\n",
    "\n",
    "print(\"=== Campaign‐Wide Sonic QC vs Rain (Temperature) ===\")\n",
    "print(f\"Rainy intervals : {temp_stats['N_rain']}  |  Fail: {temp_stats['rain_fail']} ({temp_stats['rain_fail_%']:.1f}%)  |  Pass: {temp_stats['rain_pass']}\")\n",
    "print(f\"Dry intervals   : {temp_stats['N_dry']}   |  Fail: {temp_stats['dry_fail']} ({temp_stats['dry_fail_%']:.1f}%)  |  Pass: {temp_stats['dry_pass']}\")\n",
    "\n",
    "print(\"\\n=== Campaign‐Wide Sonic QC vs Rain (Wind Speed) ===\")\n",
    "print(f\"Rainy intervals : {ws_stats['N_rain']}  |  Fail: {ws_stats['rain_fail']} ({ws_stats['rain_fail_%']:.1f}%)  |  Pass: {ws_stats['rain_pass']}\")\n",
    "print(f\"Dry intervals   : {ws_stats['N_dry']}   |  Fail: {ws_stats['dry_fail']} ({ws_stats['dry_fail_%']:.1f}%)  |  Pass: {ws_stats['dry_pass']}\")\n",
    "\n",
    "# overall usable fraction after removing all rain-flagged intervals\n",
    "usable = ((big['Flag_Rain']==0) & (big['Flag_Temp']==0)).sum()\n",
    "total  = len(big)\n",
    "print(f\"\\nOverall usable sonic‐temp intervals (dry & ΔT≤1 K): {usable}/{total} = {100*usable/total:.1f}%\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "e0235608",
   "metadata": {},
   "outputs": [],
   "source": [
    "unique_days = big['TIMESTAMP'].dt.date.nunique()\n",
    "print(f\"Total unique days in merged data: {unique_days}\")\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "6c0d6a28",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Filter out rain or temperature‐flagged intervals\n",
    "filtered = big[(big['Flag_Rain'] == 0) & (big['Flag_Temp'] == 0)].copy()\n",
    "\n",
    "# Compute radiometer‐derived temperature\n",
    "filtered['T_radiometer'] = (filtered['IR20Up'] / sigma) ** 0.25\n",
    "#filtered = filtered[filtered['SR15D1Dn_Irr'] > 10]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "8b9f1f5f",
   "metadata": {
    "scrolled": true
   },
   "outputs": [],
   "source": [
    "# Plot Average_Wind_Uz over time\n",
    "plt.figure(figsize=(12, 6))\n",
    "plt.plot(filtered[\"TIMESTAMP\"], filtered[\"Average_Wind_Uz\"], label='w', color='tab:red', alpha=0.7)\n",
    "plt.plot(filtered[\"TIMESTAMP\"], filtered[\"Average_Wind_Ux\"], label='u', color='tab:green', alpha=0.7)\n",
    "plt.plot(filtered[\"TIMESTAMP\"], filtered[\"Average_Wind_Uy\"], label='v', color='tab:blue', alpha=0.7)\n",
    "\n",
    "plt.xlabel(\"Time\")\n",
    "plt.ylabel(\"Wind Speed (m/s)\")\n",
    "plt.title(\"Wind Speed Over Time\")\n",
    "plt.grid(True)\n",
    "plt.tight_layout()\n",
    "plt.legend()\n",
    "plt.show()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "4cb811b3",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Select and clean\n",
    "w = filtered[['TIMESTAMP', 'Average_Wind_Uz']].copy()\n",
    "w = w.dropna(subset=['Average_Wind_Uz'])\n",
    "w['abs_w'] = w['Average_Wind_Uz'].abs()\n",
    "\n",
    "# Core statistics\n",
    "stats = {\n",
    "    'count': len(w),\n",
    "    'mean_w': w['Average_Wind_Uz'].mean(),\n",
    "    'std_w': w['Average_Wind_Uz'].std(ddof=1),\n",
    "    'median_w': w['Average_Wind_Uz'].median(),\n",
    "    'median_abs_w': w['abs_w'].median(),\n",
    "    'rms_w': np.sqrt(np.mean(w['Average_Wind_Uz']**2)),\n",
    "    'min_w': w['Average_Wind_Uz'].min(),\n",
    "    'p10_w': w['Average_Wind_Uz'].quantile(0.10),\n",
    "    'p25_w': w['Average_Wind_Uz'].quantile(0.25),\n",
    "    'p75_w': w['Average_Wind_Uz'].quantile(0.75),\n",
    "    'p90_w': w['Average_Wind_Uz'].quantile(0.90),\n",
    "    'max_w': w['Average_Wind_Uz'].max(),\n",
    "}\n",
    "\n",
    "# Fractions within small thresholds (useful to show w ~ 0 near surface)\n",
    "thresholds = [0.05, 0.10, 0.20, 0.50]\n",
    "fractions = {f'frac_|w|<={thr}': (w['abs_w'] <= thr).mean()*100 for thr in thresholds}\n",
    "\n",
    "# Print\n",
    "print('--- Statistics for Average_Wind_Uz (10-min means) ---')\n",
    "for k, v in stats.items():\n",
    "    print(f'{k:>14}: {v: .3f} m/s' if 'count' not in k else f'{k:>14}: {v}')\n",
    "print('--- Fraction within thresholds ---')\n",
    "for k, v in fractions.items():\n",
    "    print(f'{k:>14}: {v:5.1f} %')\n",
    "\n",
    "# (Optional) quick check: median |w̄| to cite in text\n",
    "print(f\"\\nMedian |w̄|: {w['abs_w'].median():.3f} m/s\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "db23d87e",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Select a representative day with complete data (e.g., April 15, 2024)\n",
    "day_data = filtered[(filtered['TIMESTAMP'] >= '2024-05-03') & (filtered['TIMESTAMP'] < '2024-05-04')].copy()\n",
    "\n",
    "# Plot vertical wind speed (Uz) over the selected day\n",
    "plt.figure(figsize=(10, 5))\n",
    "plt.plot(day_data['TIMESTAMP'], day_data['Average_Wind_Uz'], label='Vertical Wind Speed (Uz)', color='darkred')\n",
    "plt.axhline(0, color='gray', linestyle='--', linewidth=1)\n",
    "plt.xlabel('Time (UTC)')\n",
    "plt.ylabel('Uz (m/s)')\n",
    "plt.title('Diurnal Variation of Vertical Wind Speed (Uz) on April 15, 2024')\n",
    "plt.grid(True)\n",
    "plt.legend()\n",
    "plt.tight_layout()\n",
    "plt.show()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "2072940d",
   "metadata": {
    "scrolled": true
   },
   "outputs": [],
   "source": [
    "'''\n",
    "# Create scatter plot again\n",
    "plt.figure(figsize=(10, 5))\n",
    "plt.scatter(filtered['TIMESTAMP'], 1/filtered['Albedo'], \n",
    "            color='tab:orange', s=10, alpha=0.7, edgecolor='k', linewidth=0.2)\n",
    "\n",
    "# Formatting\n",
    "plt.title('Surface Albedo (10-Minute)', fontsize=16)\n",
    "plt.xlabel('Date', fontsize=14)\n",
    "plt.ylabel('Albedo', fontsize=14)\n",
    "plt.grid(True, linestyle='--', alpha=0.6)\n",
    "plt.xticks(rotation=45, fontsize=11)\n",
    "plt.yticks(fontsize=11)\n",
    "plt.tight_layout()\n",
    "'''"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "b13669e3",
   "metadata": {},
   "outputs": [],
   "source": [
    "'''\n",
    "# --- Plotting ---\n",
    "plt.figure(figsize=(10, 5))\n",
    "plt.scatter(daily_albedo.index, 1/daily_albedo.values,\n",
    "            color='tab:orange', edgecolor='k', s=35, alpha=0.8)\n",
    "\n",
    "# Formatting\n",
    "plt.title('Daily Average Surface Albedo ', fontsize=16)\n",
    "plt.xlabel('Date', fontsize=14)\n",
    "plt.ylabel('Albedo', fontsize=14)\n",
    "plt.grid(True, linestyle='--', alpha=0.6)\n",
    "plt.xticks(rotation=45, fontsize=11)\n",
    "plt.yticks(fontsize=11)\n",
    "plt.tight_layout()\n",
    "\n",
    "# Optional: save to file\n",
    "# plt.savefig('daily_albedo_scatter.png', dpi=300)\n",
    "plt.show()\n",
    "'''"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "d4885634",
   "metadata": {},
   "outputs": [],
   "source": [
    "'''\n",
    "# Convert TIMESTAMP to datetime if not already\n",
    "filtered['TIMESTAMP'] = pd.to_datetime(filtered['TIMESTAMP'])\n",
    "\n",
    "# Set TIMESTAMP as index for resampling\n",
    "filtered.set_index('TIMESTAMP', inplace=True)\n",
    "\n",
    "# Daily average of Albedo\n",
    "daily_albedo = filtered['Albedo'].resample('D').mean().dropna()\n",
    "'''"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "21d1fcde",
   "metadata": {},
   "outputs": [],
   "source": [
    "# 3) Scatter plot vs 1:1 line\n",
    "T_mast  = filtered['Temperature_K_2.99']\n",
    "T_sonic = filtered['Average_Temperature_Corr']\n",
    "T_rad   = filtered['T_radiometer']\n",
    "\n",
    "plt.figure(figsize=(8,6))\n",
    "plt.scatter(T_mast, T_sonic, s=30, alpha=0.6, label='Sonic')\n",
    "#plt.scatter(T_mast, T_rad,   s=30, alpha=0.6, label='Radiometer')\n",
    "lims = [min(T_mast.min(), T_sonic.min(), T_rad.min()),\n",
    "        max(T_mast.max(), T_sonic.max(), T_rad.max())]\n",
    "plt.plot(lims, lims, 'k--', linewidth=1.5, label='1:1 line')\n",
    "plt.xlabel('Mast Temperature (K)')\n",
    "plt.ylabel('Instrument Temperature (K)')\n",
    "plt.title('Scatter: Sonic & Radiometer vs Mast')\n",
    "plt.legend(loc='upper left')\n",
    "plt.tight_layout()\n",
    "plt.show()\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "ac49ea6d",
   "metadata": {
    "scrolled": true
   },
   "outputs": [],
   "source": [
    "\n",
    "# 5) Derive metrics for each comparison\n",
    "# Sonic vs Mast 2.99 m\n",
    "T_mast_299  = filtered['Temperature_K_2.99']\n",
    "T_sonic     = filtered['Average_Temperature_Corr']\n",
    "delta_sonic = T_mast_299-T_sonic\n",
    "mean_sonic  = delta_sonic.mean()\n",
    "std_sonic   = delta_sonic.std()\n",
    "r_sonic     = T_mast_299.corr(T_sonic)\n",
    "\n",
    "# Radiometer vs Mast 2 m\n",
    "T_mast_2    = filtered['Temperature_K_2']\n",
    "T_rad       = filtered['T_radiometer']\n",
    "delta_rad   = T_rad - T_mast_2\n",
    "mean_rad    = delta_rad.mean()\n",
    "std_rad     = delta_rad.std()\n",
    "r_rad       = T_mast_2.corr(T_rad)\n",
    "\n",
    "# 6) Plot styling\n",
    "plt.rcParams.update({\n",
    "    'font.size': 20,\n",
    "    'axes.labelsize': 20,\n",
    "    'axes.titlesize': 20,\n",
    "    'legend.fontsize': 14,\n",
    "    'xtick.labelsize': 18,\n",
    "    'ytick.labelsize': 18\n",
    "})\n",
    "\n",
    "# 7) Plot Sonic vs Mast 2.99 m\n",
    "fig, ax = plt.subplots(figsize=(8,6))\n",
    "ax.scatter(T_mast_299, T_sonic, s=30, alpha=0.6, color='#1f77b4')\n",
    "lims = [min(T_mast_299.min(), T_sonic.min()), max(T_mast_299.max(), T_sonic.max())]\n",
    "ax.plot(lims, lims, 'k--', linewidth=1.5, label='Equality line')\n",
    "ax.set_xlabel('Mast Temperature at 2.99 m (K)')\n",
    "ax.set_ylabel('Sonic Temperature (K)')\n",
    "ax.set_title('Sonic vs. Mast (2.99 m) Temperature')\n",
    "ax.text(0.05, 0.95,\n",
    "        f'ΔT mean = {mean_sonic:.2f} K\\nσ = {std_sonic:.2f} K\\nr = {r_sonic:.2f}',\n",
    "        transform=ax.transAxes, verticalalignment='top',\n",
    "        bbox=dict(facecolor='white', alpha=0.8))\n",
    "ax.legend(loc='lower right')\n",
    "plt.tight_layout()\n",
    "plt.show()\n",
    "\n",
    "# 8) Plot Radiometer vs Mast 2 m\n",
    "fig, ax = plt.subplots(figsize=(8,6))\n",
    "ax.scatter(T_mast_2, T_rad, s=30, alpha=0.6, color='#ff7f0e')\n",
    "lims = [min(T_mast_2.min(), T_rad.min()), max(T_mast_2.max(), T_rad.max())]\n",
    "ax.plot(lims, lims, 'k--', linewidth=1.5, label='Equality line')\n",
    "ax.set_xlabel('Mast Temperature at 2 m (K)')\n",
    "ax.set_ylabel('Radiometer-derived Temperature (K)')\n",
    "ax.set_title('Radiometer vs. Mast (2 m) Temperature')\n",
    "ax.text(0.05, 0.95,\n",
    "        f'ΔT mean = {mean_rad:.2f} K\\nσ = {std_rad:.2f} K\\nr = {r_rad:.2f}',\n",
    "        transform=ax.transAxes, verticalalignment='top',\n",
    "        bbox=dict(facecolor='white', alpha=0.8))\n",
    "ax.legend(loc='lower right')\n",
    "plt.tight_layout()\n",
    "plt.show()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "40e9fff9",
   "metadata": {},
   "outputs": [],
   "source": [
    "print(filtered)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "7bd09cd7",
   "metadata": {},
   "outputs": [],
   "source": [
    "# 3) Extract hour and compute deltas\n",
    "filtered['Hour'] = filtered['TIMESTAMP'].dt.hour\n",
    "filtered['DeltaSonic'] = filtered['Temperature_K_2.99']-filtered['Average_Temperature_Corr'] \n",
    "# filtered['DeltaRad'] = filtered['T_radiometer'] - filtered['Temperature_K_2']\n",
    "\n",
    "# 4) Group by hour: mean & std\n",
    "hourly = filtered.groupby('Hour').agg({\n",
    "    'DeltaSonic': ['mean', 'std'],\n",
    "    # 'DeltaRad': ['mean', 'std']\n",
    "})\n",
    "hourly.columns = ['S_mean', 'S_std']  # Update if you include DeltaRad: + ['R_mean', 'R_std']\n",
    "hourly = hourly.reset_index()\n",
    "\n",
    "# 5) Plot diurnal cycle\n",
    "plt.figure(figsize=(8, 6))\n",
    "plt.errorbar(hourly['Hour'], hourly['S_mean'], yerr=hourly['S_std'],\n",
    "             marker='o', linestyle='-', label='Mast (2.99m) – Sonic', capsize=3)\n",
    "# plt.errorbar(hourly['Hour'], hourly['R_mean'], yerr=hourly['R_std'],\n",
    "#              marker='s', linestyle='--', label='Radiometer – Mast (2m)', capsize=3)\n",
    "\n",
    "plt.xticks(range(0, 24, 2))\n",
    "plt.xlabel('Hour of Day')\n",
    "plt.ylabel('Temperature Bias (K)')\n",
    "plt.title('Diurnal Variation of Temperature Bias')\n",
    "plt.legend()\n",
    "plt.grid(True)\n",
    "plt.tight_layout()\n",
    "plt.show()\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "77f461ae",
   "metadata": {
    "scrolled": false
   },
   "outputs": [],
   "source": [
    "\n",
    "# 1) Filter out rain‐flagged and wind‐flagged intervals\n",
    "wind_data = big[(big.Flag_Rain == 0)].copy()# & (big.Flag_WS == 0)\n",
    "\n",
    "\n",
    "# 4) Compute metrics\n",
    "ws_mast     = wind_data['WS_ms_D15008_Avg']#WS_ms_D15014_Avg\n",
    "ws_sonic    = wind_data['wind_speed_sonic_hor']#wind_data['Wind_Speed']\n",
    "delta_ws    = ws_mast-ws_sonic\n",
    "mean_ws     = delta_ws.mean()\n",
    "std_ws      = delta_ws.std()\n",
    "r_ws        = ws_mast.corr(ws_sonic)\n",
    "pct_within1 = (delta_ws.abs() <= 1).mean() * 100\n",
    "\n",
    "# 5) Styled scatter plot\n",
    "plt.rcParams.update({\n",
    "    'font.size': 20,\n",
    "    'axes.labelsize': 18,\n",
    "    'axes.titlesize': 18,\n",
    "    'legend.fontsize': 12,\n",
    "})\n",
    "fig, ax = plt.subplots(figsize=(8,6))\n",
    "ax.scatter(ws_mast, ws_sonic, s=1, alpha=0.6, color='#1f77b4')#, label='10-min averages')\n",
    "lims = [min(ws_mast.min(), ws_sonic.min()), max(ws_mast.max(), ws_sonic.max())]\n",
    "ax.plot(lims, lims, 'k--', linewidth=1.5, label='1:1 equality')\n",
    "\n",
    "ax.text(0.02, 0.98,\n",
    "        f'Δw_s mean = {mean_ws:.2f} m/s\\nσ = {std_ws:.2f} m/s\\nr = {r_ws:.2f}\\n±1 m/s = {pct_within1:.1f}%',\n",
    "        transform=ax.transAxes, ha='left', va='top',\n",
    "        bbox=dict(facecolor='white', alpha=0.8))\n",
    "\n",
    "ax.set_xlabel('Mast Wind Speed (m/s)')\n",
    "ax.set_ylabel('Sonic Wind Speed (m/s)')\n",
    "ax.set_title('Mast (4.47 m) vs. Sonic Wind Speed')\n",
    "ax.legend(loc='lower right')\n",
    "plt.tight_layout()\n",
    "plt.show()\n",
    "\n",
    "# 4) Identify calmest & windiest dates\n",
    "wind_data['Date'] = wind_data['TIMESTAMP'].dt.date\n",
    "daily = wind_data.groupby('Date')['WS_ms_D15008_Avg'].mean().reset_index()\n",
    "calmest  = daily.nsmallest(1, 'WS_ms_D15008_Avg')['Date'].iloc[0]\n",
    "windiest = daily.nlargest(1,  'WS_ms_D15008_Avg')['Date'].iloc[0]\n",
    "\n",
    "# Extract time‐of‐day in hours (float)\n",
    "for label, date in [('Calm', calmest), ('Windy', windiest)]:\n",
    "    df = wind_data[wind_data['Date'] == date].copy()\n",
    "    # time since midnight in hours\n",
    "    df['HourOfDay'] = df['TIMESTAMP'].dt.hour + df['TIMESTAMP'].dt.minute/60.0\n",
    "    if label == 'Calm':\n",
    "        df_calm = df\n",
    "    else:\n",
    "        df_wind = df\n",
    "\n",
    "# Now plot both on the same HourOfDay axis:\n",
    "plt.figure(figsize=(8,6))\n",
    "\n",
    "# Calm day (circles)\n",
    "plt.scatter(df_calm['HourOfDay'], df_calm['WS_ms_D15008_Avg'],\n",
    "            s=50, marker='*', color='#1f77b4', alpha=0.6,\n",
    "            label=f'Calm Mast ({calmest:%m-%d})')\n",
    "plt.scatter(df_calm['HourOfDay'], df_calm['wind_speed_sonic_hor'],\n",
    "            s=50, marker='*', color='#ff7f0e', alpha=0.6,\n",
    "            label=f'Calm Sonic ({calmest:%m-%d})')\n",
    "plt.plot(df_calm['HourOfDay'], df_calm['Windspeed_Diff'],\n",
    "         color='#2a9d8f',linestyle='--', label='Calm Δw_s')\n",
    "\n",
    "# Windy day (diamonds)\n",
    "plt.scatter(df_wind['HourOfDay'], df_wind['WS_ms_D15008_Avg'],\n",
    "            s=50, marker='+', color='#1f77b4', alpha=0.8,\n",
    "            label=f'Windy Mast ({windiest:%m-%d})')\n",
    "plt.scatter(df_wind['HourOfDay'], df_wind['wind_speed_sonic_hor'],\n",
    "            s=50, marker='+', color='#ff7f0e', alpha=0.8,\n",
    "            label=f'Windy Sonic ({windiest:%m-%d})')\n",
    "plt.plot(df_wind['HourOfDay'], df_wind['Windspeed_Diff'],\n",
    "         color='#2a9d8f',linestyle='-', label='Windy Δw_s')\n",
    "\n",
    "# Formatting\n",
    "plt.xticks(np.arange(0,24+1,2))\n",
    "plt.xlim(-0.5, 23.5)\n",
    "plt.ylim(-1,10)\n",
    "plt.xlabel('Hour of Day')\n",
    "plt.ylabel('Wind Speed (m/s)')\n",
    "plt.title('Calm vs. Windy: Mast & Sonic Wind Speed vs Time')\n",
    "plt.legend(ncol=1, loc='upper right')# fontsize='small')\n",
    "plt.tight_layout()\n",
    "plt.show()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "2cde024f",
   "metadata": {},
   "outputs": [],
   "source": [
    "# 1) Filter out rain-flagged intervals\n",
    "wind_data = big[(big.Flag_Rain == 0)].copy()\n",
    "\n",
    "\n",
    "# Compute wind speed differences\n",
    "df[\"Hour\"] = df[\"TIMESTAMP\"].dt.hour\n",
    "df[\"Delta_WS_4.47\"] = df[\"WS_ms_D15014_Avg\"] - df[\"Wind_Speed\"]\n",
    "df[\"Delta_WS_10\"] = df[\"WS_ms_D15463_Avg\"] - df[\"Wind_Speed\"]\n",
    "df[\"Delta_WS_2\"] = df[\"WS_ms_D15008_Avg\"] - df[\"Wind_Speed\"]\n",
    "\n",
    "# Group by hour and calculate mean and std\n",
    "grouped = df.groupby(\"Hour\").agg({\n",
    "    \"Delta_WS_4.47\": [\"mean\", \"std\"],\n",
    "    \"Delta_WS_10\": [\"mean\", \"std\"],\n",
    "    \"Delta_WS_2\": [\"mean\", \"std\"]\n",
    "})\n",
    "grouped.columns = [\"Delta_4.47_mean\", \"Delta_4.47_std\", \"Delta_10_mean\", \"Delta_10_std\", \"Delta_2_mean\", \"Delta_2_std\"]\n",
    "grouped = grouped.reset_index()\n",
    "\n",
    "# Plotting\n",
    "plt.figure(figsize=(8, 6))\n",
    "plt.errorbar(grouped[\"Hour\"], grouped[\"Delta_2_mean\"], yerr=grouped[\"Delta_2_std\"],\n",
    "             label=\"Mast 2.0 m – Sonic\", fmt='-o', capsize=3)\n",
    "plt.errorbar(grouped[\"Hour\"], grouped[\"Delta_4.47_mean\"], yerr=grouped[\"Delta_4.47_std\"],\n",
    "             label=\"Mast 4.47 m – Sonic\", fmt='-o', capsize=3)\n",
    "plt.errorbar(grouped[\"Hour\"], grouped[\"Delta_10_mean\"], yerr=grouped[\"Delta_10_std\"],\n",
    "             label=\"Mast 10.0 m – Sonic\", fmt='-o', capsize=3)\n",
    "\n",
    "plt.axhline(0, linestyle='--', color='gray', linewidth=1)\n",
    "plt.xlabel(\"Hour of Day\")\n",
    "plt.ylabel(\"Wind Speed Difference (m/s)\")\n",
    "plt.title(\"Diurnal Variation of Wind Speed Difference (Mast – Sonic)\")\n",
    "plt.legend()\n",
    "plt.grid(True)\n",
    "plt.tight_layout()\n",
    "plt.show()\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "f889aaae",
   "metadata": {
    "scrolled": true
   },
   "outputs": [],
   "source": [
    "\n",
    "# 1. Compute Sonic Wind Direction (meteorological convention)\n",
    "ux = wind_data['Average_Wind_Ux']\n",
    "uy = wind_data['Average_Wind_Uy']\n",
    "wind_data['sonic_dir'] = (270 - np.degrees(np.arctan2(uy, ux))) % 360\n",
    "\n",
    "# 2. Copy Mast Wind Direction\n",
    "wind_data['mast_dir'] = wind_data['WindDir_D15014_Avg']\n",
    "\n",
    "# 3. Compute signed angular difference Δθ = sonic – mast in [-180,180]\n",
    "delta = (wind_data['sonic_dir'] - wind_data['mast_dir'] + 180) % 360 - 180\n",
    "wind_data['delta_dir'] = delta\n",
    "\n",
    "# 4. Circular metrics\n",
    "cos_mean = np.mean(np.cos(np.radians(delta)))\n",
    "sin_mean = np.mean(np.sin(np.radians(delta)))\n",
    "R = np.hypot(cos_mean, sin_mean)\n",
    "circ_mean = (np.degrees(np.arctan2(sin_mean, cos_mean))) % 360\n",
    "circ_std  = np.degrees(np.sqrt(-2 * np.log(R)))\n",
    "pct_within10 = (np.abs(delta) <= 10).mean() * 100\n",
    "\n",
    "print(f\"Circular bias (mean Δθ):   {circ_mean:.1f}°\")\n",
    "print(f\"Circular std dev:         {circ_std:.1f}°\")\n",
    "print(f\"Resultant vector length R: {R:.3f}\")\n",
    "print(f\"% within ±10°:            {pct_within10:.1f}%\")\n",
    "\n",
    "# =========== Plot 1: Hexbin Mast vs Sonic Direction ===========\n",
    "plt.figure(figsize=(6,6))\n",
    "hb = plt.hexbin(wind_data['mast_dir'], wind_data['sonic_dir'],\n",
    "                gridsize=36, cmap='viridis', extent=[0,360,0,360])\n",
    "plt.plot([0,360],[0,360],'k--',linewidth=1)\n",
    "plt.xlim(0,360); plt.ylim(0,360)\n",
    "plt.xlabel('Mast Direction (°)')\n",
    "plt.ylabel('Sonic Direction (°)')\n",
    "plt.title('Mast vs Sonic Wind Direction')\n",
    "plt.colorbar(hb, label='Counts')\n",
    "plt.tight_layout()\n",
    "plt.show()\n",
    "\n",
    "# =========== Plot 2: Wind Roses ===========\n",
    "\n",
    "def plot_rose(ax, directions, bins=16, title=''):\n",
    "    # create equal-width bins\n",
    "    edges = np.linspace(0, 360, bins+1)\n",
    "    counts, _ = np.histogram(directions, bins=edges)\n",
    "    angles = np.radians(edges[:-1] + (360/bins)/2)\n",
    "    ax.bar(angles, counts, width=np.radians(360/bins),\n",
    "           bottom=0, edgecolor='k', align='edge')\n",
    "    ax.set_theta_zero_location('N')\n",
    "    ax.set_theta_direction(-1)\n",
    "    ax.set_title(title)\n",
    "\n",
    "fig, (ax1, ax2, ax3) = plt.subplots(1,3, figsize=(14,4),\n",
    "                                    subplot_kw={'projection':'polar'})\n",
    "\n",
    "plot_rose(ax1, wind_data['mast_dir'],  bins=16, title='Mast Wind Rose')\n",
    "plot_rose(ax2, wind_data['sonic_dir'], bins=16, title='Sonic Wind Rose')\n",
    "# Δθ rose: center at zero\n",
    "# shift negative deltas to [0,360) for a circular histogram\n",
    "delta_pos = (wind_data['delta_dir'] + 360) % 360\n",
    "plot_rose(ax3, delta_pos, bins=16, title='Δθ = Sonic–Mast')\n",
    "\n",
    "plt.tight_layout()\n",
    "plt.show()\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "73e12e2b",
   "metadata": {},
   "outputs": [],
   "source": [
    "# 1) Sonic mean direction from u,v (meteorological \"from\" convention) — your approach is fine\n",
    "ux = wind_data['Average_Wind_Ux']  # +east\n",
    "uy = wind_data['Average_Wind_Uy']  # +north\n",
    "wind_data['sonic_dir'] = (270 - np.degrees(np.arctan2(uy, ux))) % 360\n",
    "\n",
    "# (Optional sanity check) Mean vertical velocity should be ~0 for 10-min means\n",
    "if 'Average_Wind_Uz' in wind_data.columns:\n",
    "    w_med = np.nanmedian(np.abs(wind_data['Average_Wind_Uz']))\n",
    "    print(f\"Median |w̄| over 10-min means: {w_med:.3f} m/s\")\n",
    "\n",
    "# 2) Mast direction (same timestamps)\n",
    "wind_data['mast_dir'] = wind_data['WindDir_D15014_Avg']\n",
    "\n",
    "# 3) Circular difference Δθ = sonic – mast (wrapped to [-180, 180))\n",
    "delta = (wind_data['sonic_dir'] - wind_data['mast_dir'] + 180) % 360 - 180\n",
    "wind_data['delta_dir'] = delta\n",
    "\n",
    "# 4) Circular metrics (rotation angle, spread)\n",
    "cos_mean = np.mean(np.cos(np.radians(delta)))\n",
    "sin_mean = np.mean(np.sin(np.radians(delta)))\n",
    "R = np.hypot(cos_mean, sin_mean)\n",
    "theta_rot = (np.degrees(np.arctan2(sin_mean, cos_mean)))  # in (-180, 180]\n",
    "if theta_rot <= -180: theta_rot += 360\n",
    "if theta_rot > 180: theta_rot -= 360\n",
    "circ_std  = np.degrees(np.sqrt(-2 * np.log(max(R, 1e-12))))\n",
    "pct_within10 = (np.abs(delta) <= 10).mean() * 100\n",
    "\n",
    "print(f\"Rotation (circular mean Δθ): {theta_rot:.1f}°\")\n",
    "print(f\"Circular std dev:            {circ_std:.1f}°\")\n",
    "print(f\"Resultant vector length R:   {R:.3f}\")\n",
    "print(f\"% within ±10°:               {pct_within10:.1f}%\")\n",
    "\n",
    "# (Optional) circular mode of Δθ to explain 116° vs ~135° question\n",
    "bins = np.linspace(-180, 180, 37)  # 10° bins\n",
    "hist, edges = np.histogram(delta, bins=bins)\n",
    "mode_bin_idx = np.argmax(hist)\n",
    "mode_center = 0.5 * (edges[mode_bin_idx] + edges[mode_bin_idx+1])\n",
    "print(f\"Circular mode (histogram center): ~{mode_center:.1f}°\")\n",
    "\n",
    "# 5) Apply rotation to sonic directions and compute residuals\n",
    "wind_data['sonic_dir_corr'] = (wind_data['sonic_dir'] - theta_rot) % 360\n",
    "residual = (wind_data['sonic_dir_corr'] - wind_data['mast_dir'] + 180) % 360 - 180\n",
    "wind_data['residual_dir'] = residual\n",
    "print(f\"Post-rotation median |residual|: {np.nanmedian(np.abs(residual)):.1f}°\")\n",
    "\n",
    "# ========= Plot A: Hexbin BEFORE correction (wrap-aware) =========\n",
    "plt.figure(figsize=(6,6))\n",
    "# Duplicate points shifted by ±360 to reduce edge artifacts near 0/360\n",
    "x_mast = np.concatenate([wind_data['mast_dir'].to_numpy(),\n",
    "                         wind_data['mast_dir'].to_numpy()+360,\n",
    "                         wind_data['mast_dir'].to_numpy()-360])\n",
    "y_sonic = np.concatenate([wind_data['sonic_dir'].to_numpy(),\n",
    "                          wind_data['sonic_dir'].to_numpy()+360,\n",
    "                          wind_data['sonic_dir'].to_numpy()-360])\n",
    "\n",
    "hb = plt.hexbin(x_mast, y_sonic, gridsize=36, cmap='viridis',\n",
    "                extent=[-60, 420, -60, 420])\n",
    "plt.plot([-60, 420], [-60, 420], 'k--', linewidth=1)\n",
    "plt.xlim(0, 360); plt.ylim(0, 360)\n",
    "plt.xlabel('Mast Direction (°)')\n",
    "plt.ylabel('Sonic Direction (°)')\n",
    "plt.title('Mast vs Sonic Direction (before rotation)')\n",
    "plt.colorbar(hb, label='Counts')\n",
    "plt.tight_layout()\n",
    "plt.show()\n",
    "\n",
    "# ========= Plot B: Hexbin AFTER correction (wrap-aware) =========\n",
    "plt.figure(figsize=(6,6))\n",
    "x_mast = np.concatenate([wind_data['mast_dir'].to_numpy(),\n",
    "                         wind_data['mast_dir'].to_numpy()+360,\n",
    "                         wind_data['mast_dir'].to_numpy()-360])\n",
    "y_sonic_corr = np.concatenate([wind_data['sonic_dir_corr'].to_numpy(),\n",
    "                               wind_data['sonic_dir_corr'].to_numpy()+360,\n",
    "                               wind_data['sonic_dir_corr'].to_numpy()-360])\n",
    "\n",
    "hb = plt.hexbin(x_mast, y_sonic_corr, gridsize=36, cmap='viridis',\n",
    "                extent=[-60, 420, -60, 420])\n",
    "plt.plot([-60, 420], [-60, 420], 'k--', linewidth=1)\n",
    "plt.xlim(0, 360); plt.ylim(0, 360)\n",
    "plt.xlabel('Mast Direction (°)')\n",
    "plt.ylabel('Sonic Direction (°, rotated)')\n",
    "plt.title(f'Mast vs Sonic (after rotation {theta_rot:.1f}°)')\n",
    "plt.colorbar(hb, label='Counts')\n",
    "plt.tight_layout()\n",
    "plt.show()\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "b32a7ab8",
   "metadata": {},
   "outputs": [],
   "source": [
    "# ======== Apply Correction to Sonic Direction =========\n",
    "# Shift the sonic direction to align with mast using circular mean\n",
    "wind_data['sonic_corrected'] = (wind_data['sonic_dir'] - circ_mean) % 360\n",
    "\n",
    "# ======== Plot: Corrected Hexbin Mast vs Sonic =========\n",
    "plt.figure(figsize=(6,6))\n",
    "hb_corr = plt.hexbin(wind_data['mast_dir'], wind_data['sonic_corrected'],\n",
    "                     gridsize=36, cmap='viridis', extent=[0,360,0,360])\n",
    "plt.plot([0,360],[0,360],'k--',linewidth=1)\n",
    "plt.xlim(0,360); plt.ylim(0,360)\n",
    "plt.xlabel('Mast Direction (°)')\n",
    "plt.ylabel('Corrected Sonic Direction (°)')\n",
    "plt.title('Mast vs Corrected Sonic Wind Direction')\n",
    "plt.colorbar(hb_corr, label='Counts')\n",
    "plt.tight_layout()\n",
    "plt.show()\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "eca3d968",
   "metadata": {
    "scrolled": true
   },
   "outputs": [],
   "source": [
    "delta_corrected = (wind_data['sonic_corrected'] - wind_data['mast_dir'] + 180) % 360 - 180\n",
    "wind_data['delta_dir_corrected'] = delta_corrected\n",
    "\n",
    "# Shift to [0, 360) for circular rose plotting\n",
    "delta_corr_pos = (delta_corrected + 360) % 360\n",
    "\n",
    "# ======== Plot: Corrected Difference Wind Rose =========\n",
    "fig = plt.figure(figsize=(4.5, 4))\n",
    "ax = fig.add_subplot(111, projection='polar')\n",
    "\n",
    "plot_rose(ax, delta_corr_pos, bins=16, title='Corrected Δθ = Sonic – Mast')\n",
    "plt.tight_layout()\n",
    "plt.show()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "ae41ec25",
   "metadata": {},
   "outputs": [],
   "source": [
    "#Edit before running!!\n",
    "\n",
    "# ======== Load KNMI Wind Direction Data =========\n",
    "#KNMI reference file:\n",
    "file_path = r\"C:\\path\\to\\KNMI\\uurgeg_235_2021-2030.txt\"  # update to your KNMI file location\n",
    "\n",
    "header_row = 31\n",
    "\n",
    "df_knmi = pd.read_csv(file_path, skiprows=header_row, sep=\",\", engine='python', on_bad_lines='skip')\n",
    "df_knmi.columns = df_knmi.columns.str.strip()\n",
    "\n",
    "# Convert to datetime and filter\n",
    "df_knmi['Timestamp'] = pd.to_datetime(\n",
    "    df_knmi['YYYYMMDD'].astype(str) + df_knmi['HH'].astype(str).str.zfill(2),\n",
    "    format='%Y%m%d%H',\n",
    "    errors='coerce'\n",
    ")\n",
    "df_knmi['Timestamp'] = df_knmi['Timestamp'].dt.tz_localize('UTC')\n",
    "\n",
    "df_knmi = df_knmi[(df_knmi['Timestamp'] >= '2024-02-28') & (df_knmi['Timestamp'] <= '2024-06-13')]\n",
    "df_knmi = df_knmi.rename(columns={'DD': 'knmi_dir'})\n",
    "wind_data['TIMESTAMP'] = pd.to_datetime(wind_data['TIMESTAMP'])\n",
    "wind_data['TIMESTAMP'] = wind_data['TIMESTAMP'].dt.tz_localize('Europe/Amsterdam').dt.tz_convert('UTC')\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "e2620bde",
   "metadata": {
    "scrolled": true
   },
   "outputs": [],
   "source": [
    "print(df_knmi)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "08091d7f",
   "metadata": {
    "scrolled": true
   },
   "outputs": [],
   "source": [
    "# ======== Compute Hourly Circular Mean of Mast Direction @ 2 m (convert local -> UTC) =========\n",
    "\n",
    "# Filter out flagged data\n",
    "wind_data = big[(big.Flag_Rain == 0) & (big.Flag_WS == 0)].copy()\n",
    "\n",
    "# Select mast wind direction and convert timestamp\n",
    "mast_dir_deg = wind_data[['TIMESTAMP', 'WindDir_D15463_Avg']].copy()\n",
    "\n",
    "# 1) Parse timestamps as naive, then localize to Europe/Amsterdam and convert to UTC\n",
    "mast_dir_deg['TIMESTAMP'] = pd.to_datetime(mast_dir_deg['TIMESTAMP'])\n",
    "mast_dir_deg['TIMESTAMP'] = mast_dir_deg['TIMESTAMP'].dt.tz_localize('Europe/Amsterdam').dt.tz_convert('UTC')\n",
    "\n",
    "# Use UTC index from here on\n",
    "mast_dir_deg = mast_dir_deg.set_index('TIMESTAMP')\n",
    "\n",
    "print(\"\\nRaw mast wind direction (sample, UTC):\")\n",
    "print(mast_dir_deg.head())\n",
    "\n",
    "# Convert to radians\n",
    "mast_rad = np.radians(mast_dir_deg['WindDir_D15463_Avg'])\n",
    "\n",
    "# Compute vector components and resample hourly (in UTC)\n",
    "df_vector = pd.DataFrame({\n",
    "    'u': np.cos(mast_rad),\n",
    "    'v': np.sin(mast_rad)\n",
    "}, index=mast_dir_deg.index)\n",
    "\n",
    "print(\"\\nVector components before resample (u, v):\")\n",
    "print(df_vector.head())\n",
    "\n",
    "# Resample to hourly mean (still UTC)\n",
    "df_hourly = df_vector.resample('H').mean()\n",
    "\n",
    "print(\"\\nHourly-averaged vector components (UTC):\")\n",
    "print(df_hourly[['u', 'v']].head())\n",
    "\n",
    "# Convert back to direction (degrees, 0–360)\n",
    "df_hourly['mast_dir'] = (np.degrees(np.arctan2(df_hourly['v'], df_hourly['u'])) + 360) % 360\n",
    "\n",
    "# Final cleanup: keep UTC timestamp as a column named 'Timestamp'\n",
    "df_hourly = df_hourly[['mast_dir']].dropna().reset_index().rename(columns={'TIMESTAMP': 'Timestamp'})\n",
    "\n",
    "print(\"\\nHourly mast wind direction (circular mean, UTC):\")\n",
    "print(df_hourly[['Timestamp', 'mast_dir']].head())\n",
    "\n",
    "# ======== Merge with KNMI Data on Overlapping Timestamps (both UTC) =========\n",
    "\n",
    "print(\"\\nKNMI data sample (UTC):\")\n",
    "print(df_knmi[['Timestamp', 'knmi_dir']].head())\n",
    "\n",
    "df_merged = pd.merge(df_hourly, df_knmi[['Timestamp', 'knmi_dir']],\n",
    "                     on='Timestamp', how='inner').dropna()\n",
    "\n",
    "print(\"\\nMerged mast + KNMI wind directions (UTC):\")\n",
    "print(df_merged.head())\n",
    "\n",
    "# ======== Circular Difference and Metrics (mast - KNMI) =========\n",
    "\n",
    "delta_knmi = (df_merged['mast_dir'] - df_merged['knmi_dir'] + 180) % 360 - 180\n",
    "df_merged['delta_dir'] = delta_knmi\n",
    "\n",
    "print(\"\\nΔθ (mast - KNMI) sample:\")\n",
    "print(df_merged[['Timestamp', 'mast_dir', 'knmi_dir', 'delta_dir']].head())\n",
    "\n",
    "cos_mean = np.mean(np.cos(np.radians(delta_knmi)))\n",
    "sin_mean = np.mean(np.sin(np.radians(delta_knmi)))\n",
    "R = np.hypot(cos_mean, sin_mean)\n",
    "circ_mean = (np.degrees(np.arctan2(sin_mean, cos_mean))) % 360\n",
    "circ_std  = np.degrees(np.sqrt(-2 * np.log(max(R, 1e-12))))\n",
    "pct_within10 = (np.abs(delta_knmi) <= 10).mean() * 100\n",
    "\n",
    "print(\"\\n--- Mast vs KNMI Direction Comparison (UTC) ---\")\n",
    "print(f\"Circular bias (mean Δθ):   {circ_mean:.1f}°\")\n",
    "print(f\"Circular std dev:          {circ_std:.1f}°\")\n",
    "print(f\"Resultant vector length R: {R:.3f}\")\n",
    "print(f\"% within ±10°:             {pct_within10:.1f}%\")\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "dad2291a",
   "metadata": {
    "scrolled": false
   },
   "outputs": [],
   "source": [
    "'''\n",
    "# ======== Compute Hourly Circular Mean of Mast Direction @ 2 m =========\n",
    "\n",
    "# Filter out flagged data\n",
    "wind_data = big[(big.Flag_Rain == 0) & (big.Flag_WS == 0)].copy()\n",
    "\n",
    "# Select 10 m wind direction and convert timestamp\n",
    "mast_dir_deg = wind_data[['TIMESTAMP', 'WindDir_D15463_Avg']].copy()\n",
    "mast_dir_deg['TIMESTAMP'] = pd.to_datetime(mast_dir_deg['TIMESTAMP'])\n",
    "mast_dir_deg.set_index('TIMESTAMP', inplace=True)\n",
    "# Print a preview of the original mast direction data\n",
    "print(\"\\nRaw mast wind direction (sample):\")\n",
    "print(mast_dir_deg.head())\n",
    "# Convert to radians\n",
    "mast_rad = np.radians(mast_dir_deg['WindDir_D15463_Avg'])\n",
    "\n",
    "# Compute vector components and resample hourly\n",
    "df_vector = pd.DataFrame({\n",
    "    'u': np.cos(mast_rad),\n",
    "    'v': np.sin(mast_rad)\n",
    "}, index=mast_dir_deg.index)\n",
    "\n",
    "# Print the vector components before resampling\n",
    "print(\"\\nVector components before resample (u, v):\")\n",
    "print(df_vector.head())\n",
    "\n",
    "df_hourly = df_vector.resample('H').mean()\n",
    "# After resampling hourly\n",
    "print(\"\\nHourly-averaged vector components:\")\n",
    "print(df_hourly[['u', 'v']].head())\n",
    "\n",
    "# Convert back to degrees\n",
    "df_hourly['mast_dir'] = (np.degrees(np.arctan2(df_hourly['v'], df_hourly['u'])) + 360) % 360\n",
    "\n",
    "\n",
    "# Final cleanup\n",
    "df_hourly = df_hourly[['mast_dir']].dropna().reset_index()\n",
    "df_hourly = df_hourly.rename(columns={df_hourly.columns[0]: 'Timestamp'})\n",
    "\n",
    "# After converting back to degrees\n",
    "print(\"\\nHourly mast wind direction (circular mean):\")\n",
    "print(df_hourly[['Timestamp', 'mast_dir']].head())\n",
    "\n",
    "# ======== Merge with KNMI Data on Overlapping Timestamps =========\n",
    "\n",
    "print(\"\\nKNMI data sample:\")\n",
    "print(df_knmi[['Timestamp', 'knmi_dir']].head())\n",
    "\n",
    "df_merged = pd.merge(df_hourly, df_knmi[['Timestamp', 'knmi_dir']], on='Timestamp', how='inner').dropna()\n",
    "print(\"\\nMerged mast + KNMI wind directions:\")\n",
    "print(df_merged.head())\n",
    "# ======== Circular Difference and Metrics =========\n",
    "\n",
    "delta_knmi = (df_merged['mast_dir'] - df_merged['knmi_dir'] + 180) % 360 - 180\n",
    "\n",
    "df_merged['delta_dir'] = delta_knmi\n",
    "# Print raw directional differences\n",
    "print(\"\\nΔθ (mast - KNMI) sample:\")\n",
    "print(df_merged[['Timestamp', 'mast_dir', 'knmi_dir', 'delta_dir']].head())\n",
    "cos_mean = np.mean(np.cos(np.radians(delta_knmi)))\n",
    "sin_mean = np.mean(np.sin(np.radians(delta_knmi)))\n",
    "R = np.hypot(cos_mean, sin_mean)\n",
    "circ_mean = (np.degrees(np.arctan2(sin_mean, cos_mean))) % 360\n",
    "circ_std = np.degrees(np.sqrt(-2 * np.log(R)))\n",
    "pct_within10 = (np.abs(delta_knmi) <= 10).mean() * 100\n",
    "\n",
    "print(\"\\n--- Mast vs KNMI Direction Comparison ---\")\n",
    "print(f\"Circular bias (mean Δθ):   {circ_mean:.1f}°\")\n",
    "print(f\"Circular std dev:          {circ_std:.1f}°\")\n",
    "print(f\"Resultant vector length R: {R:.3f}\")\n",
    "print(f\"% within ±10°:             {pct_within10:.1f}%\")\n",
    "'''"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "09eaa7c2",
   "metadata": {},
   "outputs": [],
   "source": [
    "# ======== Plot Wind Rose of Δθ (Mast – KNMI) =========\n",
    "\n",
    "def plot_rose(ax, directions, bins=16, title=''):\n",
    "    edges = np.linspace(0, 360, bins+1)\n",
    "    counts, _ = np.histogram(directions, bins=edges)\n",
    "    angles = np.radians(edges[:-1] + (360/bins)/2)\n",
    "    ax.bar(angles, counts, width=np.radians(360/bins),\n",
    "           bottom=0, edgecolor='k', align='edge')\n",
    "    ax.set_theta_zero_location('N')\n",
    "    ax.set_theta_direction(-1)\n",
    "    ax.set_title(title)\n",
    "\n",
    "# Shift deltas to [0, 360) for circular rose\n",
    "delta_pos = (df_merged['delta_dir'] + 360) % 360\n",
    "\n",
    "fig = plt.figure(figsize=(5, 5))\n",
    "ax = fig.add_subplot(111, projection='polar')\n",
    "plot_rose(ax, delta_pos, bins=16, title='Δθ = Mast – KNMI')\n",
    "plt.tight_layout()\n",
    "plt.show()"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.11.7"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}
