{
 "cells": [
  {
   "cell_type": "markdown",
   "id": "721584c7",
   "metadata": {},
   "source": [
    "### What this script does\n",
    "\n",
    "- Scans all data folders that contain classified clouds to count how many cloud profile plots (.png) exist per day and per cloud type.\n",
    "\n",
    "- Lists the top 3 days with the most plots for each cloud type.\n",
    "\n",
    "- Lists every day folder that contains a `cloud_profiles` subfolder.\n",
    "\n",
    "Edit before running:\n",
    "\n",
    "- `sonic_base_dir`: base directory containing the 10-min merged sonic/mast/radiometer data with `cloud_profiles` subfolders, e.g. `sonic_base_dir = r\"C:\\path\\to\\your\\Sonic\"`"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "35680eb3",
   "metadata": {},
   "outputs": [],
   "source": [
    "import os\n",
    "from collections import defaultdict\n",
    "from datetime import datetime"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "3473f145",
   "metadata": {
    "scrolled": true
   },
   "outputs": [],
   "source": [
    "# Base directory containing the 10-min merged data of sonic/mast/radiometer with cloud profile subfolders\n",
    "# ⚠️ Edit this path to match your local setup before running.\n",
    "sonic_base_dir = r\"C:\\path\\to\\your\\Sonic\"\n",
    "\n",
    "\n",
    "# Image count per cloud type per day:\n",
    "#   cloud_type_daily_counts[cloud_type_folder][day_folder] -> number of .png files\n",
    "cloud_type_daily_counts = defaultdict(lambda: defaultdict(int))\n",
    "\n",
    "# Walk <base>/<month>/<day>/cloud_profiles/<cloud_type>/ and count the PNGs found there\n",
    "for month_folder in os.listdir(sonic_base_dir):\n",
    "    month_path = os.path.join(sonic_base_dir, month_folder)\n",
    "    if not os.path.isdir(month_path):\n",
    "        continue\n",
    "\n",
    "    for day_folder in os.listdir(month_path):\n",
    "        day_path = os.path.join(month_path, day_folder)\n",
    "        if not os.path.isdir(day_path):\n",
    "            continue\n",
    "\n",
    "        cloud_profiles_path = os.path.join(day_path, \"cloud_profiles\")\n",
    "        # isdir rather than exists: a regular file named cloud_profiles would make os.listdir raise\n",
    "        if not os.path.isdir(cloud_profiles_path):\n",
    "            continue\n",
    "\n",
    "        # Go through each cloud type folder (if any)\n",
    "        for cloud_type_folder in os.listdir(cloud_profiles_path):\n",
    "            cloud_type_path = os.path.join(cloud_profiles_path, cloud_type_folder)\n",
    "            if not os.path.isdir(cloud_type_path):\n",
    "                continue\n",
    "\n",
    "            # Count how many PNGs are in this cloud type folder for this day\n",
    "            num_images = sum(f.endswith(\".png\") for f in os.listdir(cloud_type_path))\n",
    "            if num_images > 0:\n",
    "                # NOTE(review): keyed by day folder name only, so identically named day folders\n",
    "                # in different months are summed together; confirm day names are unique\n",
    "                cloud_type_daily_counts[cloud_type_folder][day_folder] += num_images\n",
    "\n",
    "# Find the top 3 days for each cloud type\n",
    "print(\"\\n🌤️ Top 3 Days for Each Cloud Type:\\n\")\n",
    "\n",
    "for cloud_type, day_counts in cloud_type_daily_counts.items():\n",
    "    if not day_counts:\n",
    "        continue\n",
    "\n",
    "    # Sort days by number of plots (descending); ties keep their insertion order\n",
    "    sorted_days = sorted(day_counts.items(), key=lambda x: x[1], reverse=True)\n",
    "\n",
    "    print(f\"☁️ {cloud_type.replace('_', ' ')}:\")\n",
    "\n",
    "    # One line per podium place; places beyond the available days get a placeholder\n",
    "    for rank, medal in enumerate((\"🥇\", \"🥈\", \"🥉\")):\n",
    "        if rank < len(sorted_days):\n",
    "            day, count = sorted_days[rank]\n",
    "            print(f\"   {medal} {day} ({count} plots)\")\n",
    "        else:\n",
    "            print(f\"   {medal} Not available\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "15857112",
   "metadata": {},
   "outputs": [],
   "source": [
    "# ——— Print all dates that have a cloud_profiles folder ———\n",
    "dates_with_profiles = []\n",
    "\n",
    "for month_name in os.listdir(sonic_base_dir):\n",
    "    month_dir = os.path.join(sonic_base_dir, month_name)\n",
    "    if os.path.isdir(month_dir):\n",
    "        # Record every day directory that directly holds a cloud_profiles directory\n",
    "        dates_with_profiles.extend(\n",
    "            f\"{month_name}\\\\{day_name}\"\n",
    "            for day_name in os.listdir(month_dir)\n",
    "            if os.path.isdir(os.path.join(month_dir, day_name))\n",
    "            and os.path.isdir(os.path.join(month_dir, day_name, \"cloud_profiles\"))\n",
    "        )\n",
    "\n",
    "# Drop repeats, then order the labels\n",
    "unique_dates = sorted(set(dates_with_profiles))\n",
    "\n",
    "print(\"\\n📅 Dates with cloud_profiles:\\n\")\n",
    "for date_label in unique_dates:\n",
    "    print(f\" - {date_label}\")\n"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.11.7"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}
