# -*- coding: utf-8 -*-
"""
Created on Wed Jul 28 13:09:28 2021

Download USCRN hourly station files and analyze them: build per-day
temperature statistics, anomalies relative to a monthly climatology,
linear trends, and summary plots.

@author: Mark B
"""
import glob
import urllib.error
import urllib.request

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

from climateUtils import runAvg

# Remote directory pattern for USCRN hourly products, keyed by year.
baseUrl = r"https://www.ncei.noaa.gov/pub/data/uscrn/products/hourly02/{:d}/"

# Station file pattern -- leave exactly one uncommented to select a station.
#baseFile = r"CRNH0203-{:d}-CO_Dinosaur_2_E.txt"
#baseFile = r"CRNH0203-{:d}-AK_Utqiagvik_formerly_Barrow_4_ENE.txt"
#baseFile = r"CRNH0203-{:d}-FL_Everglades_City_5_NE.txt"
#baseFile = r"CRNH0203-{:d}-NY_Ithaca_13_E.txt"
baseFile = r"CRNH0203-{:d}-AZ_Yuma_27_ENE.txt"
#baseFile = r"CRNH0203-{:d}-CA_Fallbrook_5_NE.txt"

# Local download directory derived from the station name embedded in the
# file pattern (e.g. ".../Yuma/" for the Yuma station file).
localDir = r"/home/mark/Downloads/{:s}/".format(baseFile.split("_")[1])

avgDays = [30, 15, -14]

# Full column layout of the USCRN hourly02 product files.
colNames = ["WBANNO", "UTC_DATE", "UTC_TIME", "LST_DATE", "LST_TIME",
            "CRX_VN", "LONGITUDE", "LATITUDE", "T_CALC", "T_HR_AVG",
            "T_MAX", "T_MIN", "P_CALC", "SOLARAD", "SOLARAD_FLAG",
            "SOLARAD_MAX", "SOLARAD_MAX_FLAG", "SOLARAD_MIN",
            "SOLARAD_MIN_FLAG", "SUR_TEMP_TYPE", "SUR_TEMP",
            "SUR_TEMP_FLAG", "SUR_TEMP_MAX", "SUR_TEMP_MAX_FLAG",
            "SUR_TEMP_MIN", "SUR_TEMP_MIN_FLAG", "RH_HR_AVG",
            "RH_HR_AVG_FLAG", "SOIL_MOISTURE_5", "SOIL_MOISTURE_10",
            "SOIL_MOISTURE_20", "SOIL_MOISTURE_50", "SOIL_MOISTURE_100",
            "SOIL_TEMP_5", "SOIL_TEMP_10", "SOIL_TEMP_20", "SOIL_TEMP_50",
            "SOIL_TEMP_100"]

useCols = ["UTC_DATE", "UTC_TIME", "LST_DATE", "LST_TIME",
           "T_CALC", "T_HR_AVG", "T_MAX", "T_MIN"]

# Keep date/time fields as strings so zero-padding (e.g. "0100") survives.
dtype_dic = {"UTC_DATE": str, "UTC_TIME": str,
             "LST_DATE": str, "LST_TIME": str}


# Retrieve files from http site, this takes a long time
def getDinosaur():
    """Download the yearly hourly files for 2000-2022 into ``localDir``.

    Years before the station came online simply do not exist upstream;
    those fetch failures are reported and skipped rather than raised.
    """
    for year in range(2022, 1999, -1):
        fileName = baseFile.format(year)
        try:
            urllib.request.urlretrieve(baseUrl.format(year) + fileName,
                                       localDir + fileName)
            print(fileName + " : retrieved")
        # Narrow handler: network errors or a missing/unwritable local
        # directory.  A bare except here would also swallow Ctrl-C.
        except (urllib.error.URLError, OSError):
            print(fileName + " : not retrieved")


def plotDino():
    """Placeholder plotting hook (not yet implemented)."""
    print("hi!")


def getUSCRN(localDir=localDir, baseFile=baseFile):
    """Read every downloaded yearly file and return one concatenated frame.

    Parameters
    ----------
    localDir : str
        Directory containing the downloaded ``*.txt`` station files.
    baseFile : str
        Unused; retained for backward-compatible call signatures.

    Returns
    -------
    pandas.DataFrame
        All years stacked, restricted to the date/time and air-temperature
        columns; ``-9999.0`` sentinels become NaN.
    """
    frames = []
    for file in getFileNameList(localDir=localDir):
        print(file)
        # Raw-string separator: "\s" is an invalid escape in a plain
        # string literal.  usecols indices select the useCols fields
        # from the full colNames layout.
        frames.append(pd.read_csv(file, sep=r"\s+", names=colNames,
                                  usecols=[1, 2, 3, 4, 6, 7, 8, 9, 10, 11],
                                  na_values=["-9999.0"], dtype=dtype_dic))
    # Concatenate once at the end -- concat inside the loop is quadratic.
    return pd.concat(frames) if frames else pd.DataFrame({})


def getFileNameList(localDir=localDir):
    """Return the list of downloaded station data files in ``localDir``."""
    return glob.glob(localDir + "*.txt")


# Get station information as pandas dataframe
# Data retrieved from https://www.ncei.noaa.gov/pub/data/uscrn/products/stations.tsv
def getStations():
    """Return the USCRN station metadata table as a DataFrame."""
    file = r"/home/mark/Downloads/stations.tsv"
    return pd.read_csv(file, sep="\t")


if __name__ == "__main__":
    df = getUSCRN(localDir=localDir, baseFile=baseFile)

    # Local-standard-time timestamp: date plus the HHMM offset from
    # midnight (parsing HHMM against 1900 and subtracting 1900 yields a
    # Timedelta of that many hours/minutes).
    df["DT_Local"] = pd.to_datetime(df.LST_DATE, format='%Y%m%d') + (
        pd.to_datetime(df.LST_TIME, format='%H%M')
        - pd.to_datetime('1900', format='%Y'))

    dayList = sorted(set(df.LST_DATE))
    L = len(dayList)

    # NaN-filled accumulators (np.empty leaves uninitialized memory).
    tMaxArray = np.full(L, np.nan)
    tMinArray = np.full(L, np.nan)
    tMeanArray = np.full(L, np.nan)
    tAvgArray = np.full(L, np.nan)
    tMedianArray = np.full(L, np.nan)

    # Per-day statistics from the hourly records that have valid readings.
    for k, date in enumerate(dayList):
        mask = (df.LST_DATE == date) & df.T_MAX.notna()
        if mask.any():
            tMaxArray[k] = max(df.T_MAX[mask])
            tMinArray[k] = min(df.T_MIN[mask])
            tMeanArray[k] = (tMaxArray[k] + tMinArray[k]) / 2
            tAvgArray[k] = np.mean(df.T_HR_AVG[mask])
            tMedianArray[k] = np.median(df.T_HR_AVG[mask])

    # "MM" slice of each "YYYYMMDD" day key, used to pool days by
    # calendar month across all years.
    monthList = [d[4:-2] for d in dayList]

    dfDaily = pd.DataFrame(
        {"day": dayList, "tMax": tMaxArray, "tMin": tMinArray,
         "tMean": tMeanArray, "tAvg": tAvgArray, "tMed": tMedianArray,
         "date": pd.to_datetime(dayList, format='%Y%m%d'),
         "month": monthList,
         "mAvg": np.full(L, np.nan), "mMean": np.full(L, np.nan),
         "mMed": np.full(L, np.nan),
         "aAvg": np.full(L, np.nan), "aMean": np.full(L, np.nan),
         "aMed": np.full(L, np.nan)})

    # Monthly climatology: mean of each daily statistic over all years.
    # .loc avoids chained assignment (SettingWithCopyWarning; a no-op
    # under pandas copy-on-write).
    for month in set(dfDaily.month):
        monMask = dfDaily.month == month
        dfDaily.loc[monMask, "mAvg"] = np.mean(dfDaily.tAvg[monMask])
        dfDaily.loc[monMask, "mMean"] = np.mean(dfDaily.tMean[monMask])
        dfDaily.loc[monMask, "mMed"] = np.mean(dfDaily.tMed[monMask])

    # Daily anomalies relative to the monthly climatology.
    dfDaily["aAvg"] = dfDaily.tAvg - dfDaily.mAvg
    dfDaily["aMean"] = dfDaily.tMean - dfDaily.mMean
    dfDaily["aMed"] = dfDaily.tMed - dfDaily.mMed

    # Valid (non-NaN) rows for each linear trend fit.
    maskAvg = dfDaily.aAvg.notna()
    maskMean = dfDaily.aMean.notna()
    maskMed = dfDaily.aMed.notna()

    days = np.arange(0, L)
    pAvg = np.polyfit(days[maskAvg], dfDaily.aAvg[maskAvg], 1)
    pMean = np.polyfit(days[maskMean], dfDaily.aMean[maskMean], 1)
    pMed = np.polyfit(days[maskMed], dfDaily.aMed[maskMed], 1)

    plt.clf()
    plt.subplot(2, 1, 1)
    # 30-day running averages drop 14 leading and 15 trailing days.
    plt.plot(dfDaily.date[14:-15], runAvg(dfDaily.aMean, 30), '-c',
             dfDaily.date[14:-15], runAvg(dfDaily.aMed, 30), '-g',
             dfDaily.date[14:-15], runAvg(dfDaily.aAvg, 30), '-b',
             dfDaily.date[maskMean], np.polyval(pMean, days[maskMean]), '-c',
             dfDaily.date[maskMed], np.polyval(pMed, days[maskMed]), '--g',
             dfDaily.date[maskAvg], np.polyval(pAvg, days[maskAvg]), ':b')
    # Slopes are per-day; scale by days/year * 10 for degrees per decade.
    plt.legend(["Mean Anomaly (Delta T={0:4.2f})".format(
                    np.mean(dfDaily.tMean - dfDaily.tAvg)),
                "Median Anomaly (Delta T={0:4.2f})".format(
                    np.mean(dfDaily.tMed - dfDaily.tAvg)),
                "Average Anomaly",
                "Mean Anomaly Trend: {0:4.2f}/decade".format(
                    pMean[0] * 365.2425 * 10),
                "Median Anomaly Trend: {0:4.2f}/decade".format(
                    pMed[0] * 365.2425 * 10),
                "Average Anomaly Trend: {0:4.2f}/decade".format(
                    pAvg[0] * 365.2425 * 10)])
    plt.grid(axis="both")
    plt.xlabel("Year")
    plt.ylabel(u"Anomaly (\u00B0C)")
    plt.title("USCRN {:s} Station Anomalies (30 day average)".format(
        baseFile.split("_")[1]))

    plt.subplot(2, 1, 2)
    plt.plot(dfDaily.date[14:-15], runAvg(dfDaily.aAvg - dfDaily.aMean, 30),
             '-c',
             dfDaily.date[14:-15], runAvg(dfDaily.aAvg - dfDaily.aMed, 30),
             '-g')
    plt.legend(["Residual (Avg - Mean) Anomaly",
                "Residual (Avg - Median) Anomaly"])
    plt.grid(axis="both")
    plt.xlabel("Year")
    plt.ylabel(u"Residual Anomaly (\u00B0C)")