#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Tue Jun 22 22:08:12 2021

@author: mark
"""

import urllib
import urllib.request

import numpy as np
import pandas as pd
from matplotlib.dates import date2num
from scipy import interpolate
from scipy.interpolate import interp1d

def runAvg(series, N):
    """N-point running average; only the fully-overlapping part is returned."""
    kernel = np.full(N, 1.0 / N)
    return np.convolve(series, kernel, mode='valid')

def ctrm(series, N=15, fs=12):
    """
    Cascaded-triple running mean smoother: three successive running averages
    with window lengths N*fs, 5*N*fs/6 and 4*N*fs/6 samples.
    N is the nominal period (years), fs the samples per year.
    """
    w1 = int(N * fs)
    w2 = int(5 * N * fs / 6)
    w3 = int(4 * N * fs / 6)
    smoothed = runAvg(series, w1)
    smoothed = runAvg(smoothed, w2)
    return runAvg(smoothed, w3)

def getPDO1850(source=None):
    """
    Retrieve the NOAA PDO index as (decimal years, index values).

    source : optional path or URL to the CSV (title line, then Date,Value
             columns with Date as YYYYMM).  Defaults to the NOAA URL.
             (The original hard-coded a developer-local download path that
             shadowed the URL; it is now just an override option.)
    Returns (dateSeries, tempSeries): decimal years at mid-month, and the
    index values as a numpy array.
    """
    if source is None:
        source = r"https://www.ncdc.noaa.gov/teleconnections/pdo/data.csv"
    df = pd.read_csv(source, skiprows=1)
    # YYYYMM -> decimal year at the middle of the month
    dateSeries = np.fix(df["Date"]/100) + (np.mod(df["Date"],100)-0.5)/12
    tempSeries = np.asarray(df["Value"])
    return dateSeries, tempSeries

def getSOI(source=None):
    """
    Retrieve the NOAA Southern Oscillation Index (SOI).

    source : optional path or URL to the CSV (title line, then Date,Value
             columns with Date as YYYYMM).  Defaults to the NOAA URL;
             parameterized so the parser can be tested offline.
    Returns (dateSeries, tempSeries): decimal years at mid-month, and the
    index values as a numpy array.
    """
    if source is None:
        source = r"https://www.ncdc.noaa.gov/teleconnections/enso/indicators/soi/data.csv"
    df = pd.read_csv(source, skiprows=1)
    # YYYYMM -> decimal year at the middle of the month
    dateSeries = np.fix(df["Date"]/100) + (np.mod(df["Date"],100)-0.5)/12
    tempSeries = np.asarray(df["Value"])
    return dateSeries, tempSeries

def getMEIv2(source=None):
    """
    Retrieve the NOAA/PSL Multivariate ENSO Index v2 as a monthly series.

    source : optional path or URL override (defaults to the PSL data file:
             one header line, rows of "year m1 .. m12", 3 footer lines,
             -999.00 for missing months).
    Returns (yearsByMonth, monthlySeries) with NaN months removed.

    BUG FIX: the original built the date axis AFTER dropping NaNs, so any
    missing month mid-series shifted every later date by one month.  The
    date axis is now built over the full monthly grid and masked with the
    same mask as the data (matching getAIRS).
    """
    if source is None:
        source = r"https://psl.noaa.gov/enso/mei/data/meiv2.data"
    df = pd.read_csv(source, sep=r"\s+", header=None, skiprows=1, skipfooter=3,
                     engine="python", na_values="-999.00")
    monthlyData = df.values[:,1:13]
    monthlySeries = monthlyData.reshape(1,monthlyData.size)[0]
    yearsByMonth = df.values[0,0] + np.arange(monthlySeries.size) / 12.
    mask = monthlySeries == monthlySeries   # Remove nan, keep dates aligned
    return yearsByMonth[mask], monthlySeries[mask]

def getGissLoti(source=None):
    """
    Retrieve the GISS LOTI v4 global monthly temperature anomalies.

    source : optional path or URL override (defaults to the GISS CSV: a
             title line, a header row with Year,Jan..Dec, '***' = missing).
    Returns (yearsByMonth, monthlySeries) with NaN months removed.

    BUG FIX: the original built the date axis AFTER dropping NaNs, so a
    missing month mid-series shifted every later date.  The axis is now
    built over the full grid and masked with the same mask as the data.
    """
    if source is None:
        source = r"https://data.giss.nasa.gov/gistemp/tabledata_v4/GLB.Ts+dSST.csv" # LOTI v4
    df = pd.read_csv(source, sep=',', header=1, na_values=['***'] )
    monthlyData = df.values[:,1:13]
    monthlySeries = monthlyData.reshape(1,monthlyData.size)[0]
    yearsByMonth = df["Year"][0] + np.arange(monthlySeries.size) / 12.
    mask = monthlySeries == monthlySeries   # Remove nan, keep dates aligned
    return yearsByMonth[mask], monthlySeries[mask]
    
def getAMO(source=None):
    """
    Retrieve the monthly Atlantic Multidecadal Oscillation (AMO) index.

    source : optional path or URL override (defaults to the UCAR file: one
             header line, whitespace rows "year m1 .. m12", -999 = missing).
    Returns (yearsByMonth, monthlySeries) with NaN months removed.

    BUG FIX: the original built the date axis AFTER dropping NaNs, so a
    missing month mid-series shifted every later date.  The axis is now
    built over the full grid and masked with the same mask as the data.
    """
    if source is None:
        source = r"https://climatedataguide.ucar.edu/sites/default/files/amo_monthly.txt"
    df = pd.read_csv(source, sep=r"\s+", header=None, skiprows=1, na_values=['-999'])
    monthlyData = df.values[:,1:13]
    monthlySeries = monthlyData.reshape(1,monthlyData.size)[0]
    yearsByMonth = df.values[0,0] + np.arange(monthlySeries.size) / 12.
    mask = monthlySeries == monthlySeries   # Remove nan, keep dates aligned
    return yearsByMonth[mask], monthlySeries[mask]

def getHadCrut4(source=None):
    """
    Retrieve the HadCRUT4 monthly global mean temperature series.

    source : optional path or URL override.  Defaults to the Met Office
             file (whitespace-separated, first column "YYYY/MM").
             (The original hard-coded a developer-local download path that
             shadowed the URL; it is now just an override option.)
    Returns (yearsByMonth, monthlySeries): decimal years at mid-month, and
    the anomaly column.
    """
    if source is None:
        source = r"https://www.metoffice.gov.uk/hadobs/hadcrut4/data/current/time_series/HadCRUT.4.6.0.0.monthly_ns_avg.txt"
    df = pd.read_csv(source, sep=r"\s+", header=None)
    # "YYYY/MM" -> decimal year at the middle of the month
    yearsByMonth = np.asarray([int(D.split("/")[0]) + (int(D.split("/")[1])-0.5)/12 for D in df.values[:,0]])
    monthlySeries = df.values[:,1]
    return yearsByMonth, monthlySeries

def getUAH(region="Globe", source=None):
    """
    Retrieve the UAH v6.0 lower-troposphere (TLT) temperature series.

    region : column name to return (e.g. "Globe", as in the UAH table).
    source : optional path or URL override.  Defaults to the UAH file
             (whitespace table with a header row and a 12-line footer).
             (The original left a dead local-path assignment above the URL;
             removed in favor of this parameter.)
    Returns (nDate, nRegion): decimal years at mid-month, and the region
    column.  Uses sep=r"\\s+" instead of the deprecated delim_whitespace.
    """
    if source is None:
        source = 'http://vortex.nsstc.uah.edu/data/msu/v6.0/tlt/uahncdc_lt_6.0.txt'
    df = pd.read_csv(source, sep=r"\s+", skipfooter=12, engine='python')
    nDate = df.Year + (df.Mo-0.5)/12
    nRegion = df[region]
    return nDate, nRegion

def getRSStlt(source=None):
    """
    Retrieve the RSS v4.0 TLT global (land+sea) temperature series.

    source : optional path or URL override.  Defaults to the RSS file
             (5 header lines; columns year, month, anomaly; -99.9000 = missing).
    Returns (nDate, nGlobe): decimal years at mid-month and anomalies,
    with missing months dropped from both.  Uses sep=r"\\s+" instead of the
    deprecated delim_whitespace, and computes the validity mask once.
    """
    if source is None:
        source = r"http://images.remss.com/msu/graphics/TLT_v40/time_series/RSS_TS_channel_TLT_Global_Land_And_Sea_v04_0.txt"
    df = pd.read_csv(source, sep=r"\s+", skiprows=5,
                     names=["Year","Mo","Globe"], na_values=["-99.9000"])
    valid = ~df.Globe.isna()
    nDate = (df.Year + (df.Mo-0.5)/12)[valid]
    nGlobe = df.Globe[valid]
    return nDate, nGlobe

def baseline(date, temp, begin=1951, end=1981):
    """
    Re-zero *temp* on its mean over the half-open window begin <= date < end.
    Returns (anomaly series, the mean that was subtracted).
    """
    inWindow = (date >= begin) & (date < end)
    offset = np.mean(temp[inWindow])
    return temp - offset, offset
    
# El Nino Oceanic Niño Index (ONI)
class getONI:
    """
    Retrieves El Nino Oceanic Niño Index (ONI) data.
        Methods:
            interp1d(dates) - returns interpolated anomaly for array-like dates

            extrapolate(dates) - like interp1d, but holds the first/last
            value outside the data range

        Data:
            df - Pandas dataframe of retrieved data
    """
    def __init__(self):
        self.__source__ = r"https://www.cpc.ncep.noaa.gov/data/indices/oni.ascii.txt"
        self.df = pd.read_csv(self.__source__, sep="\s+")
        nMonths = len(self.df.YR)
        # Decimal-year axis: one mid-month point per row, starting at the first year.
        self.df["yearDec"] = self.df.YR[0] + (np.arange(nMonths) + 0.5) / 12
        self.yearDec = self.df.yearDec.values
        self.__dateRange__ = (self.yearDec[0], self.yearDec[-1])
        self.interp1d = interpolate.interp1d(self.df.yearDec, self.df.ANOM, kind="linear")

    def extrapolate(self, dates):
        """Interpolate the anomaly, clamping dates to the available range."""
        lo, hi = self.__dateRange__
        clamped = np.clip(dates, lo, hi)
        return self.interp1d(clamped)

class getAOD:
    """
    Retrieves stratospheric Aerosol Optical Depth (AOD) data from GISS.
        Methods:
            interp1d(dates) - returns interpolated global AOD for array-like dates

            extrapolate(dates) - like interp1d, but holds the first/last
            value outside the data range

        Data:
            df - Pandas dataframe with columns year, global, NH, SH
    """
    def __init__(self):
        self.url = r"https://data.giss.nasa.gov/modelforce/strataer/reff_line.txt"
        self.df = pd.read_csv(self.url, sep="\s+", skiprows=4, skipfooter=1,
                              names=["year","global","NH","SH"], engine="python")
        yearValues = self.df.year.values
        self.__dateRange__ = (yearValues[0], yearValues[-1])
        self.interp1d = interpolate.interp1d(self.df.year, self.df["global"], kind="linear")

    def extrapolate(self, dates):
        """Interpolate the global AOD, clamping dates to the available range."""
        lo, hi = self.__dateRange__
        return self.interp1d(np.clip(dates, lo, hi))

class getAnnualEmissions:
    """
    Retrieves Global Carbon Project annual emissions from a local spreadsheet
    (see https://www.icos-cp.eu/global-carbon-budget-2019).

    FIXES: the original referenced the unbound names `emitFile`, `emit` and
    `dates` (missing `self.`/undefined), interpolated over the nonexistent
    `self.emitdf`, and left fEmit as a `return "Hi"` stub — it could not even
    be instantiated.

        Methods:
            fEmit(dates) - emissions linearly interpolated at decimal years
        Data:
            emit - Pandas dataframe of the "Global Carbon Budget" sheet
    """
    def __init__(self, emitFile=r"~/Downloads/Global_Carbon_Budget_2020v1.0.xlsx",
                 valueColumn=1):
        self.emitFile = emitFile
        self.emit = pd.read_excel(self.emitFile, sheet_name="Global Carbon Budget",
                                  skiprows=20, skipfooter=2)
        # NOTE(review): the value column is selected positionally (the first
        # column after Year); confirm it is the intended emissions series.
        values = (self.emit.iloc[:, valueColumn] if isinstance(valueColumn, int)
                  else self.emit[valueColumn])
        self.interp1d = interpolate.interp1d(self.emit.Year, values, kind="linear")

    def fEmit(self, dates):
        """Return emissions linearly interpolated at the given decimal years."""
        return self.interp1d(dates)
        

    
def pdoBiondi(source=None):
    """
    Biondi et al. tree-ring PDO reconstruction from NCEI.

    source : optional path or URL override (defaults to the NCEI file:
             58 preamble lines, then a whitespace table with a header row).
    Returns (YEAR column, E.PDO reconstruction column).
    """
    if source is None:
        source = r"https://www.ncei.noaa.gov/pub/data/paleo/treering/reconstructions/pdo.txt"
    df = pd.read_csv(source, sep=r"\s+", skiprows=58)
    return df['YEAR'], df['E.PDO']

def pdoDarrigo2001(source=None):
    """
    D'Arrigo (2001) tree-ring PDO reconstruction from NCEI.

    source : optional path or URL override (defaults to the NCEI file:
             86 preamble lines, then whitespace columns; only the first two
             columns — year and reconstruction — are used).
    Returns (years, reconstruction) as numpy arrays.
    """
    if source is None:
        source = r"https://www.ncei.noaa.gov/pub/data/paleo/treering/reconstructions/pdo-darrigo2001.txt"
    df = pd.read_csv(source, sep=r"\s+", skiprows=86, usecols=[0,1], header=None)
    return df.values[:,0], df.values[:,1]

def pdoDarrigo2006():
    """
    D'Arrigo & Wilson (2006) tree-ring PDO reconstruction from NCEI.

    Returns (dates, reconst): list of years (int) and reconstructed values
    (float), parsed from the fixed-layout text file.

    BUG FIX: the original called urllib.request.urlopen with only
    `import urllib` in scope, which does not bind the `request` submodule
    and raises AttributeError at runtime; `import urllib.request` is now
    imported at file level.  The repeated slice is also hoisted.
    """
    url = r"https://www.ncei.noaa.gov/pub/data/paleo/treering/reconstructions/pdo-darrigo2006.txt"
    with urllib.request.urlopen(url) as f:
        lines = f.readlines()
    rows = lines[74:-17]                           # drop 74-line header, 17-line footer
    dates = [int(row[:4]) for row in rows]         # bytes cols 1-4: year
    reconst = [float(row[17:-1]) for row in rows]  # col 18..end, trailing newline stripped
    return dates, reconst

def pdoMacDonald2005(source=r"/home/mark/Downloads/pdo-macdonald2005.txt"):
    """
    MacDonald & Case (2005) PDO reconstruction.

    source : path or URL to the NCEI text file (68 preamble lines, then a
             whitespace table with a header row).  The original hard-coded
             the developer-local download path; it is now the default of an
             overridable parameter.
    Returns (years, reconstruction) as numpy arrays.
    """
    df = pd.read_csv(source, sep=r"\s+", skiprows=68)
    return df.values[:,0], df.values[:,1]

def getAIRS(source=None):
    """
    Retrieve the GISS AIRS global monthly temperature anomalies.

    source : optional path or URL override (defaults to the GISS CSV:
             46 preamble lines, then rows of year followed by 12 monthly
             values; '*******' = missing).
    Returns (yearsByMonth, monthlySeries): decimal years at mid-month and
    anomalies, with missing months dropped from BOTH arrays so the date
    axis stays aligned.
    """
    if source is None:
        source = r"https://data.giss.nasa.gov/gistemp/tabledata_v4/T_AIRS/GLB.Ts+dSST.csv"
    df = pd.read_csv(source, na_values=["*******"], skiprows=46, header=None)
    monthlyData = df.values[:,1:13]
    monthlySeries = monthlyData.reshape(1,monthlyData.size)[0]
    yearsByMonth = df.values[0][0] + np.arange(monthlySeries.size) / 12. + 1/24
    mask = monthlySeries == monthlySeries   # Remove nan
    return yearsByMonth[mask], monthlySeries[mask]

def getNOAAlo(source=None):
    """
    Retrieve the NOAA v5 global land+ocean monthly temperature anomalies.

    source : optional path or URL override.  NOTE(review): the default URL
             pins the 202106 snapshot of the series — it will not pick up
             newer months; confirm whether a "latest" URL should be used.
    Returns (years, temps): decimal years at mid-month and anomalies.
    """
    if source is None:
        source = r"https://www.ncei.noaa.gov/data/noaa-global-surface-temperature/v5/access/timeseries/aravg.mon.land_ocean.90S.90N.v5.0.0.202106.asc"
    names = ["year", "month", "anomaly", "total_variance", "high_freq_var",
             "low_freq_var", "bias_var", "diag_var_a", "diag_var_b", "diag_var_c"]
    df = pd.read_csv(source, sep=r"\s+", names=names)
    years = df.year + df.month/12 - 1/24
    temps = df.anomaly
    return years, temps

def annualAvgDec(dates, data):
    """
    Average *data* within each calendar year of the decimal-year *dates*.
    Returns (years, annual means) as numpy arrays, years ascending.
    """
    wholeYears = np.fix(dates)
    years = np.array(sorted(set(wholeYears)))
    means = np.array([np.mean(data[wholeYears == y]) for y in years])
    return years, means

def date2year(dateSeries):
    """
    Convert matplotlib-compatible dates to approximate decimal years.
    NOTE(review): the "+ 1" offset assumes matplotlib's pre-3.3 day-number
    epoch (year 1); with the newer 1970 epoch this would be wrong — confirm.
    """
    days = date2num(dateSeries)
    return days / 365.2425 + 1

def detrend(series):
    """Remove the least-squares linear trend from a 1-D numpy series."""
    x = range(series.size)
    fitCoeffs = np.polyfit(x, series, 1)
    trend = np.polyval(fitCoeffs, x)
    return series - trend

def annualAvg(dateSeries, dataSeries):
    """
    Average *dataSeries* within each calendar year of *dateSeries*, whose
    elements are datetime-like objects exposing a .year attribute.
    Returns (years, annual means) as numpy arrays, years ascending.

    COMPLETED: the original computed the sorted year list and then fell off
    the end, implicitly returning None; this finishes the per-year averaging
    the same way annualAvgDec does for decimal dates.
    """
    yearPerSample = np.array([d.year for d in dateSeries])
    data = np.asarray(dataSeries)
    years = np.array(sorted(set(yearPerSample)))
    avgs = np.array([np.mean(data[yearPerSample == y]) for y in years])
    return years, avgs
    
    
# Class to calculate anomaly baseline and provide method as function of date
class anomalyBaseline():
    """
    Per-month anomaly baseline over the half-open year window [begin, end).

    date   : pandas Series of datetimes
    series : values aligned with date
    After construction, base[month] (month = 1..12) holds the mean of
    series for that calendar month within the window (NaN if no samples).
    """
    def __init__(self, date, series, begin=2011, end=2021):
        self.base = {}
        # BUG FIX: the original used `date.dt.year <= begin`, which selected
        # every year up to `begin` instead of the intended [begin, end) window.
        yearMask = (date.dt.year >= begin) & (date.dt.year < end)
        for month in range(1,13):
            monthMask = (month == date.dt.month)
            self.base[month] = np.mean(series[monthMask & yearMask])
            
# Get MLO weekly data
def getMLO(source=r"/home/mark/Downloads/co2_weekly_mlo.csv"):
    """
    Retrieve Mauna Loa weekly CO2 concentrations.

    source : CSV with 47 preamble lines and columns including 'decimal' and
             'average'; -999.99 marks missing weeks.  The original shadowed
             an FTP URL (ftp://aftp.cmdl.noaa.gov/products/trends/co2/
             co2_weekly_mlo.txt) with this local path; the FTP file is
             whitespace-delimited and would NOT parse with this comma
             reader, so the CSV path stays the default — TODO confirm the
             intended remote CSV source.
    Returns (decimal dates, ppm), where missing weeks are filled by linear
    interpolation over the valid samples.  NOTE: interp1d raises if any
    missing week lies outside the valid date range (same as the original).
    """
    mlo = pd.read_csv(source, skiprows=47, na_values=["-999.99"])
    valid = mlo.average.notna()
    fPpm = interp1d(mlo.decimal.values[valid], mlo.average[valid], kind="linear")
    ppm = fPpm(mlo.decimal)
    return mlo.decimal, ppm