# Import libraries
import pandas as pd
import numpy as np
import time
import datetime
import json
import os

import requests
import prettytable
import yfinance as yf
from bs4 import BeautifulSoup
from dotenv import load_dotenv
from pytrends.request import TrendReq
from pytrends import dailydata

pytrends = TrendReq(hl='en-US')

# Load the CoinGecko API key from the environment
load_dotenv()
COINGECKO_API_KEY = os.environ["COINGECKO_API_KEY"]


# 1. Historical crypto data
def scrape_historical_series(coin_name, date_start, date_end):
    """
    Scrape historical series on the sample of coins.

    Args:
        coin_name (list): List of coins we will use for training.
        date_start (list): [Year_start, Month_start, Day_start].
        date_end (list): [Year_end, Month_end, Day_end].

    Returns:
        DataFrame with the evolution of prices, market capitalization,
        and total volume over time, for each respective currency.
    """
    df_ts_coins1 = pd.DataFrame()

    # Date definitions: convert the start and end dates into unix timestamps for scraping
    date_time = datetime.datetime(int(date_start[0]), int(date_start[1]), int(date_start[2]))
    date_time_now = datetime.datetime(int(date_end[0]), int(date_end[1]), int(date_end[2]))
    unix_past = time.mktime(date_time.timetuple())
    unix_now = time.mktime(date_time_now.timetuple())
    past = date_time.strftime('%Y-%m-%d')
    now = date_time_now.strftime('%Y-%m-%d')
    datum_range = pd.date_range(start=past, end=now, freq='D')

    # Unix timestamp for every day in the range
    unix_all = np.array([time.mktime(val.timetuple()) for val in datum_range])
    coins_names = []

    for coin in pd.unique(coin_name):
        time.sleep(5)  # stay under the CoinGecko rate limit
        url = (
            f"https://api.coingecko.com/api/v3/coins/{coin.lower()}/market_chart/range"
            f"?vs_currency=usd&from={unix_past}&to={unix_now}"
        )
        headers = {
            "accept": "application/json",
            "x-cg-demo-api-key": COINGECKO_API_KEY,
        }
        response = requests.get(url, headers=headers)
        data = response.json()

        if len(data) == 0:
            continue  # skip coins for which the API returned nothing

        prices = pd.DataFrame(data['prices'], columns=['date', 'prices'])
        market = pd.DataFrame(data['market_caps'], columns=['date', 'market_caps'])
        volume = pd.DataFrame(data['total_volumes'], columns=['date', 'total_vol'])
        ts_coins_cut = pd.concat([prices, market.iloc[:, 1], volume.iloc[:, 1]], axis=1)

        # Create an id variable for each coin
        coinn = np.repeat(coin, len(ts_coins_cut))
        coins_names = np.append(coins_names, coinn)
        ts_coins_cut['id'] = coinn

        # Convert the millisecond unix timestamps into datetimes
        date_all = [
            datetime.datetime.fromtimestamp(int(val) / 1000).strftime('%m/%d/%y, %H:%M:%S')
            for val in ts_coins_cut['date']
        ]
        dates = pd.to_datetime(date_all, format='%m/%d/%y, %H:%M:%S')

        # Set the date as the index to aggregate hourly data into daily data
        ts_coins_cut['dates'] = dates
        ts_coins_cut = ts_coins_cut.set_index('dates')
        prices = ts_coins_cut.pop('prices')
        ts_coins_cut = ts_coins_cut.groupby([pd.Grouper(freq='D'), 'id']).mean()
        prices1 = prices.groupby([pd.Grouper(freq='D')]).mean()

        # After aggregating, change the index back
        prices1 = prices1.reset_index()
        ts_coins_cut.reset_index(inplace=True)
        ts_coins_cut.insert(2, 'prices', prices1.iloc[:, 1])

        # Replace the averaged millisecond timestamps with the daily unix dates
        ts_coins_cut = ts_coins_cut.drop(columns=['date'])
        ts_coins_cut.insert(2, 'date', unix_all[0:len(ts_coins_cut)])

        # Concatenate the chunk with the selected variables across all currencies
        df_ts_coins1 = pd.concat([df_ts_coins1, ts_coins_cut])

    if not df_ts_coins1.empty:
        df_ts_coins1 = df_ts_coins1.drop(columns=['dates'])
    return df_ts_coins1
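# A minimal usage sketch for scrape_historical_series. The helper below is
# hypothetical and not part of the original pipeline; the coin ids ('bitcoin',
# 'ethereum') and the date window are illustrative assumptions, and a valid
# COINGECKO_API_KEY must be present in the environment.
def _demo_historical_series():
    coins = pd.Series(['bitcoin', 'ethereum'])  # assumed CoinGecko coin ids
    return scrape_historical_series(coins, date_start=[2024, 1, 1], date_end=[2024, 3, 1])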
# 2. Macro variables, CLI
def scrape_cli(past, today):
    """Scrape data on the composite leading indicator (CLI) for the USA.

    Args:
        past (date): Date at which scraping starts.
        today (date): Date at which scraping ends.

    Returns:
        Series with the monthly CLI, indexed by date.
    """
    countries = ['USA']  # other options: 'OECDE', 'OECD', 'NMEC'
    past_date = past.strftime('%Y-%m')
    today_date = today.strftime('%Y-%m')
    types = ['CLI']  # 'BCLI' (BSCICP03, business confidence) could be added here

    # Scrape OECD data and create a dataset in the form of a time series
    # of the CLI for each country
    for series_type in types:
        for country in countries:
            if series_type == 'CLI':
                mainpage = requests.get(
                    f'https://stats.oecd.org/restsdmx/sdmx.ashx/GetData/MEI_CLI/'
                    f'CSCICP03.{country}.M/all?startTime={past_date}&endTime={today_date}'
                )
            soup = BeautifulSoup(mainpage.content, 'xml')
            whatis = soup.find_all("ObsValue")
            whatis_key = soup.find_all("ObsKey")

            # Parse country, date, measure, and value out of each SDMX observation
            country_codes = [str(whatis_key[i]).split('"REF_AREA" value="')[1][:3] for i in range(len(whatis))]
            dates = [pd.to_datetime(str(whatis_key[i]).split('"TIME_PERIOD" value="')[1][:7]) for i in range(len(whatis))]
            measure = [str(whatis_key[i]).split('"MEASURE" value="')[1][:7][:-2] for i in range(len(whatis))]
            values = [float(str(whatis[i]).split('value="')[1][0:-4]) for i in range(len(whatis))]

            df_cli = pd.DataFrame({'date': dates, 'country': country_codes, 'measure': measure, series_type: values})
            df_cli.index = pd.to_datetime(df_cli['date'])
            df_cli = df_cli.loc[df_cli['country'] == 'USA']['CLI'].astype('float').resample('M').mean()
    return df_cli


def scrape_cpi_employment():
    """Scrape CPI and employment data from the BLS public API."""
    headers = {'Content-type': 'application/json'}
    variables = ['CUUR0000SA0', 'LNS12000000']  # CPI-U and civilian employment series
    data = json.dumps({"seriesid": variables, "startyear": "2024", "endyear": "2024"})
    p = requests.post('https://api.bls.gov/publicAPI/v2/timeseries/data/', data=data, headers=headers)
    json_data = json.loads(p.text)

    year_all = []
    period_all = []
    value_all = []
    series_id = []

    if len(json_data['Results']) > 0:
        for series in json_data['Results']['series']:
            x = prettytable.PrettyTable(["series id", "year", "period", "value", "footnotes"])
            seriesId = series['seriesID']
            for item in series['data']:
                year = item['year']
                period = item['period']
                value = item['value']
                footnotes = ""
                for footnote in item['footnotes']:
                    if footnote:
                        footnotes = footnotes + footnote['text'] + ','
                # Keep only monthly observations (periods M01-M12)
                if 'M01' <= period <= 'M12':
                    x.add_row([seriesId, year, period, value, footnotes[0:-1]])
                    year_all = np.append(year_all, year)
                    period_all = np.append(period_all, period)
                    value_all = np.append(value_all, value)
                    if seriesId == 'CUUR0000SA0':
                        series_id = np.append(series_id, 'CPI')
                    if seriesId == 'LNS12000000':
                        series_id = np.append(series_id, 'Employment')

        # Build a proper date from the year and the month number in the period code
        date = [pd.to_datetime(f"{year_all[i]}-{int(period_all[i][-2:])}") for i in range(len(year_all))]
        df_cpi = pd.DataFrame({'date': date, 'value': value_all})
        df_cpi['series_id'] = series_id
        df_cpi.set_index('date', inplace=True)
        df_cpi = pd.concat(
            [df_cpi.loc[df_cpi['series_id'] == 'CPI'], df_cpi.loc[df_cpi['series_id'] == 'Employment']],
            axis=1,
        )
        df_cpi = df_cpi.drop(columns='series_id')
        df_cpi.columns = ['CPI', 'Employment']
    else:
        df_cpi = pd.DataFrame()
    return df_cpi
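# A minimal usage sketch for the two macro scrapers. The helper below is
# hypothetical and not part of the original pipeline; the six-month window is
# an illustrative assumption, and the unregistered BLS endpoint only allows a
# limited number of requests per day.
def _demo_macro_series():
    today = datetime.date.today()
    past = today - datetime.timedelta(days=180)
    df_cli = scrape_cli(past, today)   # monthly CLI for the USA
    df_cpi = scrape_cpi_employment()   # monthly CPI and employment for 2024
    return df_cli, df_cpi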
def scrape_google_trends(currency, currency_short):
    """Scrape daily Google Trends data for one currency.

    Falls back to the short ticker, and to a series of zeros if neither
    query can be scraped.
    """
    curr_neni = []  # currencies that could not be scraped
    names_values = [currency]
    names_short = [currency_short]

    # Scrape the last 30 days
    today = datetime.date.today()
    Month_end = today.strftime("%m")
    Year_end = today.strftime("%Y")
    past = today - datetime.timedelta(days=30)
    Month_start = past.strftime("%m")
    Year_start = past.strftime("%Y")
    date_all1 = pd.date_range(past, today)

    google_data = pd.DataFrame()
    # Scrape Google Trends and create one time series, concatenated across all currencies
    for run_name in list(names_values):
        time.sleep(5)  # avoid hammering the Trends endpoint
        try:
            data = dailydata.get_daily_data(str(run_name), int(Year_start), int(Month_start),
                                            int(Year_end), int(Month_end), verbose=False)
            data1 = data.iloc[:, 4]
        except Exception:
            try:
                # Retry with the short ticker instead of the full name
                time.sleep(5)
                new_index = list(names_values).index(run_name)
                data = dailydata.get_daily_data(word=names_short[new_index],
                                                start_year=int(Year_start), start_mon=int(Month_start),
                                                stop_year=int(Year_end), stop_mon=int(Month_end),
                                                verbose=False)
                data1 = data.iloc[:, 4]
            except Exception:
                # No Google Trends data for this currency: fall back to zeros
                curr_neni = np.append(curr_neni, run_name)
                data1 = np.repeat(0, len(date_all1))

        data1 = pd.DataFrame({'google_trend': pd.Series(data1)})
        data1.insert(0, 'id', np.repeat(run_name, len(data1)))
        google_data = pd.concat([google_data, data1], axis=0)

    # Change the index from date to date_new to match the old dataset
    google_data.reset_index(inplace=True)
    if int(np.mean(data1['google_trend'])) == 0:
        # Fallback series of zeros: build the date index manually
        google_data['date_new'] = date_all1
        google_data.set_index('date_new', inplace=True)
    else:
        google_data.columns = np.append('date_new', google_data.columns[1:])
        google_data.set_index('date_new', inplace=True)
    return google_data


def scrape_stocks(past, today):
    """Scrape Yahoo Finance and create a dataset with time series for all the financial variables."""
    # Set dates in the form needed for scraping
    date_old = past
    date_new = today

    df = pd.DataFrame()
    # Tickers to scrape: S&P 500, gold futures, EUR/USD ('EURUSD=X'; the
    # original '%3D' was the URL-encoded '='), and the 10-year Treasury yield
    codes = ['^GSPC', 'GC=F', 'EURUSD=X', '^TNX']
    codes_names = ['GSPC', 'GC=F', 'EURUSD', 'TNX']

    for code in codes:
        code_index = codes.index(code)
        code_name = codes_names[code_index]
        df_code = yf.download(code, start=date_old, end=date_new, progress=False)
        df_code = pd.DataFrame(df_code)
        df_code = df_code.reset_index()
        df_code_ts = df_code.iloc[:, 1]  # first price column returned by yfinance
        df_code_ts = df_code_ts.rename(code_name)
        df = pd.concat([df, df_code_ts], axis=1)
        df_time = df_code.iloc[:, 0]  # the Date column

    # Set the index to the date
    df.insert(0, 'date', df_time)
    df_finance = df.dropna()
    df_finance.set_index('date', inplace=True)
    df_finance.index = pd.to_datetime(df_finance.index)
    return df_finance
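# A minimal usage sketch for the remaining scrapers, run only when the module
# is executed directly. 'Bitcoin'/'BTC' and the 30-day stock window are
# illustrative assumptions, not part of the original pipeline.
if __name__ == "__main__":
    today = datetime.date.today()
    past = today - datetime.timedelta(days=30)
    trends = scrape_google_trends('Bitcoin', 'BTC')  # daily search interest
    finance = scrape_stocks(past, today)             # S&P 500, gold, EUR/USD, 10y yield
    print(trends.tail())
    print(finance.tail())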