from datetime import datetime, timedelta
import requests
from bs4 import BeautifulSoup
import pandas as pd
from datetime import datetime
import re

# %%

# Markets currently scraped (preliminary list). ``country_codes`` and
# ``currency_codes`` are parallel sequences: the currency at position i is
# the one used for the country at position i.
country_codes = 'AE AU BR CA AT US CZ BE-NL DE DK'.split()
currency_codes = 'AED AUD BRL CAD EUR USD CZK EUR EUR DKK'.split()

# Markets that are not enabled yet, written as country/currency pairs:
#   ES/EUR FI/EUR FR/EUR HK/HKD HU/HUF IE/EUR IT/EUR JP/JPY KR/KRW
#   MX/MXN MY/MYR NL/EUR NO/NOK NZ/NZD PH/PHP PL/PLN RU/RUB PT/EUR
#   SE/SEK SG/SGD TH/THB TR/TRY LU/EUR TW/TWD UK/GBP

# Lower-cased country codes; these are what the scrape loop iterates over.
country_list = list(map(str.lower, country_codes))

# ### Country and date selection ###

# A single currency/country can be specified instead of looping over the
# lists above, e.g.:
# currency = "TWD"
# country = "tw"

# Date window, formatted YYYY-MM-DD.
start_date = "2016-01-01"
end_date = "2023-05-07"

# Alternative one-week window (disabled):
# start_date = "2021-01-01"
# end_date = "2021-01-08"

###################################


def generate_urls(start_date, end_date, country):
    """Build one URL per week for the given date window.

    Args:
        start_date: First date of the window, formatted ``YYYY-MM-DD``.
        end_date: Last date of the window (inclusive), formatted
            ``YYYY-MM-DD``.
        country: Country identifier passed to the URL template as its second
            ``str.format`` argument.

    Returns:
        A list of URL strings, one for ``start_date`` and one for every
        seventh day after it that is not later than ``end_date``. The list is
        empty when ``end_date`` precedes ``start_date``.

    Raises:
        ValueError: If either date string is not in ``YYYY-MM-DD`` form.
    """
    # NOTE(review): this template contains no ``{}`` placeholders, so
    # ``str.format`` returns it unchanged and every generated entry is the
    # same string. Confirm the intended URL template.
    template = "https://url.za.m.mimecastprotect.com/s/AYthC2Rq5Wip45wKMunfAH5AWWl"

    first_day = datetime.strptime(start_date, "%Y-%m-%d")
    last_day = datetime.strptime(end_date, "%Y-%m-%d")

    # Both values are midnight datetimes, so ``.days`` is the exact span.
    span_days = (last_day - first_day).days

    return [
        template.format(
            (first_day + timedelta(days=offset)).strftime("%Y%m%d"), country)
        for offset in range(0, span_days + 1, 7)
    ]


def get_prices(soup):
    """Locate the price elements of a parsed page, trying four layouts in turn.

    Args:
        soup: Parsed document exposing ``find`` and ``find_all`` (the script
            passes a ``BeautifulSoup`` object).

    Returns:
        A tuple ``(elements, method)``. ``method`` is 1-4 and names the
        strategy that matched; ``elements`` is the list that strategy found.
        ``(None, None)`` is returned when no strategy matches.
    """
    # Strategy 1: every <p class="pd-billboard-price">.
    billboard_prices = soup.find_all("p", class_="pd-billboard-price")
    if billboard_prices:
        return billboard_prices, 1

    # Strategy 2: one <div> per class string; all lookups must succeed.
    # NOTE(review): "ipad-air price" appears twice in this list, so a match
    # returns that element twice. Confirm whether another class was meant.
    div_prices = []
    for css in ("ipad-pro price", "ipad-air price",
                "ipad price", "ipad-air price"):
        div_prices.append(soup.find("div", class_=css))
    if not any(found is None for found in div_prices):
        return div_prices, 2

    # Strategy 3: <p> elements whose class matches "dd-compare-price".
    compare_prices = soup.find_all(
        "p", class_=re.compile("dd-compare-price"))
    if compare_prices:
        return compare_prices, 3

    # Strategy 4 (new method): per class string take the <p> if present,
    # otherwise the <div>; again every lookup must succeed.
    lineup_prices = []
    for css in ("ipad-pro price", "ipad-air price", "ipad-10thgen price",
                "ipad-9thgen price", "ipad-mini price"):
        hit = soup.find("p", class_=css)
        if not hit:
            hit = soup.find("div", class_=css)
        lineup_prices.append(hit)
    if not any(found is None for found in lineup_prices):
        return lineup_prices, 4

    return None, None


def get_products(soup):
    """Locate the product entries of a parsed page.

    Two element-based lookups are tried first; if neither finds anything a
    fixed list of display names is returned instead.

    Args:
        soup: Parsed document exposing ``find_all`` with a predicate argument
            (the script passes a ``BeautifulSoup`` object).

    Returns:
        A tuple ``(products, method)``:

        * method 1 - ``<div>`` elements carrying class ``pd-billboard-product``
        * method 2 - ``<h3>`` elements carrying class ``dd-compare-product``
        * method 3 - a hard-coded list of display-name strings

        Method 3 does not inspect the page, so this function always returns
        a non-empty result.
    """
    def carries_class(tag_name, css_class):
        # Predicate factory: tag of the given name whose class list is
        # non-empty and contains css_class.
        def matches(tag):
            return (tag.name == tag_name
                    and tag.get('class')
                    and css_class in tag.get('class'))
        return matches

    element_lookups = (
        ('div', 'pd-billboard-product'),
        ('h3', 'dd-compare-product'),
    )
    for method, (tag_name, css_class) in enumerate(element_lookups, start=1):
        found = soup.find_all(carries_class(tag_name, css_class))
        if found:
            return found, method

    # New method: fixed line-up, in the same order as the price classes
    # "ipad-pro price", "ipad-air price", "ipad-10thgen price",
    # "ipad-9thgen price", "ipad-mini price".
    display_names = {
        "ipad-pro": "iPad Pro",
        "ipad-air": "iPad Air",
        "ipad-10thgen": "iPad 10th Gen",
        "ipad-9thgen": "iPad 9th Gen",
        "ipad-mini": "iPad Mini",
    }
    lineup = ("ipad-pro", "ipad-air", "ipad-10thgen",
              "ipad-9thgen", "ipad-mini")
    return [display_names[slug] for slug in lineup], 3


def _parse_price(price_text, currency):
    """Return the first number in *price_text* as an int, or None if it has no digit."""
    if currency == "EUR":
        # Groups of three digits separated by space/comma, plus an optional
        # two-digit ".dd" tail that is rounded away.
        # NOTE(review): dot-grouped text such as "1.099" matches as "1.09"
        # and rounds to 1, and an ungrouped "1099" matches as "109".
        # Confirm the formats the EUR pages actually use.
        match = re.search(r'\d{1,3}(?:[ ,]?\d{3})*(?:\.\d{2})?', price_text)
        if match is None:
            return None
        return round(float(match.group(0).replace(',', '').replace(' ', '')))

    # Other currencies: every space, comma and dot is treated as a grouping
    # character and removed.
    # NOTE(review): a decimal tail ("329.00") is therefore folded into the
    # integer (32900). Verify against the pages scraped.
    match = re.search(r'\d{1,3}(?:[ ,.]?\d{1,3})*', price_text)
    if match is None:
        return None
    return int(match.group(0).replace(',', '').replace(' ', '').replace('.', ''))


def extract_data(url, currency, country, timeout=30):
    """Download one page and return its iPad prices as a one-row table.

    Args:
        url: Page address. It must contain ``web/<YYYYMMDD>/``; those eight
            digits become the ``Date`` value.
        currency: Currency code stored in the ``Currency`` column. ``"EUR"``
            selects the decimal-aware price pattern.
        country: Country identifier stored in the ``Country`` column.
        timeout: Seconds to wait for the server before ``requests`` gives up.

    Returns:
        A ``DataFrame`` with columns ``Date``, ``Currency``, ``Country`` and
        one column per model name, or ``None`` (after printing a message)
        when the URL carries no date, when products or prices cannot be
        located, or when no price on the page can be parsed.

    Raises:
        requests.RequestException: If the download fails or times out.
    """
    # Check the URL before downloading: a page that cannot be dated is unusable.
    date_match = re.search(r'web/(\d{8})/', url)
    if date_match is None:
        print(f"Error: Date not found in {url}")
        return None
    date = datetime.strptime(date_match.group(1), '%Y%m%d').strftime('%Y-%m-%d')

    # Without a timeout a single stalled connection would block the whole run.
    response = requests.get(url, timeout=timeout)
    soup = BeautifulSoup(response.text, "html.parser")

    products, product_method = get_products(soup)

    if not products:
        print(f"Error: Products not found for {url}")
        return None

    prices, price_method = get_prices(soup)

    if not prices:
        print(f"Error: Prices not found for {url}")
        return None

    subheader_class = re.compile("pd-billboard-subheader")
    block_link_class = re.compile("block-link")

    ipad_data = []

    # NOTE(review): products and prices are paired purely by position, and
    # price_method is not consulted. Verify that the hard-coded method-3
    # product names line up with every price strategy that can accompany them.
    for product, price in zip(products, prices):
        if product_method == 1:
            model_element = product.find("p", class_=subheader_class)
        elif product_method == 2:
            model_element = product.find("span", class_=block_link_class)
        elif product_method == 3:
            # Method 3 products are already plain display-name strings.
            model_element = product
        else:
            model_element = None

        if not model_element:
            continue

        model = model_element if product_method == 3 else model_element.text.strip()
        price_text = price.text.strip().replace('\n', '').replace(u'\xa0', ' ')

        price_value = _parse_price(price_text, currency)
        if price_value is None:
            # Skip just this model instead of discarding the whole page.
            print(f"Error: Price not parsed for {model} ({price_text!r}) in {url}")
            continue

        ipad_data.append({
            "Date": date,
            "Currency": currency,
            "Country": country,
            "Model": model,
            "Price": price_value
        })

    if not ipad_data:
        # pivot_table on an empty frame would fail on the missing columns.
        print(f"Error: No model/price pairs extracted for {url}")
        return None

    df = pd.DataFrame(ipad_data)

    # One row per (Date, Currency, Country); the first price wins when a
    # model name occurs more than once.
    df_pivot = df.pivot_table(index=['Date', 'Currency', 'Country'],
                              columns='Model',
                              values='Price',
                              aggfunc='first').reset_index()

    return df_pivot


# Scrape every configured market and write one CSV file per country.
for country, currency in zip(country_list, currency_codes):
    print(f"Processing for country: {country.upper()}, currency: {currency}")

    urls = generate_urls(start_date, end_date, country)

    dataframes = []
    last_known_currency = None
    for url in urls:
        print(f"Processing: {url}")
        try:
            df = extract_data(url, currency, country)
            if df is not None:
                last_known_currency = df.loc[0, 'Currency']
                dataframes.append(df)
        except Exception as e:
            # Best-effort scrape: report the failing page and move on so one
            # bad page does not end the run.
            print(f"Error while processing: {url}")
            print(e)

    if not dataframes:
        # pd.concat raises ValueError on an empty list, which would abort the
        # script and skip every remaining country.
        print(f"No data collected for country: {country.upper()}, skipping CSV export")
        continue

    combined_df = pd.concat(dataframes, ignore_index=True)
    print(combined_df)

    csv_filename = f"ipad_prices_{country}_test.csv"
    combined_df.to_csv(csv_filename, index=False)

# %%
