"""
Binance 历史K线数据下载器
用于回测时从Binance公开API下载1H K线数据
"""

import os
import time
import requests
import pandas as pd
from datetime import datetime, timezone
from typing import Optional


# Binance REST endpoint queried for kline (candlestick) data.
BINANCE_KLINE_URL = "https://api.binance.com/api/v3/klines"
# Directory where downloaded CSV files are stored: "historical_data" under
# the parent of the directory that contains this file.
DATA_DIR = os.path.join(os.path.dirname(os.path.dirname(__file__)), "historical_data")


def download_klines(
    symbol: str,
    interval: str = "1h",
    start_date: str = "2017-08-01",
    end_date: Optional[str] = None,
    save: bool = True,
    max_retries: int = 5,
) -> pd.DataFrame:
    """
    Download historical kline (candlestick) data from Binance.

    The requested range is fetched page by page (up to 1000 bars per
    request), de-duplicated, sorted by open time and optionally written to
    ``DATA_DIR/<symbol>_<interval>.csv``.

    Args:
        symbol: Trading pair, e.g. "BTCUSDT".
        interval: Kline interval, "1h" by default.
        start_date: Start date "YYYY-MM-DD", interpreted as 00:00 UTC.
        end_date: End date "YYYY-MM-DD", interpreted as 00:00 UTC of that
            day. Defaults to the current time.
        save: Whether to write the result to a CSV file.
        max_retries: Number of consecutive failed requests that are retried
            before giving up. The counter resets after every successful
            request.
    Returns:
        DataFrame with columns: timestamp, open, high, low, close, volume.
        The frame is empty (but still has these columns) when no bars were
        returned.
    Raises:
        ValueError: If a date string does not match "YYYY-MM-DD".
        RuntimeError: If a request keeps failing after ``max_retries``
            retries (e.g. invalid symbol, persistent network outage).
    """
    columns = ['timestamp', 'open', 'high', 'low', 'close', 'volume']

    # Binance expects millisecond epoch timestamps.
    start_ts = int(datetime.strptime(start_date, "%Y-%m-%d")
                   .replace(tzinfo=timezone.utc).timestamp() * 1000)
    if end_date:
        end_ts = int(datetime.strptime(end_date, "%Y-%m-%d")
                     .replace(tzinfo=timezone.utc).timestamp() * 1000)
    else:
        end_ts = int(datetime.now(timezone.utc).timestamp() * 1000)

    all_data = []
    current = start_ts
    failures = 0  # consecutive failed requests for the current page
    print(f"Downloading {symbol} {interval} from {start_date} ...")

    while current < end_ts:
        params = {
            "symbol": symbol,
            "interval": interval,
            "startTime": current,
            "endTime": end_ts,
            "limit": 1000,
        }
        try:
            resp = requests.get(BINANCE_KLINE_URL, params=params, timeout=30)
            resp.raise_for_status()
            data = resp.json()
        except (requests.RequestException, ValueError) as e:
            # ValueError covers an undecodable JSON body on older versions
            # of requests. Permanent errors (such as a 4xx for an unknown
            # symbol) would otherwise be retried forever, so cap retries.
            failures += 1
            if failures > max_retries:
                raise RuntimeError(
                    f"Giving up on {symbol} {interval}: request still failing "
                    f"after {max_retries} retries"
                ) from e
            print(f"  Request error: {e}, retrying in 5s ...")
            time.sleep(5)
            continue
        failures = 0

        if not data:
            break

        all_data.extend(data)
        next_start = data[-1][6] + 1  # close_time + 1
        if next_start <= current:
            # The cursor did not advance; stop instead of requesting the
            # same page again indefinitely.
            break
        current = next_start
        print(f"  ... {len(all_data)} bars downloaded", end="\r")
        time.sleep(0.2)  # rate limit

    print(f"\n  Total: {len(all_data)} bars for {symbol}")

    if not all_data:
        # Keep the documented schema even when there is nothing to return.
        return pd.DataFrame(columns=columns)

    df = pd.DataFrame(all_data, columns=[
        'open_time', 'open', 'high', 'low', 'close', 'volume',
        'close_time', 'quote_volume', 'trades', 'taker_buy_vol',
        'taker_buy_quote_vol', 'ignore'
    ])

    df['timestamp'] = pd.to_datetime(df['open_time'], unit='ms', utc=True)
    # Converted explicitly so the price/volume columns are floats no matter
    # what type the API payload used for them.
    for col in columns[1:]:
        df[col] = df[col].astype(float)

    df = df[columns].copy()
    df = df.drop_duplicates(subset='timestamp').sort_values('timestamp').reset_index(drop=True)

    if save:
        os.makedirs(DATA_DIR, exist_ok=True)
        path = os.path.join(DATA_DIR, f"{symbol}_{interval}.csv")
        df.to_csv(path, index=False)
        print(f"  Saved to {path}")

    return df


def load_klines(symbol: str, interval: str = "1h") -> pd.DataFrame:
    """
    Load kline data for a symbol/interval from its local CSV file.

    Args:
        symbol: Trading pair used in the file name, e.g. "BTCUSDT".
        interval: Kline interval used in the file name, "1h" by default.
    Returns:
        DataFrame read from ``DATA_DIR/<symbol>_<interval>.csv`` with the
        ``timestamp`` column converted to UTC datetimes.
    Raises:
        FileNotFoundError: If the CSV file does not exist.
    """
    csv_path = os.path.join(DATA_DIR, f"{symbol}_{interval}.csv")
    if os.path.exists(csv_path):
        frame = pd.read_csv(csv_path, parse_dates=['timestamp'])
        # Normalise to timezone-aware UTC regardless of how the CSV parsed.
        frame['timestamp'] = pd.to_datetime(frame['timestamp'], utc=True)
        return frame
    raise FileNotFoundError(f"No data file: {csv_path}. Run download first.")


def download_all_coins(coins_config: dict, end_date: Optional[str] = None):
    """
    Download 1H kline data for every symbol in the configuration.

    Args:
        coins_config: Mapping of symbol to a config object; each object's
            ``data_start`` attribute is used as the download start date.
        end_date: End date passed through to ``download_klines``.
    """
    for pair, settings in coins_config.items():
        download_klines(
            symbol=pair,
            interval="1h",
            start_date=settings.data_start,
            end_date=end_date,
        )
        # Pause between symbols.
        time.sleep(1)
