"""
模拟真实BTC/ETH走势的测试数据生成器

基于BTC 2017-2025年的实际周期结构:
- 2017 Q3-Q4: 牛市 $4K → $20K
- 2018: 熊市 $20K → $3.2K
- 2019: 复苏 $3.2K → $14K → 回落$7K
- 2020 Q1-Q3: COVID崩盘+复苏 $7K→$3.8K→$12K
- 2020 Q4-2021 Q2: 大牛 $12K → $65K
- 2021 Q3: 回调 $65K → $30K
- 2021 Q4: 二次高点 $30K → $69K
- 2022: 大熊 $69K → $15.5K
- 2023: 慢牛 $15.5K → $44K
- 2024: ETF牛市 $44K → $73K → 回调 → $100K
- 2025 Q1: $100K → $80K → $90K

每个阶段的关键: 跌幅、盒子（横盘）、放量突破
"""

import numpy as np
import pandas as pd
from typing import List, Tuple


def _generate_phase(
    start_price: float,
    end_price: float,
    n_hours: int,
    volatility: float = 0.003,
    box_zones: List[Tuple[float, float, int]] = None,
) -> np.ndarray:
    """Generate the hourly price path for a single market phase.

    The path is a geometric random walk whose per-hour drift would carry
    ``start_price`` to ``end_price`` over ``n_hours``; inside "box" zones the
    drift is damped so the price moves sideways. The path is then warped so
    that its last value is ``end_price``.

    Args:
        start_price: Price of the first bar.
        end_price: Price the last bar is forced to.
        n_hours: Number of hourly bars to produce.
        volatility: Standard deviation of the per-hour log-return noise.
        box_zones: Optional ``(start_frac, end_frac, box_duration_hours)``
            tuples. ``start_frac``/``end_frac`` are positions within the phase
            (0..1) between which the drift is scaled by 0.05. The third
            element is accepted but not used by this function.

    Returns:
        Array of ``n_hours`` prices; empty when ``n_hours <= 0``.

    Note:
        Reseeds NumPy's global random generator as a side effect.
    """
    import zlib  # local import: only needed to derive the stable seed below

    if n_hours <= 0:
        return np.zeros(0)

    # builtin hash() on str is salted per interpreter process, which made the
    # generated data differ from run to run. CRC32 is stable across runs.
    seed_key = f"{start_price}{end_price}{n_hours}".encode("utf-8")
    np.random.seed(zlib.crc32(seed_key) % 2**31)

    drift_per_hour = np.log(end_price / start_price) / n_hours

    # Step k (k = 1 .. n_hours-1) sits at fraction k / n_hours of the phase.
    step_frac = np.arange(1, n_hours) / n_hours
    step_drift = np.full(n_hours - 1, drift_per_hour)
    for zone_start, zone_end, _ in box_zones or ():
        in_zone = (step_frac >= zone_start) & (step_frac <= zone_end)
        step_drift[in_zone] *= 0.05  # almost flat while inside a box

    noise = np.random.normal(0, volatility, n_hours - 1)
    log_path = np.concatenate(([0.0], np.cumsum(step_drift + noise)))
    prices = start_price * np.exp(log_path)

    # Pin the end point: the cubic weight leaves the early bars nearly
    # untouched and applies the full correction only at the last bar.
    end_weight = np.linspace(0, 1, n_hours) ** 3
    return prices * (end_price / prices[-1]) ** end_weight


def _add_volume_pattern(n: int, base_vol: float = 500,
                        spike_positions: List[float] = None) -> np.ndarray:
    """Build an hourly volume series with bursts around given positions.

    Baseline volume is log-normally distributed around ``base_vol``. For each
    entry of ``spike_positions`` (a fraction of the series length) the bars
    from 5 before to 9 after that index are each multiplied by a random factor
    drawn uniformly from [2.5, 5.0).

    Args:
        n: Number of bars.
        base_vol: Median of the baseline volume distribution.
        spike_positions: Optional fractional positions of volume bursts.

    Returns:
        Array of ``n`` volumes.

    Note:
        Reseeds NumPy's global random generator with ``42 + n``.
    """
    np.random.seed(42 + n)
    volume = np.random.lognormal(np.log(base_vol), 0.3, n)

    for position in spike_positions or ():
        center = int(position * n)
        window_lo = max(0, center - 5)
        window_hi = min(n, center + 10)
        if window_lo < window_hi:
            # One multiplier per bar, drawn in bar order.
            volume[window_lo:window_hi] *= np.random.uniform(
                2.5, 5.0, window_hi - window_lo)

    return volume


def generate_btc_history(start_date: str = '2017-08-01',
                         end_date: str = '2026-03-31') -> pd.DataFrame:
    """Generate simulated hourly BTC candles.

    The series is stitched together from hard-coded segments, each defined by
    a start/end price, a start/end date, a volatility and a flag saying
    whether the segment contains a sideways "box". Segments with a box get one
    volume burst at their midpoint; the others get bursts at 30% and 70%.

    The segments cover 2017-08-01 00:00 UTC up to (not including)
    2025-03-31 00:00 UTC, so no bars exist outside that window regardless of
    the requested range.

    Args:
        start_date: First timestamp to keep (inclusive, interpreted as UTC).
        end_date: Last timestamp to keep (inclusive, interpreted as UTC).

    Returns:
        DataFrame with columns ``timestamp`` (tz-aware UTC), ``open``,
        ``high``, ``low``, ``close`` and ``volume``, sorted by timestamp, with
        ``low <= open, close <= high`` on every row.
    """
    # (start_price, end_price, start_date, end_date, volatility, has_box)
    # Every segment ends exactly where the next one starts so the hourly
    # series has no holes (year-end segments previously stopped on Dec 31,
    # leaving a 24-hour gap before the Jan 1 segment).
    segments = [
        # 2017 bull run
        (4000,   6500,  '2017-08-01', '2017-10-01', 0.004, False),
        (6500,   7800,  '2017-10-01', '2017-11-01', 0.005, False),
        (7800,   19500, '2017-11-01', '2017-12-17', 0.008, False),
        (19500,  13500, '2017-12-17', '2018-01-01', 0.010, False),
        # 2018 bear market
        (13500,  11500, '2018-01-01', '2018-02-06', 0.008, False),
        (11500,  8500,  '2018-02-06', '2018-03-30', 0.006, True),   # box
        (8500,   6200,  '2018-03-30', '2018-06-30', 0.005, True),   # box
        (6200,   3200,  '2018-06-30', '2018-12-15', 0.004, True),   # box
        # 2019 recovery
        (3200,   3500,  '2018-12-15', '2019-03-01', 0.003, True),   # bottom box
        (3500,   5500,  '2019-03-01', '2019-04-02', 0.005, False),  # breakout
        (5500,   8500,  '2019-04-02', '2019-05-15', 0.006, False),
        (8500,   13800, '2019-05-15', '2019-06-26', 0.007, False),
        (13800,  9500,  '2019-06-26', '2019-09-30', 0.005, True),   # pullback + box
        (9500,   7200,  '2019-09-30', '2020-01-01', 0.004, True),   # box
        # 2020
        (7200,   9200,  '2020-01-01', '2020-02-14', 0.004, False),
        (9200,   3800,  '2020-02-14', '2020-03-16', 0.015, False),  # COVID crash
        (3800,   5000,  '2020-03-16', '2020-04-15', 0.006, True),   # bottom box
        (5000,   9500,  '2020-04-15', '2020-07-20', 0.004, True),   # box -> breakout
        (9500,   10500, '2020-07-20', '2020-10-01', 0.003, True),   # box
        (10500,  19800, '2020-10-01', '2020-12-01', 0.005, False),  # breakout + rally
        (19800,  29000, '2020-12-01', '2021-01-08', 0.007, False),
        # 2021 bull market
        (29000,  42000, '2021-01-08', '2021-02-21', 0.006, False),
        (42000,  58000, '2021-02-21', '2021-03-13', 0.008, False),
        (58000,  64900, '2021-03-13', '2021-04-14', 0.006, False),  # first ATH
        (64900,  30000, '2021-04-14', '2021-06-22', 0.008, True),   # deep correction
        (30000,  35000, '2021-06-22', '2021-07-20', 0.005, True),   # box
        (35000,  52000, '2021-07-20', '2021-09-07', 0.005, False),  # breakout
        (52000,  41000, '2021-09-07', '2021-09-29', 0.007, False),
        (41000,  69000, '2021-09-29', '2021-11-10', 0.006, False),  # second ATH
        (69000,  46000, '2021-11-10', '2022-01-01', 0.007, False),
        # 2022 bear market
        (46000,  38000, '2022-01-01', '2022-02-28', 0.005, True),   # box
        (38000,  30000, '2022-02-28', '2022-05-09', 0.004, True),   # box
        (30000,  18000, '2022-05-09', '2022-06-18', 0.010, False),  # LUNA crash
        (18000,  20000, '2022-06-18', '2022-08-15', 0.003, True),   # bottom box
        (20000,  24500, '2022-08-15', '2022-08-25', 0.005, False),  # small bounce
        (24500,  19500, '2022-08-25', '2022-09-20', 0.005, False),
        (19500,  20500, '2022-09-20', '2022-11-05', 0.003, True),   # box
        (20500,  15500, '2022-11-05', '2022-11-21', 0.012, False),  # FTX crash
        (15500,  16500, '2022-11-21', '2023-01-01', 0.003, True),   # bottom box
        # 2023 slow bull
        (16500,  21000, '2023-01-01', '2023-01-20', 0.005, False),  # breakout
        (21000,  23500, '2023-01-20', '2023-02-20', 0.004, False),
        (23500,  28500, '2023-02-20', '2023-03-20', 0.005, False),
        (28500,  25000, '2023-03-20', '2023-06-15', 0.003, True),   # box
        (25000,  31500, '2023-06-15', '2023-07-13', 0.005, False),  # breakout
        (31500,  26000, '2023-07-13', '2023-09-11', 0.003, True),   # box
        (26000,  35000, '2023-09-11', '2023-10-24', 0.005, False),  # breakout
        (35000,  38000, '2023-10-24', '2023-11-15', 0.004, False),
        (38000,  44000, '2023-11-15', '2024-01-01', 0.004, False),
        # 2024 ETF bull market
        (44000,  49000, '2024-01-01', '2024-01-11', 0.005, False),  # ETF approval
        (49000,  52000, '2024-01-11', '2024-02-07', 0.004, False),
        (52000,  69000, '2024-02-07', '2024-03-14', 0.006, False),  # new ATH
        (69000,  73800, '2024-03-14', '2024-03-15', 0.008, False),  # ATH
        (73800,  57000, '2024-03-15', '2024-05-01', 0.005, True),   # pullback
        (57000,  60000, '2024-05-01', '2024-07-15', 0.003, True),   # box
        (60000,  72000, '2024-07-15', '2024-10-29', 0.004, False),  # breakout
        (72000,  100000, '2024-10-29', '2024-12-05', 0.006, False), # election rally
        (100000, 92000, '2024-12-05', '2025-01-01', 0.005, False),
        # 2025
        (92000,  106000, '2025-01-01', '2025-01-20', 0.005, False),
        (106000, 78000,  '2025-01-20', '2025-02-28', 0.006, False), # pullback
        (78000,  84000,  '2025-02-28', '2025-03-31', 0.004, True),  # box
    ]

    price_chunks = []
    volume_chunks = []
    timestamp_chunks = []

    for s_price, e_price, s_date, e_date, volatility, has_box in segments:
        s_ts = pd.Timestamp(s_date, tz='UTC')
        e_ts = pd.Timestamp(e_date, tz='UTC')
        n_hours = int((e_ts - s_ts).total_seconds() / 3600)
        if n_hours <= 0:
            continue

        # Sideways zone between 20% and 50% of the segment when flagged.
        box_zones = [(0.2, 0.5, n_hours // 3)] if has_box else None
        price_chunks.append(
            _generate_phase(s_price, e_price, n_hours, volatility, box_zones))

        # Volume bursts mark the breakout points.
        spike_pos = [0.5] if has_box else [0.3, 0.7]
        base_volume = 300 + abs(e_price - s_price) * 0.01
        volume_chunks.append(
            _add_volume_pattern(n_hours, base_volume, spike_pos))

        timestamp_chunks.append(
            pd.date_range(s_date, periods=n_hours, freq='1h', tz='UTC'))

    prices = np.concatenate(price_chunks)
    volumes = np.concatenate(volume_chunks)
    timestamps = timestamp_chunks[0].append(timestamp_chunks[1:])
    n = len(prices)

    np.random.seed(12345)
    open_ = prices * (1 + np.random.normal(0, 0.001, n))
    high = prices * (1 + np.abs(np.random.normal(0.003, 0.002, n)))
    low = prices * (1 - np.abs(np.random.normal(0.003, 0.002, n)))

    df = pd.DataFrame({
        'timestamp': timestamps,
        'open': open_,
        # high/low already bracket close; widen them to bracket open as well
        # so every candle is internally consistent.
        'high': np.maximum(high, open_),
        'low': np.minimum(low, open_),
        'close': prices,
        'volume': volumes,
    })

    # Keep the last row for any duplicated timestamp, then order by time.
    df = df.drop_duplicates(subset='timestamp', keep='last').sort_values('timestamp')

    # Honour the requested date range (both bounds inclusive).
    range_start = pd.Timestamp(start_date, tz='UTC')
    range_end = pd.Timestamp(end_date, tz='UTC')
    in_range = (df['timestamp'] >= range_start) & (df['timestamp'] <= range_end)

    return df[in_range].reset_index(drop=True)


def generate_eth_history(btc_df: pd.DataFrame) -> pd.DataFrame:
    """Generate simulated hourly ETH candles derived from a BTC series.

    ETH close = BTC close x a time-varying ETH/BTC ratio (piecewise-linear
    between hard-coded anchors, plus a tiny random drift), multiplied by an
    accumulated random-walk noise term. No beta amplification of BTC returns
    is applied: scaling BTC returns would pull the price away from the
    anchored ratio path.

    Args:
        btc_df: BTC candles with at least ``timestamp`` and ``close`` columns.

    Returns:
        DataFrame with columns ``timestamp``, ``open``, ``high``, ``low``,
        ``close`` and ``volume``, one row per row of ``btc_df``. The
        ``timestamp`` column keeps the dtype (including time zone) of
        ``btc_df['timestamp']``. An empty frame with those columns is returned
        when ``btc_df`` has no rows.
    """
    btc_prices = btc_df['close'].values
    n = len(btc_prices)
    if n == 0:
        return pd.DataFrame(
            columns=['timestamp', 'open', 'high', 'low', 'close', 'volume'])

    np.random.seed(99999)

    # ETH/BTC ratio anchors, keyed by fractional position in the series.
    ratio_anchors = {
        0.00: 0.075,   # 2017 Aug: ETH~$300 / BTC~$4000
        0.10: 0.060,
        0.20: 0.035,   # 2018 bear
        0.30: 0.020,   # 2019 low ratio
        0.40: 0.030,
        0.50: 0.040,   # 2020 DeFi summer
        0.60: 0.070,   # 2021 bull
        0.70: 0.080,   # 2021 peak
        0.80: 0.065,   # 2022-2023
        0.90: 0.045,   # 2024
        1.00: 0.035,   # 2025
    }
    anchor_fracs, anchor_ratios = zip(*sorted(ratio_anchors.items()))
    interp_ratio = np.interp(np.linspace(0, 1, n), anchor_fracs, anchor_ratios)

    # Small random drift on top of the interpolated ratio.
    ratio_noise = np.random.normal(0, 0.002, n)
    ratio_path = interp_ratio + np.cumsum(ratio_noise) * 0.0001
    eth_prices = btc_prices * ratio_path

    # Extra idiosyncratic noise: each bar's log return gets an independent
    # N(0, 0.002) shock, i.e. the price is scaled by exp(cumulative shocks).
    step_noise = np.random.normal(0, 0.002, n - 1)
    noise_path = np.concatenate(([0.0], np.cumsum(step_noise)))
    eth_close = eth_prices * np.exp(noise_path)

    volumes = _add_volume_pattern(n, 500, [0.3, 0.5, 0.7])

    np.random.seed(54321)
    open_ = eth_close * (1 + np.random.normal(0, 0.001, n))
    high = eth_close * (1 + np.abs(np.random.normal(0.004, 0.002, n)))
    low = eth_close * (1 - np.abs(np.random.normal(0.004, 0.002, n)))

    df = pd.DataFrame({
        # Pass the Series itself (not .values) so a tz-aware BTC timestamp
        # column stays tz-aware here; .values would drop the time zone.
        'timestamp': btc_df['timestamp'].reset_index(drop=True),
        'open': open_,
        # Widen high/low so they bracket open as well as close.
        'high': np.maximum(high, open_),
        'low': np.minimum(low, open_),
        'close': eth_close,
        'volume': volumes,
    })

    return df


def generate_altcoin_history(btc_df: pd.DataFrame, symbol: str,
                              data_start: str) -> pd.DataFrame:
    """Generate simulated hourly altcoin candles derived from a BTC series.

    Each altcoin log return is the BTC log return times a per-symbol beta
    (drawn uniformly from 1.3-2.0) plus N(0, 0.004) noise. The starting price
    is drawn uniformly from 0.5-500. All random draws are seeded from
    ``symbol`` so a given symbol always yields the same series for the same
    BTC input.

    Args:
        btc_df: BTC candles with ``timestamp`` and ``close`` columns;
            ``timestamp`` is compared against a UTC-aware timestamp.
        symbol: Trading symbol; only used to derive the random seed.
        data_start: First date (interpreted as UTC) from which the symbol has
            data; earlier BTC bars are ignored.

    Returns:
        DataFrame with columns ``timestamp``, ``open``, ``high``, ``low``,
        ``close`` and ``volume``. The ``timestamp`` column keeps the dtype
        (including time zone) of ``btc_df['timestamp']``. A column-less empty
        DataFrame is returned when fewer than 100 BTC bars remain on or after
        ``data_start``.
    """
    import zlib  # local import: only needed to derive the stable seed below

    # builtin hash() on str is salted per interpreter process, so it gave a
    # different series on every run. CRC32 is stable across runs.
    seed = zlib.crc32(symbol.encode('utf-8')) % 2**31
    np.random.seed(seed)

    start_ts = pd.Timestamp(data_start, tz='UTC')
    btc_sub = btc_df[btc_df['timestamp'] >= start_ts].reset_index(drop=True)
    n = len(btc_sub)

    if n < 100:
        return pd.DataFrame()

    btc_prices = btc_sub['close'].values

    # Random starting price and beta for this symbol.
    base_price = np.random.uniform(0.5, 500)
    beta = np.random.uniform(1.3, 2.0)

    btc_returns = np.diff(np.log(btc_prices))
    alt_returns = btc_returns * beta + np.random.normal(0, 0.004, n - 1)
    alt_prices = base_price * np.exp(
        np.concatenate(([0.0], np.cumsum(alt_returns))))

    volumes = _add_volume_pattern(n, 300, [0.3, 0.6])

    np.random.seed(seed + 1)
    open_ = alt_prices * (1 + np.random.normal(0, 0.001, n))
    high = alt_prices * (1 + np.abs(np.random.normal(0.005, 0.003, n)))
    low = alt_prices * (1 - np.abs(np.random.normal(0.005, 0.003, n)))

    df = pd.DataFrame({
        # Pass the Series itself (not .values) so a tz-aware BTC timestamp
        # column stays tz-aware here; .values would drop the time zone.
        'timestamp': btc_sub['timestamp'],
        'open': open_,
        # Widen high/low so they bracket open as well as close.
        'high': np.maximum(high, open_),
        'low': np.minimum(low, open_),
        'close': alt_prices,
        'volume': volumes,
    })

    return df


def generate_all_test_data() -> dict:
    """Generate simulated hourly history for every configured coin.

    BTC is generated first, ETH is derived from it, and every other symbol in
    ``COINS`` is derived from BTC starting at that coin's ``data_start``.
    Symbols whose generated frame is empty are left out. A one-line summary
    is printed per symbol.

    Returns:
        Mapping ``{symbol: hourly DataFrame}``.
    """
    from langlang_ai.config.coins import COINS

    print("生成模拟历史数据...")

    btc_frame = generate_btc_history()
    btc_times = btc_frame['timestamp']
    btc_close = btc_frame['close']
    print(f"  BTC: {len(btc_frame)} bars, "
          f"{btc_times.min().date()} ~ {btc_times.max().date()}, "
          f"${btc_close.iloc[0]:.0f} → ${btc_close.iloc[-1]:.0f}")

    eth_frame = generate_eth_history(btc_frame)
    eth_close = eth_frame['close']
    print(f"  ETH: {len(eth_frame)} bars, "
          f"${eth_close.iloc[0]:.0f} → ${eth_close.iloc[-1]:.0f}")

    result = {'BTCUSDT': btc_frame, 'ETHUSDT': eth_frame}
    majors = ('BTCUSDT', 'ETHUSDT')

    # Altcoins: everything in COINS except the two majors handled above.
    for symbol, cfg in COINS.items():
        if symbol in majors:
            continue
        alt_frame = generate_altcoin_history(btc_frame, symbol, cfg.data_start)
        if len(alt_frame) == 0:
            continue
        result[symbol] = alt_frame
        alt_close = alt_frame['close']
        print(f"  {symbol}: {len(alt_frame)} bars, "
              f"${alt_close.iloc[0]:.2f} → ${alt_close.iloc[-1]:.2f}")

    return result
