CGGItemSets/src/ms_data_source.rs

use std::sync::LazyLock;

use indexmap::IndexMap;
use log::{error, warn};
use regex::Regex;
use serde_derive::Deserialize;

use crate::data_source::{Data, DataSource, Stat};
use crate::{ChampInfo, Champion};

pub struct MSDataSource {
    client: ureq::Agent,
}

#[derive(Deserialize)]
struct MSChampion {
    name: String,
    #[serde(rename = "search-terms")]
    search_terms: String,
}

// Compile the regexes once; both are reused for every scraped page. The
// number pattern accepts integers ("5") as well as decimals ("52.31");
// requiring a digit on both sides of the dot would silently miss
// single-digit values.
static NUMBER_REGEX: LazyLock<Regex> =
    LazyLock::new(|| Regex::new(r"([0-9]+(?:\.[0-9]+)?)").unwrap());
static ITEM_REGEX: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"/item/([0-9]+)\.").unwrap());
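
// Display names and Data Dragon ids can differ, so the lookup below accepts
// either: for example, "Wukong" is a display name whose id is "MonkeyKing".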
fn get_champ_from_name(champs: &Champion, name: &str) -> Option<u32> {
    champs
        .data
        .values()
        .find(|champ| name == champ.name || name == champ.id)
        .map(|champ| champ.key)
}

/// Parses the first number in `rest`, falling back to 0.0 when none is found.
fn find_next_number(rest: &str) -> f32 {
    if let Some(cap) = NUMBER_REGEX.captures(rest) {
        if let Some(matched) = cap.get(1) {
            return matched.as_str().parse::<f32>().unwrap_or(0.0);
        }
    }
    0.0
}
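
// Note: the scanner below is a plain byte scan, not an HTML parser. It
// assumes "<div" and "</div>" appear only as tag delimiters (not inside
// comments or attribute values), which the scraped MetaSRC markup appears
// to satisfy.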
fn find_matching_div(html: &str, start_pos: usize) -> Option<(usize, usize)> {
    // Scan bytes rather than chars so that multi-byte UTF-8 characters in
    // the page cannot cause a panic from slicing mid-character.
    let bytes = html.as_bytes();
    let mut open_divs = 0;
    let mut i = start_pos;
    while i < bytes.len() {
        if bytes[i..].starts_with(b"<div") {
            open_divs += 1;
            i += 4;
        } else if bytes[i..].starts_with(b"</div>") {
            open_divs -= 1;
            i += 6;
            if open_divs == 0 {
                return Some((start_pos, i));
            }
        } else {
            i += 1;
        }
    }
    None
}
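
// `section_title` must match the page heading verbatim; the callers below
// pass "Item Purchase Order" and "Starting Items".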
fn extract_items_from_section(page: &str, section_title: &str) -> Vec<String> {
    if let Some(h3_pos) = page.find(section_title) {
        // Walk back to the opening <div> that wraps the section heading.
        let div_start = page[..h3_pos].rfind("<div").unwrap_or(0);
        if let Some((div_start, div_end)) = find_matching_div(page, div_start) {
            let div_html = &page[div_start..div_end];
            return ITEM_REGEX
                .captures_iter(div_html)
                .map(|cap| cap[1].to_owned())
                .collect();
        }
        warn!("Failed to find matching </div> for section '{section_title}'");
    }
    vec![]
}
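
// Illustration (assuming the first <td> of a skill row is level 1): Q taken
// at levels 1, 3 and 5 plus W at level 2 fills `order` with
// ["Q", "W", "Q", "", "Q", "", ...], and the join collapses the gaps to
// "QWQQ".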
/// Rebuilds the skill order (e.g. "QWEQ...") from the ability-level table.
fn extract_skill_order_from_table(page: &str) -> String {
    // Locate the table containing the skill order, using Q.png as an anchor.
    let table_start = page
        .find("Q.png")
        .and_then(|pos| page[..pos].rfind("<table"))
        .unwrap_or(0);
    let table_end = page[table_start..]
        .find("</table>")
        .map(|e| table_start + e + 8)
        .unwrap_or(page.len());
    let table_html = &page[table_start..table_end];
    // Split the table into <tr>...</tr> rows.
    let rows: Vec<&str> = table_html
        .match_indices("<tr")
        .map(|(i, _)| {
            let end = table_html[i..]
                .find("</tr>")
                .map(|e| i + e + 5)
                .unwrap_or(table_html.len());
            &table_html[i..end]
        })
        .collect();
    // Only process the Q/W/E/R rows (skip the header row). A cell containing
    // the bare skill letter marks the level at which that skill is taken.
    let skills = ["Q", "W", "E", "R"];
    let mut order = [""; 18];
    for (i, row) in rows.iter().skip(1).take(4).enumerate() {
        let mut col = 0;
        let mut pos = 0;
        while let Some(td_start) = row[pos..].find("<td") {
            let td_start = pos + td_start;
            let td_end = row[td_start..]
                .find("</td>")
                .map(|e| td_start + e + 5)
                .unwrap_or(row.len());
            let td_html = &row[td_start..td_end];
            if td_html.contains(&format!(">{}<", skills[i])) && col < 18 {
                order[col] = skills[i];
            }
            col += 1;
            pos = td_end;
        }
    }
    // Empty slots vanish in the join, so only levels with a matched cell
    // contribute a letter.
    order.join("")
}
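
// Hypothetical wiring, assuming ureq 3.x (matching the read_json and
// read_to_string calls used below); the agent is created once by the caller
// and shared across data sources:
//
//     let agent = ureq::Agent::new_with_defaults();
//     let ms = MSDataSource::new(&agent);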
impl MSDataSource {
    pub fn new(client: &ureq::Agent) -> Self {
        MSDataSource {
            client: client.clone(),
        }
    }
}

impl DataSource for MSDataSource {
    fn get_alias(&self) -> &str {
        "MS"
    }

    fn get_champs_with_positions(&self, champion: &Champion) -> IndexMap<u32, Vec<String>> {
        let mut champs = IndexMap::new();
        let champions: Vec<MSChampion> = match self
            .client
            .get("https://www.metasrc.com/lol/search/lol")
            .call()
            .and_then(|mut resp| resp.body_mut().read_json())
        {
            Ok(champs) => champs,
            Err(e) => {
                error!("Failed to fetch champions from MetaSRC: {e}");
                return champs;
            }
        };
        let allowed_roles = ["TOP", "ADC", "SUPPORT", "JUNGLE", "MID"];
        for champ in champions {
            if let Some(id) = get_champ_from_name(champion, &champ.name) {
                // Keep the first recognised role from the pipe-separated
                // search terms, e.g. "top|jungle" -> ["TOP"].
                let role = champ
                    .search_terms
                    .split('|')
                    .map(|s| s.to_uppercase())
                    .find(|role| allowed_roles.contains(&role.as_str()));
                if let Some(role) = role {
                    champs.insert(id, vec![role]);
                }
            } else {
                warn!("Could not find champ '{}' in champion data", champ.name);
            }
        }
        champs
    }

    fn get_champ_data_with_win_pourcentage(
        &self,
        champ: &ChampInfo,
        positions: &[String],
    ) -> Vec<Data> {
        let mut builds = vec![];
        let rep = self
            .client
            .get(
                format!(
                    "https://www.metasrc.com/lol/build/{}/{}",
                    champ.id.to_lowercase(),
                    positions[0].to_lowercase()
                )
                .as_str(),
            )
            .call();
        if let Ok(mut p) = rep {
            let page = match p.body_mut().read_to_string() {
                Ok(s) => s,
                Err(e) => {
                    warn!("Failed to read page for champ {}: {}", champ.id, e);
                    return builds;
                }
            };
            // Extract patch, win rate, KDA and games from the page text.
            let patch = page
                .find("Patch ")
                .map(|p| find_next_number(&page[p..]).to_string())
                .unwrap_or_default();
            let win_rate = page
                .find("Win")
                .map(|p| find_next_number(&page[p..]))
                .unwrap_or(0.0);
            let kda = page
                .find("KDA:")
                .map(|p| find_next_number(&page[p..]))
                .unwrap_or(0.0);
            let games = page
                .find("Games:")
                .map(|p| find_next_number(&page[p..]) as u32)
                .unwrap_or(0);
            let items = extract_items_from_section(&page, "Item Purchase Order");
            let starting_items = extract_items_from_section(&page, "Starting Items");
            builds.push(Data {
                position: positions[0].to_owned(),
                items: vec![
                    self.make_item_set(
                        starting_items,
                        format!(
                            "Starting Items | skillOrder: {}",
                            extract_skill_order_from_table(&page)
                        ),
                    ),
                    self.make_item_set(items, "Item Purchase Order".to_owned()),
                ],
                stat: Stat {
                    win_rate,
                    games,
                    kda,
                    patch,
                },
            });
        } else {
            warn!(
                "Failed to fetch build page for champ {} at position {}",
                champ.id, positions[0]
            );
        }
        builds
    }
}
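
// A minimal test sketch for the pure parsing helpers above; the HTML
// snippets are hypothetical stand-ins for MetaSRC markup, not captured
// responses.
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn parses_first_number_or_zero() {
        assert_eq!(find_next_number("Patch 15.9"), 15.9);
        assert_eq!(find_next_number("Games: 5"), 5.0);
        assert_eq!(find_next_number("no digits"), 0.0);
    }

    #[test]
    fn finds_balanced_div() {
        let html = "<div><div>inner</div></div><p>rest</p>";
        assert_eq!(find_matching_div(html, 0), Some((0, 27)));
    }

    #[test]
    fn extracts_item_ids_from_section() {
        let html =
            r#"<div><h3>Starting Items</h3><a href="/item/1055.png"></a><a href="/item/2003.png"></a></div>"#;
        assert_eq!(
            extract_items_from_section(html, "Starting Items"),
            vec!["1055", "2003"]
        );
    }
}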