CGGItemSets/src/ms_data_source.rs

use std::sync::LazyLock;

use indexmap::IndexMap;
use log::{error, warn};
use regex::Regex;
use serde_derive::Deserialize;

use crate::data_source::{Data, DataSource, Stat};
use crate::{ChampInfo, Champion};

pub struct MSDataSource {
    client: ureq::Agent,
}

#[derive(Deserialize)]
struct MSChampion {
    name: String,
    #[serde(rename = "search-terms")]
    search_terms: String,
}

// Compile the regexes once; both are reused for every scraped page. The
// number pattern accepts integers ("5") as well as decimals ("52.31");
// requiring a digit on both sides of the dot would silently miss
// single-digit values.
static NUMBER_REGEX: LazyLock<Regex> =
    LazyLock::new(|| Regex::new(r"([0-9]+(?:\.[0-9]+)?)").unwrap());
static ITEM_REGEX: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"/item/([0-9]+)\.").unwrap());
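
// Display names and Data Dragon ids can differ, so the lookup below accepts
// either: for example, "Wukong" is a display name whose id is "MonkeyKing".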
fn get_champ_from_name(champs: &Champion, name: &str) -> Option<u32> {
    champs
        .data
        .values()
        .find(|champ| name == champ.name || name == champ.id)
        .map(|champ| champ.key)
}

/// Parses the first number in `rest`, falling back to 0.0 when none is found.
fn find_next_number(rest: &str) -> f32 {
    if let Some(cap) = NUMBER_REGEX.captures(rest) {
        if let Some(matched) = cap.get(1) {
            return matched.as_str().parse::<f32>().unwrap_or(0.0);
        }
    }
    0.0
}
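
// Note: the scanner below is a plain byte scan, not an HTML parser. It
// assumes "<div" and "</div>" appear only as tag delimiters (not inside
// comments or attribute values), which the scraped MetaSRC markup appears
// to satisfy.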
fn find_matching_div(html: &str, start_pos: usize) -> Option<(usize, usize)> {
    // Scan bytes rather than chars so that multi-byte UTF-8 characters in
    // the page cannot cause a panic from slicing mid-character.
    let bytes = html.as_bytes();
    let mut open_divs = 0;
    let mut i = start_pos;
    while i < bytes.len() {
        if bytes[i..].starts_with(b"<div") {
            open_divs += 1;
            i += 4;
        } else if bytes[i..].starts_with(b"</div>") {
            open_divs -= 1;
            i += 6;
            if open_divs == 0 {
                return Some((start_pos, i));
            }
        } else {
            i += 1;
        }
    }
    None
}
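
// `section_title` must match the page heading verbatim; the callers below
// pass "Item Purchase Order" and "Starting Items".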
fn extract_items_from_section(page: &str, section_title: &str) -> Vec<String> {
    if let Some(h3_pos) = page.find(section_title) {
        // Walk back to the opening <div> that wraps the section heading.
        let div_start = page[..h3_pos].rfind("<div").unwrap_or(0);
        if let Some((div_start, div_end)) = find_matching_div(page, div_start) {
            let div_html = &page[div_start..div_end];
            return ITEM_REGEX
                .captures_iter(div_html)
                .map(|cap| cap[1].to_owned())
                .collect();
        }
        warn!("Failed to find matching </div> for section '{section_title}'");
    }
    vec![]
}
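
// Illustration (assuming the first <td> of a skill row is level 1): Q taken
// at levels 1, 3 and 5 plus W at level 2 fills `order` with
// ["Q", "W", "Q", "", "Q", "", ...], and the join collapses the gaps to
// "QWQQ".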
/// Rebuilds the skill order (e.g. "QWEQ...") from the ability-level table.
fn extract_skill_order_from_table(page: &str) -> String {
    // Locate the table containing the skill order, using Q.png as an anchor.
    let table_start = page
        .find("Q.png")
        .and_then(|pos| page[..pos].rfind("<table"))
        .unwrap_or(0);
    let table_end = page[table_start..]
        .find("</table>")
        .map(|e| table_start + e + 8)
        .unwrap_or(page.len());
    let table_html = &page[table_start..table_end];
    // Split the table into <tr>...</tr> rows.
    let rows: Vec<&str> = table_html
        .match_indices("<tr")
        .map(|(i, _)| {
            let end = table_html[i..]
                .find("</tr>")
                .map(|e| i + e + 5)
                .unwrap_or(table_html.len());
            &table_html[i..end]
        })
        .collect();
    // Only process the Q/W/E/R rows (skip the header row). A cell containing
    // the bare skill letter marks the level at which that skill is taken.
    let skills = ["Q", "W", "E", "R"];
    let mut order = [""; 18];
    for (i, row) in rows.iter().skip(1).take(4).enumerate() {
        let mut col = 0;
        let mut pos = 0;
        while let Some(td_start) = row[pos..].find("<td") {
            let td_start = pos + td_start;
            let td_end = row[td_start..]
                .find("</td>")
                .map(|e| td_start + e + 5)
                .unwrap_or(row.len());
            let td_html = &row[td_start..td_end];
            if td_html.contains(&format!(">{}<", skills[i])) && col < 18 {
                order[col] = skills[i];
            }
            col += 1;
            pos = td_end;
        }
    }
    // Empty slots vanish in the join, so only levels with a matched cell
    // contribute a letter.
    order.join("")
}
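
// Hypothetical wiring, assuming ureq 3.x (matching the read_json and
// read_to_string calls used below); the agent is created once by the caller
// and shared across data sources:
//
//     let agent = ureq::Agent::new_with_defaults();
//     let ms = MSDataSource::new(&agent);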
impl MSDataSource {
    pub fn new(client: &ureq::Agent) -> Self {
        MSDataSource {
            client: client.clone(),
        }
    }
}

impl DataSource for MSDataSource {
    fn get_alias(&self) -> &str {
        "MS"
    }

    fn get_champs_with_positions(&self, champion: &Champion) -> IndexMap<u32, Vec<String>> {
        let mut champs = IndexMap::new();
        let champions: Vec<MSChampion> = match self
            .client
            .get("https://www.metasrc.com/lol/search/lol")
            .call()
            .and_then(|mut resp| resp.body_mut().read_json())
        {
            Ok(champs) => champs,
            Err(e) => {
                error!("Failed to fetch champions from MetaSRC: {e}");
                return champs;
            }
        };
        let allowed_roles = ["TOP", "ADC", "SUPPORT", "JUNGLE", "MID"];
        for champ in champions {
            if let Some(id) = get_champ_from_name(champion, &champ.name) {
                // Keep the first recognised role from the pipe-separated
                // search terms, e.g. "top|jungle" -> ["TOP"].
                let role = champ
                    .search_terms
                    .split('|')
                    .map(|s| s.to_uppercase())
                    .find(|role| allowed_roles.contains(&role.as_str()));
                if let Some(role) = role {
                    champs.insert(id, vec![role]);
                }
            } else {
                warn!("Could not find champ '{}' in champion data", champ.name);
            }
        }
        champs
    }

    fn get_champ_data_with_win_pourcentage(
        &self,
        champ: &ChampInfo,
        positions: &[String],
    ) -> Vec<Data> {
        let mut builds = vec![];
        let rep = self
            .client
            .get(
                format!(
                    "https://www.metasrc.com/lol/build/{}/{}",
                    champ.id.to_lowercase(),
                    positions[0].to_lowercase()
                )
                .as_str(),
            )
            .call();
        if let Ok(mut p) = rep {
            let page = match p.body_mut().read_to_string() {
                Ok(s) => s,
                Err(e) => {
                    warn!("Failed to read page for champ {}: {}", champ.id, e);
                    return builds;
                }
            };
            // Extract patch, win rate, KDA and games from the page text.
            let patch = page
                .find("Patch ")
                .map(|p| find_next_number(&page[p..]).to_string())
                .unwrap_or_default();
            let win_rate = page
                .find("Win")
                .map(|p| find_next_number(&page[p..]))
                .unwrap_or(0.0);
            let kda = page
                .find("KDA:")
                .map(|p| find_next_number(&page[p..]))
                .unwrap_or(0.0);
            let games = page
                .find("Games:")
                .map(|p| find_next_number(&page[p..]) as u32)
                .unwrap_or(0);
            let items = extract_items_from_section(&page, "Item Purchase Order");
            let starting_items = extract_items_from_section(&page, "Starting Items");
            builds.push(Data {
                position: positions[0].to_owned(),
                items: vec![
                    self.make_item_set(
                        starting_items,
                        format!(
                            "Starting Items | skillOrder: {}",
                            extract_skill_order_from_table(&page)
                        ),
                    ),
                    self.make_item_set(items, "Item Purchase Order".to_owned()),
                ],
                stat: Stat {
                    win_rate,
                    games,
                    kda,
                    patch,
                },
            });
        } else {
            warn!(
                "Failed to fetch build page for champ {} at position {}",
                champ.id, positions[0]
            );
        }
        builds
    }
}
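
// A minimal test sketch for the pure parsing helpers above; the HTML
// snippets are hypothetical stand-ins for MetaSRC markup, not captured
// responses.
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn parses_first_number_or_zero() {
        assert_eq!(find_next_number("Patch 15.9"), 15.9);
        assert_eq!(find_next_number("Games: 5"), 5.0);
        assert_eq!(find_next_number("no digits"), 0.0);
    }

    #[test]
    fn finds_balanced_div() {
        let html = "<div><div>inner</div></div><p>rest</p>";
        assert_eq!(find_matching_div(html, 0), Some((0, 27)));
    }

    #[test]
    fn extracts_item_ids_from_section() {
        let html =
            r#"<div><h3>Starting Items</h3><a href="/item/1055.png"></a><a href="/item/2003.png"></a></div>"#;
        assert_eq!(
            extract_items_from_section(html, "Starting Items"),
            vec!["1055", "2003"]
        );
    }
}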