From d1ff162de85bd1352d8c9b047e1b6e9f2f68641f Mon Sep 17 00:00:00 2001 From: Luke Hubmayer-Werner Date: Fri, 4 Feb 2022 17:31:40 +1030 Subject: [PATCH] Do most processing as bits instead of strings --- Cargo.toml | 3 +- src/main.rs | 178 ++++++++++++++++++++++++++++++++-------------------- 2 files changed, 112 insertions(+), 69 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index ef293f4..8b15a2a 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -7,4 +7,5 @@ edition = "2021" [dependencies] regex = "1" -rayon = "1.5" \ No newline at end of file +rayon = "1.5" +bitintr = "0.3.0" diff --git a/src/main.rs b/src/main.rs index ab1493c..7260208 100644 --- a/src/main.rs +++ b/src/main.rs @@ -1,14 +1,43 @@ use std::fs; -use std::collections::{HashMap, HashSet}; +use std::collections::HashMap; +use bitintr::{Lzcnt, Tzcnt}; use regex::Regex; use rayon::prelude::*; -fn load_dictionary(filename: &str) -> Vec { +const WORD_LENGTH: usize = 5; +type Charmask = i32; + +#[derive(Copy, Clone, Default)] +struct SimState { + banned_chars: [Charmask; WORD_LENGTH], // Alphabetical bitmask + required_chars: Charmask +} + +#[derive(Copy, Clone, Default)] +struct Word { + letters: [char; WORD_LENGTH], + charmask: Charmask // All of the characters contained +} + +type WordCache = HashMap>; + +fn str2word(s: &str) -> Word { + let mut word = Word::default(); + let mut iter = s.chars(); + for i in 0..WORD_LENGTH { + let c = iter.next().unwrap(); + word.letters[i] = c; + word.charmask |= char2bit(c); + } + word +} + +fn load_dictionary(filename: &str) -> Vec { println!("Loading dictionary at {}", filename); let rawfile = fs::read_to_string(filename).unwrap(); let rawwords = rawfile.split('\n'); let mut words = Vec::new(); - let re = Regex::new(r"^\w{5}$").unwrap(); + let re = Regex::new(&format!("{}{}{}", r"^\w{", WORD_LENGTH, r"}$")).unwrap(); for line in rawwords { if re.is_match(line) { words.push(line.to_uppercase()); @@ -16,41 +45,14 @@ fn load_dictionary(filename: &str) -> Vec { } words.sort(); words.dedup(); - words + words.iter().map(|w| str2word(w)).collect() } -fn inc_char(c: char) -> char { +/* fn inc_char(c: char) -> char { (c as u8 + 1) as char -} +} */ -fn _generate_wordcache_nested(cache: &mut HashMap>, subcache: &[String], key: &str, depth: u8) { - for c in inc_char(key.chars().last().unwrap())..='Z' { - let sc2: Vec = subcache.iter().filter(|w| w.contains(c)).cloned().collect(); - if !sc2.is_empty() { - let key2 = format!("{}{}", key, c); - if depth > 0 { - _generate_wordcache_nested(cache, &sc2, &key2, depth-1); - } - cache.insert(key2, sc2); - } - } -} - -fn generate_wordcache(words: Vec) -> HashMap> { - let mut cache = HashMap::new(); - for c1 in 'A'..='Z' { - let sc: Vec = words.iter().filter(|w| w.contains(c1)).cloned().collect(); - if !sc.is_empty() { - let key = format!("{}", c1); - _generate_wordcache_nested(&mut cache, &sc, &key, 4); - cache.insert(key, sc); - } - } - cache.insert("".to_string(), words); - cache -} - -fn hs2str(hs: &HashSet) -> String { +/* fn hs2str(hs: &HashSet) -> String { let mut chars: Vec = hs.iter().cloned().collect(); if chars.is_empty() { "".to_string() @@ -58,52 +60,90 @@ fn hs2str(hs: &HashSet) -> String { chars.sort_unstable(); chars.iter().collect() } +} */ + + +fn char2bit(c: char) -> Charmask { + debug_assert!('A' <= c && c <= 'Z'); + 1 << (c as u8 - 'A' as u8) } -fn simulate(guess: &str, solution: &str, wordcache: &HashMap>) -> Vec { - //let b_guess = guess.as_bytes(); - //let b_solution = solution.as_bytes(); - let mut matching_chars = ['.', '.', '.', '.', '.']; - let mut banned_chars = [HashSet::new(), HashSet::new(), HashSet::new(), HashSet::new(), HashSet::new()]; - let mut required_chars = HashSet::new(); - for (i, (g, s)) in guess.chars().zip(solution.chars()).enumerate() { - if g == s { // Right letter right position - matching_chars[i] = g; - required_chars.insert(g); - } else if solution.contains(g) { // Right letter wrong position - banned_chars[i].insert(g); - required_chars.insert(g); +fn cm2char(cm: Charmask, offset: i8) -> char { + (((31 - cm.lzcnt() as i8) + 'A' as i8 + offset) as u8) as char +} + +fn _generate_wordcache_nested(cache: &mut WordCache, subcache: &[Word], key: Charmask, depth: u8) { + for c in cm2char(key, 1)..='Z' { + let cb = char2bit(c); + let sc2: Vec = subcache.iter().filter(|w| (w.charmask & cb) == cb).cloned().collect(); + if !sc2.is_empty() { + let key2 = key | cb; + if depth > 0 { + _generate_wordcache_nested(cache, &sc2, key2, depth-1); + } + cache.insert(key2, sc2); + } + } +} + +fn generate_wordcache(words: Vec) -> WordCache { + let mut cache: WordCache = HashMap::new(); + let subcache: Vec = words.iter().cloned().collect(); + _generate_wordcache_nested(&mut cache, &subcache, 0, 5); + cache.insert(0, words); + cache +} + +fn filter_word(w: &Word, banned_chars: &[Charmask; 5], required_chars: Charmask) -> bool { + if w.charmask & required_chars != required_chars { + return false; + } + for (c, bans) in w.letters.iter().zip(banned_chars.iter()) { + if char2bit(*c) & bans != 0 { + return false; + } + } + true +} + +fn simulate(guess: &Word, solution: &Word, mut s: SimState, wordcache: &WordCache) -> (Vec, SimState) { + s.required_chars |= guess.charmask & solution.charmask; + for (i, (gc, sc)) in guess.letters.iter().zip(solution.letters.iter()).enumerate() { + let gb = char2bit(*gc); + if gc == sc { // Right letter right position + s.banned_chars[i] = 255 ^ gb; + } else if solution.charmask & gb != 0 { // Right letter wrong position + s.banned_chars[i] |= gb; } else { // Letter not in solution - for j in 0..banned_chars.len() { - banned_chars[j].insert(g); + for j in 0..s.banned_chars.len() { + s.banned_chars[j] |= gb; } } } - let mut re_str = String::new(); - for (m, b) in matching_chars.iter().zip(banned_chars.iter()) { - if *m != '.' { - re_str.push(*m); - } else { - re_str += &format!("[^{}]", hs2str(b)); - } - } - let re = Regex::new(&re_str).unwrap(); - let cachekey = hs2str(&required_chars); + let cachekey = s.required_chars; match wordcache.contains_key(&cachekey) { - true => wordcache[&cachekey].iter().filter(|w| re.is_match(w)).cloned().collect(), - false => Vec::::new(), + true => ( + wordcache[&cachekey].iter().filter(|w| filter_word(w, &s.banned_chars, s.required_chars)).cloned().collect(), + s + ), + false => ( + Vec::::new(), + s + ), } } -fn find_worstcase(word: &str, wordcache: &HashMap>) -> String { +fn find_worstcase(word: &Word, wordcache: &WordCache) -> (String, usize) { let mut worst = 0; - for target in &wordcache[""] { - let remaining = simulate(&word, target, &wordcache).len(); + let ss = SimState::default(); + for target in &wordcache[&0] { + let remaining = simulate(word, target, ss, &wordcache).0.len(); if remaining > worst {worst = remaining}; } - let output = format!("{} - {}", word, worst); + let wordstr: String = word.letters.iter().collect(); + let output = format!("{} - {}", wordstr, worst); println!("{}", output); - output + (output, worst) } fn main() { @@ -113,7 +153,9 @@ fn main() { let wordcache = generate_wordcache(words); //let sr = simulate(&wordcache[""][0], &wordcache[""][5000], &wordcache); //println!("{:?}", sr); - let results: Vec = wordcache[""].par_iter().map(|w| find_worstcase(w, &wordcache)).collect(); - fs::write("results.txt", results.join("\n")).expect("Failed to write output"); + let mut results: Vec<(String, usize)> = wordcache[&0].par_iter().map(|w| find_worstcase(w, &wordcache)).collect(); + results.sort_by_key(|r| r.1); + let results_strs: Vec = results.iter().map(|r| r.0.clone()).collect(); + fs::write("results.txt", results_strs.join("\n")).expect("Failed to write output"); //println!("{:?}", wordcache.keys()); }