Do most processing as bits instead of strings

This commit is contained in:
Luke Hubmayer-Werner 2022-02-04 17:31:40 +10:30
parent 3ddc3204bb
commit d1ff162de8
2 changed files with 112 additions and 69 deletions

View File

@ -7,4 +7,5 @@ edition = "2021"
[dependencies] [dependencies]
regex = "1" regex = "1"
rayon = "1.5" rayon = "1.5"
bitintr = "0.3.0"

View File

@ -1,14 +1,43 @@
use std::fs; use std::fs;
use std::collections::{HashMap, HashSet}; use std::collections::HashMap;
use bitintr::{Lzcnt, Tzcnt};
use regex::Regex; use regex::Regex;
use rayon::prelude::*; use rayon::prelude::*;
fn load_dictionary(filename: &str) -> Vec<String> { const WORD_LENGTH: usize = 5;
/// Bitmask over the alphabet: `char2bit` maps each letter to one bit,
/// with 'A' at bit 0, so a value describes a set of letters.
type Charmask = i32;
/// Constraints accumulated while simulating guesses against a solution.
///
/// `Debug` is derived so that simulation results can be printed with `{:?}`
/// while debugging.
#[derive(Debug, Clone, Copy, Default, PartialEq, Eq)]
struct SimState {
    /// Per-position alphabetical bitmask of letters that a candidate word
    /// must NOT have at that position.
    banned_chars: [Charmask; WORD_LENGTH],
    /// Bitmask of letters that a candidate word must contain somewhere.
    required_chars: Charmask,
}
/// A fixed-length word together with a precomputed set of its letters.
///
/// `Debug` is derived so that word lists can be printed with `{:?}` while
/// debugging; `PartialEq`/`Eq` allow words to be compared directly.
#[derive(Debug, Clone, Copy, Default, PartialEq, Eq)]
struct Word {
    /// The letters of the word, in order.
    letters: [char; WORD_LENGTH],
    /// Bitmask of all of the characters contained in `letters`.
    charmask: Charmask,
}
/// Lookup table from a letter-set bitmask to a list of words.
/// Key `0` (the empty letter set) holds the full word list.
type WordCache = HashMap<Charmask, Vec<Word>>;
fn str2word(s: &str) -> Word {
let mut word = Word::default();
let mut iter = s.chars();
for i in 0..WORD_LENGTH {
let c = iter.next().unwrap();
word.letters[i] = c;
word.charmask |= char2bit(c);
}
word
}
fn load_dictionary(filename: &str) -> Vec<Word> {
println!("Loading dictionary at {}", filename); println!("Loading dictionary at {}", filename);
let rawfile = fs::read_to_string(filename).unwrap(); let rawfile = fs::read_to_string(filename).unwrap();
let rawwords = rawfile.split('\n'); let rawwords = rawfile.split('\n');
let mut words = Vec::new(); let mut words = Vec::new();
let re = Regex::new(r"^\w{5}$").unwrap(); let re = Regex::new(&format!("{}{}{}", r"^\w{", WORD_LENGTH, r"}$")).unwrap();
for line in rawwords { for line in rawwords {
if re.is_match(line) { if re.is_match(line) {
words.push(line.to_uppercase()); words.push(line.to_uppercase());
@ -16,41 +45,14 @@ fn load_dictionary(filename: &str) -> Vec<String> {
} }
words.sort(); words.sort();
words.dedup(); words.dedup();
words words.iter().map(|w| str2word(w)).collect()
} }
fn inc_char(c: char) -> char { /* fn inc_char(c: char) -> char {
(c as u8 + 1) as char (c as u8 + 1) as char
} } */
fn _generate_wordcache_nested(cache: &mut HashMap<String, Vec<String>>, subcache: &[String], key: &str, depth: u8) { /* fn hs2str(hs: &HashSet<char>) -> String {
for c in inc_char(key.chars().last().unwrap())..='Z' {
let sc2: Vec<String> = subcache.iter().filter(|w| w.contains(c)).cloned().collect();
if !sc2.is_empty() {
let key2 = format!("{}{}", key, c);
if depth > 0 {
_generate_wordcache_nested(cache, &sc2, &key2, depth-1);
}
cache.insert(key2, sc2);
}
}
}
fn generate_wordcache(words: Vec<String>) -> HashMap<String, Vec<String>> {
let mut cache = HashMap::new();
for c1 in 'A'..='Z' {
let sc: Vec<String> = words.iter().filter(|w| w.contains(c1)).cloned().collect();
if !sc.is_empty() {
let key = format!("{}", c1);
_generate_wordcache_nested(&mut cache, &sc, &key, 4);
cache.insert(key, sc);
}
}
cache.insert("".to_string(), words);
cache
}
fn hs2str(hs: &HashSet<char>) -> String {
let mut chars: Vec<char> = hs.iter().cloned().collect(); let mut chars: Vec<char> = hs.iter().cloned().collect();
if chars.is_empty() { if chars.is_empty() {
"".to_string() "".to_string()
@ -58,52 +60,90 @@ fn hs2str(hs: &HashSet<char>) -> String {
chars.sort_unstable(); chars.sort_unstable();
chars.iter().collect() chars.iter().collect()
} }
} */
fn char2bit(c: char) -> Charmask {
debug_assert!('A' <= c && c <= 'Z');
1 << (c as u8 - 'A' as u8)
} }
fn simulate(guess: &str, solution: &str, wordcache: &HashMap<String, Vec<String>>) -> Vec<String> { fn cm2char(cm: Charmask, offset: i8) -> char {
//let b_guess = guess.as_bytes(); (((31 - cm.lzcnt() as i8) + 'A' as i8 + offset) as u8) as char
//let b_solution = solution.as_bytes(); }
let mut matching_chars = ['.', '.', '.', '.', '.'];
let mut banned_chars = [HashSet::new(), HashSet::new(), HashSet::new(), HashSet::new(), HashSet::new()]; fn _generate_wordcache_nested(cache: &mut WordCache, subcache: &[Word], key: Charmask, depth: u8) {
let mut required_chars = HashSet::new(); for c in cm2char(key, 1)..='Z' {
for (i, (g, s)) in guess.chars().zip(solution.chars()).enumerate() { let cb = char2bit(c);
if g == s { // Right letter right position let sc2: Vec<Word> = subcache.iter().filter(|w| (w.charmask & cb) == cb).cloned().collect();
matching_chars[i] = g; if !sc2.is_empty() {
required_chars.insert(g); let key2 = key | cb;
} else if solution.contains(g) { // Right letter wrong position if depth > 0 {
banned_chars[i].insert(g); _generate_wordcache_nested(cache, &sc2, key2, depth-1);
required_chars.insert(g); }
cache.insert(key2, sc2);
}
}
}
fn generate_wordcache(words: Vec<Word>) -> WordCache {
let mut cache: WordCache = HashMap::new();
let subcache: Vec<Word> = words.iter().cloned().collect();
_generate_wordcache_nested(&mut cache, &subcache, 0, 5);
cache.insert(0, words);
cache
}
fn filter_word(w: &Word, banned_chars: &[Charmask; 5], required_chars: Charmask) -> bool {
if w.charmask & required_chars != required_chars {
return false;
}
for (c, bans) in w.letters.iter().zip(banned_chars.iter()) {
if char2bit(*c) & bans != 0 {
return false;
}
}
true
}
fn simulate(guess: &Word, solution: &Word, mut s: SimState, wordcache: &WordCache) -> (Vec<Word>, SimState) {
s.required_chars |= guess.charmask & solution.charmask;
for (i, (gc, sc)) in guess.letters.iter().zip(solution.letters.iter()).enumerate() {
let gb = char2bit(*gc);
if gc == sc { // Right letter right position
s.banned_chars[i] = 255 ^ gb;
} else if solution.charmask & gb != 0 { // Right letter wrong position
s.banned_chars[i] |= gb;
} else { // Letter not in solution } else { // Letter not in solution
for j in 0..banned_chars.len() { for j in 0..s.banned_chars.len() {
banned_chars[j].insert(g); s.banned_chars[j] |= gb;
} }
} }
} }
let mut re_str = String::new(); let cachekey = s.required_chars;
for (m, b) in matching_chars.iter().zip(banned_chars.iter()) {
if *m != '.' {
re_str.push(*m);
} else {
re_str += &format!("[^{}]", hs2str(b));
}
}
let re = Regex::new(&re_str).unwrap();
let cachekey = hs2str(&required_chars);
match wordcache.contains_key(&cachekey) { match wordcache.contains_key(&cachekey) {
true => wordcache[&cachekey].iter().filter(|w| re.is_match(w)).cloned().collect(), true => (
false => Vec::<String>::new(), wordcache[&cachekey].iter().filter(|w| filter_word(w, &s.banned_chars, s.required_chars)).cloned().collect(),
s
),
false => (
Vec::<Word>::new(),
s
),
} }
} }
fn find_worstcase(word: &str, wordcache: &HashMap<String, Vec<String>>) -> String { fn find_worstcase(word: &Word, wordcache: &WordCache) -> (String, usize) {
let mut worst = 0; let mut worst = 0;
for target in &wordcache[""] { let ss = SimState::default();
let remaining = simulate(&word, target, &wordcache).len(); for target in &wordcache[&0] {
let remaining = simulate(word, target, ss, &wordcache).0.len();
if remaining > worst {worst = remaining}; if remaining > worst {worst = remaining};
} }
let output = format!("{} - {}", word, worst); let wordstr: String = word.letters.iter().collect();
let output = format!("{} - {}", wordstr, worst);
println!("{}", output); println!("{}", output);
output (output, worst)
} }
fn main() { fn main() {
@ -113,7 +153,9 @@ fn main() {
let wordcache = generate_wordcache(words); let wordcache = generate_wordcache(words);
//let sr = simulate(&wordcache[""][0], &wordcache[""][5000], &wordcache); //let sr = simulate(&wordcache[""][0], &wordcache[""][5000], &wordcache);
//println!("{:?}", sr); //println!("{:?}", sr);
let results: Vec<String> = wordcache[""].par_iter().map(|w| find_worstcase(w, &wordcache)).collect(); let mut results: Vec<(String, usize)> = wordcache[&0].par_iter().map(|w| find_worstcase(w, &wordcache)).collect();
fs::write("results.txt", results.join("\n")).expect("Failed to write output"); results.sort_by_key(|r| r.1);
let results_strs: Vec<String> = results.iter().map(|r| r.0.clone()).collect();
fs::write("results.txt", results_strs.join("\n")).expect("Failed to write output");
//println!("{:?}", wordcache.keys()); //println!("{:?}", wordcache.keys());
} }