Do most processing as bits instead of strings

This commit is contained in:
Luke Hubmayer-Werner 2022-02-04 17:31:40 +10:30
parent 3ddc3204bb
commit d1ff162de8
2 changed files with 112 additions and 69 deletions

View File

@ -7,4 +7,5 @@ edition = "2021"
[dependencies]
regex = "1"
rayon = "1.5"
rayon = "1.5"
bitintr = "0.3.0"

View File

@ -1,14 +1,43 @@
use std::fs;
use std::collections::{HashMap, HashSet};
use std::collections::HashMap;
use bitintr::{Lzcnt, Tzcnt};
use regex::Regex;
use rayon::prelude::*;
fn load_dictionary(filename: &str) -> Vec<String> {
/// Number of letters in every dictionary word; sizes the per-word and per-state arrays.
const WORD_LENGTH: usize = 5;
/// Bitmask over the alphabet as produced by `char2bit`: bit 0 is 'A', bit 25 is 'Z'.
type Charmask = i32;
/// Constraints accumulated from guess feedback, used by `filter_word` to prune candidates.
#[derive(Copy, Clone, Default)]
struct SimState {
/// Per-position mask of letters a candidate must NOT have at that position.
banned_chars: [Charmask; WORD_LENGTH], // Alphabetical bitmask
/// Mask of letters a candidate must contain somewhere in the word.
required_chars: Charmask
}
/// A fixed-length word stored both as its letters and as a precomputed letter bitmask.
#[derive(Copy, Clone, Default)]
struct Word {
/// The word's characters in order.
letters: [char; WORD_LENGTH],
/// OR of `char2bit` over `letters`; duplicates collapse to a single bit.
charmask: Charmask // All of the characters contained
}
/// Maps a letter mask to the words containing every letter in that mask; key 0 holds the full word list.
type WordCache = HashMap<Charmask, Vec<Word>>;
fn str2word(s: &str) -> Word {
let mut word = Word::default();
let mut iter = s.chars();
for i in 0..WORD_LENGTH {
let c = iter.next().unwrap();
word.letters[i] = c;
word.charmask |= char2bit(c);
}
word
}
fn load_dictionary(filename: &str) -> Vec<Word> {
println!("Loading dictionary at {}", filename);
let rawfile = fs::read_to_string(filename).unwrap();
let rawwords = rawfile.split('\n');
let mut words = Vec::new();
let re = Regex::new(r"^\w{5}$").unwrap();
let re = Regex::new(&format!("{}{}{}", r"^\w{", WORD_LENGTH, r"}$")).unwrap();
for line in rawwords {
if re.is_match(line) {
words.push(line.to_uppercase());
@ -16,41 +45,14 @@ fn load_dictionary(filename: &str) -> Vec<String> {
}
words.sort();
words.dedup();
words
words.iter().map(|w| str2word(w)).collect()
}
fn inc_char(c: char) -> char {
/* fn inc_char(c: char) -> char {
(c as u8 + 1) as char
}
} */
/// Recursively extends `key` with every later letter (up to 'Z') and records, for each
/// extended key, the words of `subcache` that contain the added letter.
///
/// `key` must be non-empty; its last character decides where the letter scan starts.
/// Recursion continues while `depth > 0`. Keys with no matching words are not inserted.
fn _generate_wordcache_nested(cache: &mut HashMap<String, Vec<String>>, subcache: &[String], key: &str, depth: u8) {
    let first_letter = inc_char(key.chars().last().unwrap());
    for letter in first_letter..='Z' {
        let matching: Vec<String> = subcache
            .iter()
            .filter(|word| word.contains(letter))
            .cloned()
            .collect();
        if matching.is_empty() {
            continue;
        }
        let mut longer_key = String::with_capacity(key.len() + 1);
        longer_key.push_str(key);
        longer_key.push(letter);
        if depth > 0 {
            _generate_wordcache_nested(cache, &matching, &longer_key, depth - 1);
        }
        cache.insert(longer_key, matching);
    }
}
/// Builds the string-keyed word cache.
///
/// For every letter 'A'..='Z' that occurs in at least one word, the words containing it are
/// stored under that one-letter key and `_generate_wordcache_nested` adds the longer keys.
/// The complete word list is stored under the empty key.
fn generate_wordcache(words: Vec<String>) -> HashMap<String, Vec<String>> {
    let mut cache = HashMap::new();
    for letter in 'A'..='Z' {
        let containing: Vec<String> = words
            .iter()
            .filter(|word| word.contains(letter))
            .cloned()
            .collect();
        if containing.is_empty() {
            continue;
        }
        let key = letter.to_string();
        _generate_wordcache_nested(&mut cache, &containing, &key, 4);
        cache.insert(key, containing);
    }
    cache.insert(String::new(), words);
    cache
}
fn hs2str(hs: &HashSet<char>) -> String {
/* fn hs2str(hs: &HashSet<char>) -> String {
let mut chars: Vec<char> = hs.iter().cloned().collect();
if chars.is_empty() {
"".to_string()
@ -58,52 +60,90 @@ fn hs2str(hs: &HashSet<char>) -> String {
chars.sort_unstable();
chars.iter().collect()
}
} */
/// Returns the single-bit mask for an uppercase ASCII letter: bit 0 for 'A' up to bit 25 for 'Z'.
///
/// Debug builds assert that `c` lies in 'A'..='Z'; callers must guarantee this.
fn char2bit(c: char) -> Charmask {
    debug_assert!('A' <= c && c <= 'Z');
    let shift = c as u8 - b'A';
    1 << shift
}
fn simulate(guess: &str, solution: &str, wordcache: &HashMap<String, Vec<String>>) -> Vec<String> {
//let b_guess = guess.as_bytes();
//let b_solution = solution.as_bytes();
let mut matching_chars = ['.', '.', '.', '.', '.'];
let mut banned_chars = [HashSet::new(), HashSet::new(), HashSet::new(), HashSet::new(), HashSet::new()];
let mut required_chars = HashSet::new();
for (i, (g, s)) in guess.chars().zip(solution.chars()).enumerate() {
if g == s { // Right letter right position
matching_chars[i] = g;
required_chars.insert(g);
} else if solution.contains(g) { // Right letter wrong position
banned_chars[i].insert(g);
required_chars.insert(g);
fn cm2char(cm: Charmask, offset: i8) -> char {
(((31 - cm.lzcnt() as i8) + 'A' as i8 + offset) as u8) as char
}
/// Recursively extends the letter mask `key` with each letter above its highest set bit
/// and stores, under every extended mask, the words of `subcache` that contain the added letter.
///
/// Recursion continues while `depth > 0`. Masks that match no word are not inserted.
fn _generate_wordcache_nested(cache: &mut WordCache, subcache: &[Word], key: Charmask, depth: u8) {
    for letter in cm2char(key, 1)..='Z' {
        let bit = char2bit(letter);
        let matching: Vec<Word> = subcache
            .iter()
            .copied()
            .filter(|word| (word.charmask & bit) == bit)
            .collect();
        if matching.is_empty() {
            continue;
        }
        let extended_key = key | bit;
        if depth > 0 {
            _generate_wordcache_nested(cache, &matching, extended_key, depth - 1);
        }
        cache.insert(extended_key, matching);
    }
}
fn generate_wordcache(words: Vec<Word>) -> WordCache {
let mut cache: WordCache = HashMap::new();
let subcache: Vec<Word> = words.iter().cloned().collect();
_generate_wordcache_nested(&mut cache, &subcache, 0, 5);
cache.insert(0, words);
cache
}
fn filter_word(w: &Word, banned_chars: &[Charmask; 5], required_chars: Charmask) -> bool {
if w.charmask & required_chars != required_chars {
return false;
}
for (c, bans) in w.letters.iter().zip(banned_chars.iter()) {
if char2bit(*c) & bans != 0 {
return false;
}
}
true
}
fn simulate(guess: &Word, solution: &Word, mut s: SimState, wordcache: &WordCache) -> (Vec<Word>, SimState) {
s.required_chars |= guess.charmask & solution.charmask;
for (i, (gc, sc)) in guess.letters.iter().zip(solution.letters.iter()).enumerate() {
let gb = char2bit(*gc);
if gc == sc { // Right letter right position
s.banned_chars[i] = 255 ^ gb;
} else if solution.charmask & gb != 0 { // Right letter wrong position
s.banned_chars[i] |= gb;
} else { // Letter not in solution
for j in 0..banned_chars.len() {
banned_chars[j].insert(g);
for j in 0..s.banned_chars.len() {
s.banned_chars[j] |= gb;
}
}
}
let mut re_str = String::new();
for (m, b) in matching_chars.iter().zip(banned_chars.iter()) {
if *m != '.' {
re_str.push(*m);
} else {
re_str += &format!("[^{}]", hs2str(b));
}
}
let re = Regex::new(&re_str).unwrap();
let cachekey = hs2str(&required_chars);
let cachekey = s.required_chars;
match wordcache.contains_key(&cachekey) {
true => wordcache[&cachekey].iter().filter(|w| re.is_match(w)).cloned().collect(),
false => Vec::<String>::new(),
true => (
wordcache[&cachekey].iter().filter(|w| filter_word(w, &s.banned_chars, s.required_chars)).cloned().collect(),
s
),
false => (
Vec::<Word>::new(),
s
),
}
}
fn find_worstcase(word: &str, wordcache: &HashMap<String, Vec<String>>) -> String {
fn find_worstcase(word: &Word, wordcache: &WordCache) -> (String, usize) {
let mut worst = 0;
for target in &wordcache[""] {
let remaining = simulate(&word, target, &wordcache).len();
let ss = SimState::default();
for target in &wordcache[&0] {
let remaining = simulate(word, target, ss, &wordcache).0.len();
if remaining > worst {worst = remaining};
}
let output = format!("{} - {}", word, worst);
let wordstr: String = word.letters.iter().collect();
let output = format!("{} - {}", wordstr, worst);
println!("{}", output);
output
(output, worst)
}
fn main() {
@ -113,7 +153,9 @@ fn main() {
let wordcache = generate_wordcache(words);
//let sr = simulate(&wordcache[""][0], &wordcache[""][5000], &wordcache);
//println!("{:?}", sr);
let results: Vec<String> = wordcache[""].par_iter().map(|w| find_worstcase(w, &wordcache)).collect();
fs::write("results.txt", results.join("\n")).expect("Failed to write output");
let mut results: Vec<(String, usize)> = wordcache[&0].par_iter().map(|w| find_worstcase(w, &wordcache)).collect();
results.sort_by_key(|r| r.1);
let results_strs: Vec<String> = results.iter().map(|r| r.0.clone()).collect();
fs::write("results.txt", results_strs.join("\n")).expect("Failed to write output");
//println!("{:?}", wordcache.keys());
}