Compare commits
1 Commits
Author | SHA1 | Date |
---|---|---|
Luke Hubmayer-Werner | c10007f080 |
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
175
src/main.rs
175
src/main.rs
|
@ -6,19 +6,18 @@ use regex::Regex;
|
||||||
use rayon::prelude::*;
|
use rayon::prelude::*;
|
||||||
use itertools::zip;
|
use itertools::zip;
|
||||||
use array_init::array_init;
|
use array_init::array_init;
|
||||||
|
use std::collections::BTreeMap;
|
||||||
|
|
||||||
pub type Charmask = i32;
|
pub type Charmask = i128;
|
||||||
pub type Achar = i8; // ASCII char
|
|
||||||
|
|
||||||
pub const WORD_LENGTH: usize = 5;
|
pub const WORD_LENGTH: usize = 4;
|
||||||
pub const WORD_LENGTH_P: usize = 5; // Padded for SIMD shenanigans
|
pub const WORD_LENGTH_P: usize = 4; // Padded for SIMD shenanigans
|
||||||
pub const GUESS_DEPTH: usize = 1; // TODO: Change this whenever working at different depths
|
pub const GUESS_DEPTH: usize = 1; // TODO: Change this whenever working at different depths
|
||||||
pub const N_SOLUTIONS: usize = 2315;
|
pub const N_LETTERS: u8 = 74;
|
||||||
|
// pub const n_solutions: usize = 2315;
|
||||||
pub const CACHE_SIZE: usize = 1<<26;
|
pub const CACHE_SIZE: usize = 1<<26;
|
||||||
pub const IDX_ALL_WORDS: Charmask = (CACHE_SIZE as Charmask) - 1;
|
pub const IDX_ALL_WORDS: Charmask = (CACHE_SIZE as Charmask) - 1;
|
||||||
pub const IDX_VALID_SOLUTIONS: Charmask = 0;
|
pub const IDX_VALID_SOLUTIONS: Charmask = 0;
|
||||||
pub const A: Achar = 'A' as Achar;
|
|
||||||
pub const Z: Achar = 'Z' as Achar;
|
|
||||||
|
|
||||||
pub const MAX_ENTRIES_PER_JOB: usize = 1000;
|
pub const MAX_ENTRIES_PER_JOB: usize = 1000;
|
||||||
|
|
||||||
|
@ -26,7 +25,6 @@ pub const MAX_ENTRIES_PER_JOB: usize = 1000;
|
||||||
pub struct Word {
|
pub struct Word {
|
||||||
charbits: [Charmask; WORD_LENGTH_P], // Each letter in bitmask form
|
charbits: [Charmask; WORD_LENGTH_P], // Each letter in bitmask form
|
||||||
charmask: Charmask, // All of the characters contained
|
charmask: Charmask, // All of the characters contained
|
||||||
//letters: [Achar; WORD_LENGTH]
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
@ -56,8 +54,6 @@ use std::hash::BuildHasherDefault;
|
||||||
#[cfg(all(not(use_thin_array), not(use_hashmap), not(feature = "ahash"), feature = "xxhash_rust"))]
|
#[cfg(all(not(use_thin_array), not(use_hashmap), not(feature = "ahash"), feature = "xxhash_rust"))]
|
||||||
type WordCache = HashMap<Charmask, Vec<Word>, BuildHasherDefault<Xxh3>>;
|
type WordCache = HashMap<Charmask, Vec<Word>, BuildHasherDefault<Xxh3>>;
|
||||||
|
|
||||||
#[cfg(all(not(use_thin_array), not(use_hashmap), not(feature = "ahash"), not(feature = "xxhash_rust")))]
|
|
||||||
use std::collections::BTreeMap;
|
|
||||||
#[cfg(all(not(use_thin_array), not(use_hashmap), not(feature = "ahash"), not(feature = "xxhash_rust")))]
|
#[cfg(all(not(use_thin_array), not(use_hashmap), not(feature = "ahash"), not(feature = "xxhash_rust")))]
|
||||||
type WordCache = BTreeMap<Charmask, Vec<Word>>;
|
type WordCache = BTreeMap<Charmask, Vec<Word>>;
|
||||||
|
|
||||||
|
@ -66,20 +62,94 @@ fn default_wordcache() -> WordCache {
|
||||||
WordCache::default()
|
WordCache::default()
|
||||||
}
|
}
|
||||||
|
|
||||||
|
fn char2bit(c: char) -> Charmask {
|
||||||
fn char2bit(c: Achar) -> Charmask {
|
match c { // By setting the most frequently-occurring kana to the highest bits, we can numerically assess what word combinations have more of them set
|
||||||
debug_assert!((A..=Z).contains(&c));
|
'ん' => 1<<73,
|
||||||
1 << (c - A)
|
'い' => 1<<72,
|
||||||
|
'う' => 1<<71,
|
||||||
|
'か' => 1<<70,
|
||||||
|
'る' => 1<<69,
|
||||||
|
'く' => 1<<68,
|
||||||
|
'つ' => 1<<67,
|
||||||
|
'こ' => 1<<66,
|
||||||
|
'し' => 1<<65,
|
||||||
|
'と' => 1<<64,
|
||||||
|
'た' => 1<<63,
|
||||||
|
'き' => 1<<62,
|
||||||
|
'す' => 1<<61,
|
||||||
|
'せ' => 1<<60,
|
||||||
|
'さ' => 1<<59,
|
||||||
|
'お' => 1<<58,
|
||||||
|
'ま' => 1<<57,
|
||||||
|
'な' => 1<<56,
|
||||||
|
'け' => 1<<55,
|
||||||
|
'ら' => 1<<54,
|
||||||
|
'て' => 1<<53,
|
||||||
|
'れ' => 1<<52,
|
||||||
|
'り' => 1<<51,
|
||||||
|
'あ' => 1<<50,
|
||||||
|
'が' => 1<<49,
|
||||||
|
'だ' => 1<<48,
|
||||||
|
'ち' => 1<<47,
|
||||||
|
'そ' => 1<<46,
|
||||||
|
'め' => 1<<45,
|
||||||
|
'え' => 1<<44,
|
||||||
|
'ど' => 1<<43,
|
||||||
|
'は' => 1<<42,
|
||||||
|
'じ' => 1<<41,
|
||||||
|
'も' => 1<<40,
|
||||||
|
'よ' => 1<<39,
|
||||||
|
'ー' => 1<<38,
|
||||||
|
'ろ' => 1<<37,
|
||||||
|
'の' => 1<<36,
|
||||||
|
'ぶ' => 1<<35,
|
||||||
|
'げ' => 1<<34,
|
||||||
|
'み' => 1<<33,
|
||||||
|
'や' => 1<<32,
|
||||||
|
'わ' => 1<<31,
|
||||||
|
'に' => 1<<30,
|
||||||
|
'ふ' => 1<<29,
|
||||||
|
'ほ' => 1<<28,
|
||||||
|
'ば' => 1<<27,
|
||||||
|
'ぼ' => 1<<26,
|
||||||
|
'ひ' => 1<<25,
|
||||||
|
'ざ' => 1<<24,
|
||||||
|
'ご' => 1<<23,
|
||||||
|
'ず' => 1<<22,
|
||||||
|
'ゆ' => 1<<21,
|
||||||
|
'ぞ' => 1<<20,
|
||||||
|
'む' => 1<<19,
|
||||||
|
'び' => 1<<18,
|
||||||
|
'で' => 1<<17,
|
||||||
|
'ぜ' => 1<<16,
|
||||||
|
'ね' => 1<<15,
|
||||||
|
'べ' => 1<<14,
|
||||||
|
'ぱ' => 1<<13,
|
||||||
|
'へ' => 1<<12,
|
||||||
|
'ぐ' => 1<<11,
|
||||||
|
'ぎ' => 1<<10,
|
||||||
|
'づ' => 1<<9,
|
||||||
|
'ぷ' => 1<<8,
|
||||||
|
'ぽ' => 1<<7,
|
||||||
|
'ぴ' => 1<<6,
|
||||||
|
'ぬ' => 1<<5,
|
||||||
|
'ぺ' => 1<<4,
|
||||||
|
'ぢ' => 1<<3,
|
||||||
|
'を' => 1<<2,
|
||||||
|
'ゔ' => 1<<1,
|
||||||
|
'〜' => 1<<0,
|
||||||
|
_ => 0
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
fn str2word(s: &str) -> Word {
|
fn str2word(s: &str) -> Word {
|
||||||
let mut word = Word::default();
|
let mut word = Word::default();
|
||||||
let mut iter = s.chars();
|
let mut iter = s.chars();
|
||||||
for i in 0..WORD_LENGTH {
|
for i in 0..WORD_LENGTH {
|
||||||
let c = iter.next().unwrap() as Achar;
|
let c = iter.next().unwrap();
|
||||||
let cb = char2bit(c);
|
let cb = char2bit(c);
|
||||||
word.charbits[i] = cb;
|
word.charbits[i] = cb;
|
||||||
//word.letters[i] = c;
|
|
||||||
word.charmask |= cb;
|
word.charmask |= cb;
|
||||||
}
|
}
|
||||||
word
|
word
|
||||||
|
@ -121,40 +191,40 @@ fn charbits2str(charbits: [Charmask; WORD_LENGTH]) -> String {
|
||||||
s
|
s
|
||||||
} */
|
} */
|
||||||
|
|
||||||
fn load_dictionary(filename: &str) -> Vec<String> {
|
fn load_dictionary(filename: &str) -> (Vec<String>, usize) {
|
||||||
println!("Loading dictionary at {}", filename);
|
println!("Loading dictionary at {}", filename);
|
||||||
let rawfile = fs::read_to_string(filename).unwrap();
|
let rawfile = fs::read_to_string(filename).unwrap();
|
||||||
let rawwords = rawfile.split('\n');
|
let rawwords = rawfile.split('\n');
|
||||||
let mut words = Vec::new();
|
let mut words = Vec::<String>::new();
|
||||||
let re = Regex::new(&format!("{}{}{}", r"^[A-Za-z]{", WORD_LENGTH, r"}$")).unwrap();
|
let mut n_solutions = 0;
|
||||||
for line in rawwords {
|
for line in rawwords {
|
||||||
if re.is_match(line) {
|
if line == "[Ta]" {
|
||||||
words.push(line.to_uppercase());
|
n_solutions = words.len();
|
||||||
|
} else if line.chars().count() == 4 {
|
||||||
|
words.push(line.to_string());
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
//words.sort();
|
(words, n_solutions)
|
||||||
//words.dedup();
|
|
||||||
words
|
|
||||||
}
|
}
|
||||||
|
|
||||||
fn _generate_wordcache_nested(cache: &mut WordCache, subcache: &[Word], key: Charmask, next_c: Achar, depth: u8) {
|
fn _generate_wordcache_nested(cache: &mut WordCache, subcache: &[Word], key: Charmask, next_bit: u8, depth: u8) {
|
||||||
for c in next_c..=Z {
|
for b in next_bit..N_LETTERS {
|
||||||
let cb = char2bit(c);
|
let cb = 1<<b;
|
||||||
let sc2: Vec<Word> = subcache.iter().filter(|w| (w.charmask & cb) == cb).cloned().collect();
|
let sc2: Vec<Word> = subcache.iter().filter(|w| (w.charmask & cb) == cb).cloned().collect();
|
||||||
if !sc2.is_empty() {
|
if !sc2.is_empty() {
|
||||||
let key2 = key | cb;
|
let key2 = key | cb;
|
||||||
if depth > 0 {
|
if depth > 0 {
|
||||||
_generate_wordcache_nested(cache, &sc2, key2, c+1, depth-1);
|
_generate_wordcache_nested(cache, &sc2, key2, b+1, depth-1);
|
||||||
}
|
}
|
||||||
cache.insert(key2, sc2);
|
cache.insert(key2, sc2);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
fn generate_wordcache(valid_words: Vec<Word>) -> WordCache {
|
fn generate_wordcache(valid_words: Vec<Word>, n_solutions: usize) -> WordCache {
|
||||||
let mut cache: WordCache = default_wordcache();
|
let mut cache: WordCache = default_wordcache();
|
||||||
let valid_solutions: Vec<Word> = valid_words[..N_SOLUTIONS].to_vec(); // Hacky way to separate the valid solutions from the larger guessing list
|
let valid_solutions: Vec<Word> = valid_words[..n_solutions].to_vec(); // Hacky way to separate the valid solutions from the larger guessing list
|
||||||
_generate_wordcache_nested(&mut cache, &valid_solutions, 0, A, 5);
|
_generate_wordcache_nested(&mut cache, &valid_solutions, 0, 0, 5);
|
||||||
cache.insert(IDX_VALID_SOLUTIONS, valid_solutions);
|
cache.insert(IDX_VALID_SOLUTIONS, valid_solutions);
|
||||||
cache.insert(IDX_ALL_WORDS, valid_words);
|
cache.insert(IDX_ALL_WORDS, valid_words);
|
||||||
cache
|
cache
|
||||||
|
@ -164,7 +234,7 @@ fn filter_word(w: &[Charmask; WORD_LENGTH_P], banned_chars: &[Charmask; WORD_LEN
|
||||||
zip(w, banned_chars).all(|(x,y)| x & y == 0)
|
zip(w, banned_chars).all(|(x,y)| x & y == 0)
|
||||||
}
|
}
|
||||||
|
|
||||||
fn aggregate_guesses(guess_ids: &Vec<usize>, wordcache: &WordCache) -> Word {
|
fn aggregate_guesses(guess_ids: &[usize], wordcache: &WordCache) -> Word {
|
||||||
//guess_ids.iter().reduce(|out, g| out |= wordcache[IDX_ALL_WORDS][g]).unwrap()
|
//guess_ids.iter().reduce(|out, g| out |= wordcache[IDX_ALL_WORDS][g]).unwrap()
|
||||||
let all_words = &wordcache[&IDX_ALL_WORDS];
|
let all_words = &wordcache[&IDX_ALL_WORDS];
|
||||||
let mut iter = guess_ids.iter();
|
let mut iter = guess_ids.iter();
|
||||||
|
@ -182,15 +252,12 @@ fn aggregate_guesses(guess_ids: &Vec<usize>, wordcache: &WordCache) -> Word {
|
||||||
fn simulate(guess: Word, wordcache: &WordCache) -> (usize, usize) {
|
fn simulate(guess: Word, wordcache: &WordCache) -> (usize, usize) {
|
||||||
// let valid_words = &wordcache[&IDX_ALL_WORDS];
|
// let valid_words = &wordcache[&IDX_ALL_WORDS];
|
||||||
let valid_solutions = &wordcache[&IDX_VALID_SOLUTIONS];
|
let valid_solutions = &wordcache[&IDX_VALID_SOLUTIONS];
|
||||||
|
let n_solutions = valid_solutions.len();
|
||||||
|
|
||||||
let required_chars: [Charmask; N_SOLUTIONS] = array_init::from_iter(
|
let required_chars: Vec<Charmask> = valid_solutions.iter().map(|s| s.charmask & guess.charmask).collect();
|
||||||
valid_solutions.iter().map(|s| s.charmask & guess.charmask)
|
let mut banned_chars: Vec<Charmask> = (0..WORD_LENGTH*n_solutions).map(|_| 0).collect();
|
||||||
).unwrap();
|
|
||||||
let mut banned_chars: [Charmask; WORD_LENGTH*N_SOLUTIONS] = [0; WORD_LENGTH*N_SOLUTIONS];
|
for i in 0..n_solutions {
|
||||||
/* array_init::from_iter(
|
|
||||||
valid_solutions.iter().map(|s| s.charmask & guess.charmask)
|
|
||||||
).unwrap(); */
|
|
||||||
for i in 0..N_SOLUTIONS {
|
|
||||||
let s = valid_solutions[i];
|
let s = valid_solutions[i];
|
||||||
let bans = guess.charmask & !s.charmask; // A letter fully rejected in any position bans it in all positions
|
let bans = guess.charmask & !s.charmask; // A letter fully rejected in any position bans it in all positions
|
||||||
for j in 0..WORD_LENGTH {
|
for j in 0..WORD_LENGTH {
|
||||||
|
@ -203,13 +270,13 @@ fn simulate(guess: Word, wordcache: &WordCache) -> (usize, usize) {
|
||||||
banned_chars[i*WORD_LENGTH + j] |= !correct;
|
banned_chars[i*WORD_LENGTH + j] |= !correct;
|
||||||
} */
|
} */
|
||||||
//Branchless
|
//Branchless
|
||||||
banned_chars[i*WORD_LENGTH + j] |= !correct * (correct !=0) as i32;
|
banned_chars[i*WORD_LENGTH + j] |= !correct * (correct !=0) as Charmask;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
let mut worst = 0;
|
let mut worst = 0;
|
||||||
let mut worst_w = 0;
|
let mut worst_w = 0;
|
||||||
for target_id in 0..N_SOLUTIONS {
|
for target_id in 0..n_solutions {
|
||||||
let cachekey = required_chars[target_id];
|
let cachekey = required_chars[target_id];
|
||||||
if wordcache.contains_key(&cachekey) {
|
if wordcache.contains_key(&cachekey) {
|
||||||
let mut remaining = 0;
|
let mut remaining = 0;
|
||||||
|
@ -234,9 +301,10 @@ fn calculate_best(w1start: usize, w1end: usize, total: usize, wordcache: &WordCa
|
||||||
println!("Starting from word #{} to ending word #{}.", w1start, w1end);
|
println!("Starting from word #{} to ending word #{}.", w1start, w1end);
|
||||||
let mut guess_ids: Vec<Vec<usize>> = Vec::default();
|
let mut guess_ids: Vec<Vec<usize>> = Vec::default();
|
||||||
for i1 in w1start..w1end {
|
for i1 in w1start..w1end {
|
||||||
for i2 in i1..total {
|
guess_ids.push(vec![i1])
|
||||||
guess_ids.push(vec![i1,i2])
|
// for i2 in i1..total {
|
||||||
}
|
// guess_ids.push(vec![i1,i2])
|
||||||
|
// }
|
||||||
}
|
}
|
||||||
let guesses: Vec<Word> = guess_ids.iter().map(|i| aggregate_guesses(&i, &wordcache)).collect();
|
let guesses: Vec<Word> = guess_ids.iter().map(|i| aggregate_guesses(&i, &wordcache)).collect();
|
||||||
println!("This consists of {} guess combinations", guess_ids.len());
|
println!("This consists of {} guess combinations", guess_ids.len());
|
||||||
|
@ -251,29 +319,29 @@ fn calculate_best(w1start: usize, w1end: usize, total: usize, wordcache: &WordCa
|
||||||
results
|
results
|
||||||
}
|
}
|
||||||
|
|
||||||
fn guess2str(guess: &Vec<usize>, word_strs: &Vec<String>) -> String {
|
fn guess2str(guess: &[usize], word_strs: &[String]) -> String {
|
||||||
let strs: Vec<String> = guess.iter().map(|i| word_strs[*i].clone()).collect();
|
let strs: Vec<String> = guess.iter().map(|i| word_strs[*i].clone()).collect();
|
||||||
strs.join(",")
|
strs.join(",")
|
||||||
}
|
}
|
||||||
|
|
||||||
fn main() {
|
fn main() {
|
||||||
eprint!("Hello, world!\n");
|
//eprint!("Hello, world!\n");
|
||||||
// Prints each argument on a separate line
|
// Prints each argument on a separate line
|
||||||
for argument in env::args() {
|
for argument in env::args() {
|
||||||
print!("{}\t", argument);
|
print!("{}\t", argument);
|
||||||
}
|
}
|
||||||
fs::write("test.txt", ["test1", "test2", "test3"].join("\n")).expect("Failed to write output");
|
//fs::write("test.txt", ["test1", "test2", "test3"].join("\n")).expect("Failed to write output");
|
||||||
let word_strs: Vec<String> = load_dictionary("words-kura");
|
let (word_strs, n_solutions) = load_dictionary("kotobade-asobou-list");
|
||||||
let totalwords = word_strs.len();
|
let totalwords = word_strs.len();
|
||||||
let words: Vec<Word> = word_strs.iter().map(|w| str2word(w)).collect();
|
let words: Vec<Word> = word_strs.iter().map(|w| str2word(w)).collect();
|
||||||
println!("Loaded dict - {} words in dict", totalwords);
|
println!("Loaded dict - {} words in dict, {} of which can be solutions.", totalwords, n_solutions);
|
||||||
let wordcache = generate_wordcache(words);
|
let wordcache = generate_wordcache(words, n_solutions);
|
||||||
//let all_words = &wordcache[&IDX_ALL_WORDS];
|
//let all_words = &wordcache[&IDX_ALL_WORDS];
|
||||||
// println!("Cache contains {} keys", wordcache.keys().len()); // 6756 on words-kura
|
// println!("Cache contains {} keys", wordcache.keys().len()); // 6756 on words-kura
|
||||||
|
|
||||||
let args: Vec<String> = env::args().collect();
|
let args: Vec<String> = env::args().collect();
|
||||||
let mut w1start: usize = 0;
|
let w1start: usize;
|
||||||
let mut w1end: usize = totalwords.min(1000);
|
let w1end: usize;
|
||||||
match args.len() {
|
match args.len() {
|
||||||
3 => {
|
3 => {
|
||||||
let s_w1start = &args[1];
|
let s_w1start = &args[1];
|
||||||
|
@ -296,6 +364,7 @@ fn main() {
|
||||||
},
|
},
|
||||||
_ => {
|
_ => {
|
||||||
w1start = 0;
|
w1start = 0;
|
||||||
|
w1end = totalwords;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue