bitmask batching

This commit is contained in:
Luke Hubmayer-Werner 2022-12-07 15:11:23 +10:30
parent 33a79a23bc
commit 253068d67d
1 changed file with 41 additions and 22 deletions

View File

@ -35,37 +35,56 @@ fn mask_to_string(mask: u32) -> String {
} }
fn find_first_unique_runs(s: &Vec<u8>) -> (Int, Int) { fn find_first_unique_runs(s: &Vec<u8>) -> (Int, Int) {
let mut four = 0; const BATCH_SIZE: Int = 8;
let mut fourteen = 0; const BATCH_NUM: Int = 4096/BATCH_SIZE;
let mut four: Int = 0;
let mut fourteen: Int = 0;
let mut masks: [u32; 4096] = [0; 4096]; let mut masks: [u32; 4096] = [0; 4096];
let mut scratch_masks: [u32; BATCH_SIZE] = [0; BATCH_SIZE];
let mut scratch_masks_bits: [u8; BATCH_SIZE] = [0; BATCH_SIZE];
for i in 0..4096 { for i in 0..4096 {
masks[i] = 1 << (s[i]-b'a'); masks[i] = 1 << (s[i]-b'a');
} }
let mut masks2: [u32; 4096] = masks.clone(); // Turn masks into masks2. Indices are now +1.
for i in 1..4096 { for i in 0..(4096-1) {
masks2[i] |= masks[i-1]; masks[i] |= masks[i+1];
} }
for i in 4..4096 { 'four_loop: for batch in 1..(BATCH_NUM) {
// let mut mask = masks[i]; for i in 0..BATCH_SIZE {
// for j in 1..4 {mask |= masks[i-j];} let idx = batch*BATCH_SIZE + i;
let mut mask = masks2[i]; scratch_masks[i] = masks[idx] | masks[idx-2];
for j in 1..2 {mask |= masks2[i-j*2];} scratch_masks_bits[i] = scratch_masks[i].count_ones() as u8;
if mask.count_ones() == 4 { }
four = i; for i in 0..BATCH_SIZE {
break; if scratch_masks_bits[i] == 4 {
four = batch*BATCH_SIZE + i;
break 'four_loop;
}
} }
} }
for i in (four+9)..4096 { // Turn masks2 into masks4. Indices are now +3.
// let mut mask = masks[i]; for i in four..(4096-3) {
// for j in 1..14 {mask |= masks[i-j];} masks[i] |= masks[i+2];
let mut mask = masks2[i]; }
for j in 1..7 {mask |= masks2[i-j*2];} // (2..14).step_by(2) doesn't unroll, LLVM is garbage // Turn masks4 into masks8. Indices are now +7.
if mask.count_ones() == 14 { for i in four..(4096-7) {
fourteen = i; masks[i] |= masks[i+4];
break; }
'fourteen_loop: for batch in (four/BATCH_SIZE)..(BATCH_NUM) {
for i in 0..BATCH_SIZE {
let idx = batch*BATCH_SIZE + i;
// scratch_masks[i] = masks[idx] | masks[idx-4] | masks[idx-8] | masks[idx-10];
scratch_masks[i] = masks[idx] | masks[idx-6];
scratch_masks_bits[i] = scratch_masks[i].count_ones() as u8;
}
for i in 0..BATCH_SIZE {
if scratch_masks_bits[i] == 14 {
fourteen = batch*BATCH_SIZE + i;
break 'fourteen_loop;
}
} }
} }
return (four+1, fourteen+1); return (four+2, fourteen+8);
} }
fn run_once(s: &Vec<u8>) { fn run_once(s: &Vec<u8>) {