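More performance improvements: widen SIMD from 16 to 32 lanes, accumulate in u32 instead of u64 (guarded by an overflow assert), and derive xsqr from x instead of summing it separately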

This commit is contained in:
hal8174 2025-07-06 12:46:48 +02:00
parent 5506079158
commit 8053532039
Signed by: hal8174
SSH key fingerprint: SHA256:JwuqS+eVfISfKr+DkDQ6NWAbGd1jFAHkPpCM1yCnlTs

View file

@ -121,7 +121,7 @@ fn read_traces(path: impl AsRef<Path>, capacity: usize) -> Vec<Vec<u8>> {
r r
} }
const LANES: usize = 16; const LANES: usize = 32;
fn correlation( fn correlation(
bit: usize, bit: usize,
@ -130,31 +130,33 @@ fn correlation(
cyphtertext: &[[Vec<u8>; 16]], cyphtertext: &[[Vec<u8>; 16]],
traces: &[Vec<u8>], traces: &[Vec<u8>],
) -> f64 { ) -> f64 {
let mut x = Simd::<u64, LANES>::default(); assert!(
let mut y = Simd::<u64, LANES>::default(); traces[0].len() < (1 << 16) * LANES,
let mut xy = Simd::<u64, LANES>::default(); "Integers might overflow"
let mut xsqr = Simd::<u64, LANES>::default(); );
let mut ysqr = Simd::<u64, LANES>::default();
let mut x = Simd::<u32, LANES>::default();
let mut y = Simd::<u32, LANES>::default();
let mut xy = Simd::<u32, LANES>::default();
let mut ysqr = Simd::<u32, LANES>::default();
let mask = Simd::<u8, LANES>::splat(1 << (bit % 8)); let mask = Simd::<u8, LANES>::splat(1 << (bit % 8));
for i in 0..traces[0].len() / LANES { for i in 0..traces[0].len() / LANES {
let xi = Simd::<u8, LANES>::from_slice(&cyphtertext[key_hypothesis][bit / 8][i * LANES..]); let xi = Simd::<u8, LANES>::from_slice(&cyphtertext[key_hypothesis][bit / 8][i * LANES..]);
let xi = (xi & mask).cast::<u64>(); let xi = (xi & mask).cast();
let yi = Simd::<u8, LANES>::from_slice(&traces[trace_index][i * LANES..]).cast::<u64>(); let yi = Simd::<u8, LANES>::from_slice(&traces[trace_index][i * LANES..]).cast();
x += xi; x += xi;
y += yi; y += yi;
xy += xi * yi; xy += xi * yi;
xsqr += xi * xi;
ysqr += yi * yi; ysqr += yi * yi;
} }
let mut x = x.reduce_sum() as i64; let mut x = x.cast::<u64>().reduce_sum() as i64;
let mut y = y.reduce_sum() as i64; let mut y = y.cast::<u64>().reduce_sum() as i64;
let mut xy = xy.reduce_sum() as i64; let mut xy = xy.cast::<u64>().reduce_sum() as i64;
let mut xsqr = xsqr.reduce_sum() as i64; let mut ysqr = ysqr.cast::<u64>().reduce_sum() as i64;
let mut ysqr = ysqr.reduce_sum() as i64;
for i in (traces[0].len() / LANES) * LANES..traces[0].len() { for i in (traces[0].len() / LANES) * LANES..traces[0].len() {
let xi = (cyphtertext[key_hypothesis][bit / 8][i] & (1 << (bit % 8))) as i64; let xi = (cyphtertext[key_hypothesis][bit / 8][i] & (1 << (bit % 8))) as i64;
@ -163,10 +165,11 @@ fn correlation(
x += xi; x += xi;
y += yi; y += yi;
xy += xi * yi; xy += xi * yi;
xsqr += xi * xi;
ysqr += yi * yi; ysqr += yi * yi;
} }
let xsqr = x * (1 << (bit % 8));
let n = traces[0].len() as i64; let n = traces[0].len() as i64;
let num = (n * xy - x * y) as f64; let num = (n * xy - x * y) as f64;
let denom = f64::sqrt((n * xsqr - x * x) as f64) * f64::sqrt((n * ysqr - y * y) as f64); let denom = f64::sqrt((n * xsqr - x * x) as f64) * f64::sqrt((n * ysqr - y * y) as f64);