From 8053532039e11d2cd1909df105873cca55f6f711 Mon Sep 17 00:00:00 2001 From: hal8174 Date: Sun, 6 Jul 2025 12:46:48 +0200 Subject: [PATCH] More performance improvements --- src/main.rs | 33 ++++++++++++++++++--------------- 1 file changed, 18 insertions(+), 15 deletions(-) diff --git a/src/main.rs b/src/main.rs index 3b8ea61..ecba62f 100644 --- a/src/main.rs +++ b/src/main.rs @@ -121,7 +121,7 @@ fn read_traces(path: impl AsRef, capacity: usize) -> Vec> { r } -const LANES: usize = 16; +const LANES: usize = 32; fn correlation( bit: usize, @@ -130,31 +130,33 @@ fn correlation( cyphtertext: &[[Vec; 16]], traces: &[Vec], ) -> f64 { - let mut x = Simd::::default(); - let mut y = Simd::::default(); - let mut xy = Simd::::default(); - let mut xsqr = Simd::::default(); - let mut ysqr = Simd::::default(); + assert!( + traces[0].len() < (1 << 16) * LANES, + "Integers might overflow" + ); + + let mut x = Simd::::default(); + let mut y = Simd::::default(); + let mut xy = Simd::::default(); + let mut ysqr = Simd::::default(); let mask = Simd::::splat(1 << (bit % 8)); for i in 0..traces[0].len() / LANES { let xi = Simd::::from_slice(&cyphtertext[key_hypothesis][bit / 8][i * LANES..]); - let xi = (xi & mask).cast::(); - let yi = Simd::::from_slice(&traces[trace_index][i * LANES..]).cast::(); + let xi = (xi & mask).cast(); + let yi = Simd::::from_slice(&traces[trace_index][i * LANES..]).cast(); x += xi; y += yi; xy += xi * yi; - xsqr += xi * xi; ysqr += yi * yi; } - let mut x = x.reduce_sum() as i64; - let mut y = y.reduce_sum() as i64; - let mut xy = xy.reduce_sum() as i64; - let mut xsqr = xsqr.reduce_sum() as i64; - let mut ysqr = ysqr.reduce_sum() as i64; + let mut x = x.cast::().reduce_sum() as i64; + let mut y = y.cast::().reduce_sum() as i64; + let mut xy = xy.cast::().reduce_sum() as i64; + let mut ysqr = ysqr.cast::().reduce_sum() as i64; for i in (traces[0].len() / LANES) * LANES..traces[0].len() { let xi = (cyphtertext[key_hypothesis][bit / 8][i] & (1 << (bit % 8))) as i64; @@ -163,10 +165,11 @@ fn correlation( x += xi; y += yi; xy += xi * yi; - xsqr += xi * xi; ysqr += yi * yi; } + let xsqr = x * (1 << (bit % 8)); + let n = traces[0].len() as i64; let num = (n * xy - x * y) as f64; let denom = f64::sqrt((n * xsqr - x * x) as f64) * f64::sqrt((n * ysqr - y * y) as f64);