More performance improvements: 32 SIMD lanes, u32 accumulators, and xsqr derived from x

This commit is contained in:
hal8174 2025-07-06 12:46:48 +02:00
parent 5506079158
commit 8053532039
Signed by: hal8174
SSH key fingerprint: SHA256:JwuqS+eVfISfKr+DkDQ6NWAbGd1jFAHkPpCM1yCnlTs

View file

@ -121,7 +121,7 @@ fn read_traces(path: impl AsRef<Path>, capacity: usize) -> Vec<Vec<u8>> {
r
}
const LANES: usize = 16;
const LANES: usize = 32;
fn correlation(
bit: usize,
@ -130,31 +130,33 @@ fn correlation(
cyphtertext: &[[Vec<u8>; 16]],
traces: &[Vec<u8>],
) -> f64 {
let mut x = Simd::<u64, LANES>::default();
let mut y = Simd::<u64, LANES>::default();
let mut xy = Simd::<u64, LANES>::default();
let mut xsqr = Simd::<u64, LANES>::default();
let mut ysqr = Simd::<u64, LANES>::default();
assert!(
traces[0].len() < (1 << 16) * LANES,
"Integers might overflow"
);
let mut x = Simd::<u32, LANES>::default();
let mut y = Simd::<u32, LANES>::default();
let mut xy = Simd::<u32, LANES>::default();
let mut ysqr = Simd::<u32, LANES>::default();
let mask = Simd::<u8, LANES>::splat(1 << (bit % 8));
for i in 0..traces[0].len() / LANES {
let xi = Simd::<u8, LANES>::from_slice(&cyphtertext[key_hypothesis][bit / 8][i * LANES..]);
let xi = (xi & mask).cast::<u64>();
let yi = Simd::<u8, LANES>::from_slice(&traces[trace_index][i * LANES..]).cast::<u64>();
let xi = (xi & mask).cast();
let yi = Simd::<u8, LANES>::from_slice(&traces[trace_index][i * LANES..]).cast();
x += xi;
y += yi;
xy += xi * yi;
xsqr += xi * xi;
ysqr += yi * yi;
}
let mut x = x.reduce_sum() as i64;
let mut y = y.reduce_sum() as i64;
let mut xy = xy.reduce_sum() as i64;
let mut xsqr = xsqr.reduce_sum() as i64;
let mut ysqr = ysqr.reduce_sum() as i64;
let mut x = x.cast::<u64>().reduce_sum() as i64;
let mut y = y.cast::<u64>().reduce_sum() as i64;
let mut xy = xy.cast::<u64>().reduce_sum() as i64;
let mut ysqr = ysqr.cast::<u64>().reduce_sum() as i64;
for i in (traces[0].len() / LANES) * LANES..traces[0].len() {
let xi = (cyphtertext[key_hypothesis][bit / 8][i] & (1 << (bit % 8))) as i64;
@ -163,10 +165,11 @@ fn correlation(
x += xi;
y += yi;
xy += xi * yi;
xsqr += xi * xi;
ysqr += yi * yi;
}
let xsqr = x * (1 << (bit % 8));
let n = traces[0].len() as i64;
let num = (n * xy - x * y) as f64;
let denom = f64::sqrt((n * xsqr - x * x) as f64) * f64::sqrt((n * ysqr - y * y) as f64);