More performance improvements
This commit is contained in:
parent
5506079158
commit
8053532039
1 changed file with 18 additions and 15 deletions
33
src/main.rs
33
src/main.rs
|
|
@ -121,7 +121,7 @@ fn read_traces(path: impl AsRef<Path>, capacity: usize) -> Vec<Vec<u8>> {
|
||||||
r
|
r
|
||||||
}
|
}
|
||||||
|
|
||||||
const LANES: usize = 16;
|
const LANES: usize = 32;
|
||||||
|
|
||||||
fn correlation(
|
fn correlation(
|
||||||
bit: usize,
|
bit: usize,
|
||||||
|
|
@ -130,31 +130,33 @@ fn correlation(
|
||||||
cyphtertext: &[[Vec<u8>; 16]],
|
cyphtertext: &[[Vec<u8>; 16]],
|
||||||
traces: &[Vec<u8>],
|
traces: &[Vec<u8>],
|
||||||
) -> f64 {
|
) -> f64 {
|
||||||
let mut x = Simd::<u64, LANES>::default();
|
assert!(
|
||||||
let mut y = Simd::<u64, LANES>::default();
|
traces[0].len() < (1 << 16) * LANES,
|
||||||
let mut xy = Simd::<u64, LANES>::default();
|
"Integers might overflow"
|
||||||
let mut xsqr = Simd::<u64, LANES>::default();
|
);
|
||||||
let mut ysqr = Simd::<u64, LANES>::default();
|
|
||||||
|
let mut x = Simd::<u32, LANES>::default();
|
||||||
|
let mut y = Simd::<u32, LANES>::default();
|
||||||
|
let mut xy = Simd::<u32, LANES>::default();
|
||||||
|
let mut ysqr = Simd::<u32, LANES>::default();
|
||||||
|
|
||||||
let mask = Simd::<u8, LANES>::splat(1 << (bit % 8));
|
let mask = Simd::<u8, LANES>::splat(1 << (bit % 8));
|
||||||
|
|
||||||
for i in 0..traces[0].len() / LANES {
|
for i in 0..traces[0].len() / LANES {
|
||||||
let xi = Simd::<u8, LANES>::from_slice(&cyphtertext[key_hypothesis][bit / 8][i * LANES..]);
|
let xi = Simd::<u8, LANES>::from_slice(&cyphtertext[key_hypothesis][bit / 8][i * LANES..]);
|
||||||
let xi = (xi & mask).cast::<u64>();
|
let xi = (xi & mask).cast();
|
||||||
let yi = Simd::<u8, LANES>::from_slice(&traces[trace_index][i * LANES..]).cast::<u64>();
|
let yi = Simd::<u8, LANES>::from_slice(&traces[trace_index][i * LANES..]).cast();
|
||||||
|
|
||||||
x += xi;
|
x += xi;
|
||||||
y += yi;
|
y += yi;
|
||||||
xy += xi * yi;
|
xy += xi * yi;
|
||||||
xsqr += xi * xi;
|
|
||||||
ysqr += yi * yi;
|
ysqr += yi * yi;
|
||||||
}
|
}
|
||||||
|
|
||||||
let mut x = x.reduce_sum() as i64;
|
let mut x = x.cast::<u64>().reduce_sum() as i64;
|
||||||
let mut y = y.reduce_sum() as i64;
|
let mut y = y.cast::<u64>().reduce_sum() as i64;
|
||||||
let mut xy = xy.reduce_sum() as i64;
|
let mut xy = xy.cast::<u64>().reduce_sum() as i64;
|
||||||
let mut xsqr = xsqr.reduce_sum() as i64;
|
let mut ysqr = ysqr.cast::<u64>().reduce_sum() as i64;
|
||||||
let mut ysqr = ysqr.reduce_sum() as i64;
|
|
||||||
|
|
||||||
for i in (traces[0].len() / LANES) * LANES..traces[0].len() {
|
for i in (traces[0].len() / LANES) * LANES..traces[0].len() {
|
||||||
let xi = (cyphtertext[key_hypothesis][bit / 8][i] & (1 << (bit % 8))) as i64;
|
let xi = (cyphtertext[key_hypothesis][bit / 8][i] & (1 << (bit % 8))) as i64;
|
||||||
|
|
@ -163,10 +165,11 @@ fn correlation(
|
||||||
x += xi;
|
x += xi;
|
||||||
y += yi;
|
y += yi;
|
||||||
xy += xi * yi;
|
xy += xi * yi;
|
||||||
xsqr += xi * xi;
|
|
||||||
ysqr += yi * yi;
|
ysqr += yi * yi;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
let xsqr = x * (1 << (bit % 8));
|
||||||
|
|
||||||
let n = traces[0].len() as i64;
|
let n = traces[0].len() as i64;
|
||||||
let num = (n * xy - x * y) as f64;
|
let num = (n * xy - x * y) as f64;
|
||||||
let denom = f64::sqrt((n * xsqr - x * x) as f64) * f64::sqrt((n * ysqr - y * y) as f64);
|
let denom = f64::sqrt((n * xsqr - x * x) as f64) * f64::sqrt((n * ysqr - y * y) as f64);
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue