diff --git a/math/src/ring/impl_u64/automorphism.rs b/math/src/ring/impl_u64/automorphism.rs
index bbb93c9..9a82e9e 100644
--- a/math/src/ring/impl_u64/automorphism.rs
+++ b/math/src/ring/impl_u64/automorphism.rs
@@ -1,7 +1,7 @@
-use crate::modulus::{REDUCEMOD, WordOps};
+use crate::modulus::{ScalarOperations, ONCE};
+use crate::modulus::{WordOps, REDUCEMOD};
 use crate::poly::Poly;
 use crate::ring::Ring;
-use crate::modulus::{ONCE, ScalarOperations};
 
 /// Returns a lookup table for the automorphism X^{i} -> X^{i * k mod nth_root}.
 /// Method will panic if n or nth_root are not power-of-two.
@@ -45,7 +45,6 @@ pub fn automorphism_index<const NTT: bool>(n: usize, nth_root: usize, gal_el: us
 }
 
 impl Ring<u64> {
-
     // b <- auto(a)
     pub fn a_apply_automorphism_into_b<const NTT: bool>(
         &self,
@@ -53,33 +52,33 @@ impl Ring<u64> {
         gal_el: usize,
         nth_root: usize,
         b: &mut Poly<u64>,
-    ){
+    ) {
         self.apply_automorphism_core::<0, ONCE, NTT>(a, gal_el, nth_root, b)
     }
 
     // b <- REDUCEMOD(b + auto(a))
-    pub fn a_apply_automorphism_add_b_into_b<const REDUCE:REDUCEMOD, const NTT: bool>(
+    pub fn a_apply_automorphism_add_b_into_b<const REDUCE: REDUCEMOD, const NTT: bool>(
         &self,
         a: &Poly<u64>,
         gal_el: usize,
         nth_root: usize,
         b: &mut Poly<u64>,
-    ){
+    ) {
         self.apply_automorphism_core::<1, REDUCE, NTT>(a, gal_el, nth_root, b)
     }
 
     // b <- REDUCEMOD(b - auto(a))
-    pub fn a_apply_automorphism_sub_b_into_b<const REDUCE:REDUCEMOD, const NTT:bool>(
-    &self,
-    a: &Poly<u64>,
-    gal_el: usize,
-    nth_root: usize,
-    b: &mut Poly<u64>,
-    ){
+    pub fn a_apply_automorphism_sub_b_into_b<const REDUCE: REDUCEMOD, const NTT: bool>(
+        &self,
+        a: &Poly<u64>,
+        gal_el: usize,
+        nth_root: usize,
+        b: &mut Poly<u64>,
+    ) {
         self.apply_automorphism_core::<2, REDUCE, NTT>(a, gal_el, nth_root, b)
     }
 
-    fn apply_automorphism_core<const MOD:u8, const REDUCE:REDUCEMOD, const NTT: bool>(
+    fn apply_automorphism_core<const MOD: u8, const REDUCE: REDUCEMOD, const NTT: bool>(
         &self,
         a: &Poly<u64>,
         gal_el: usize,
@@ -116,11 +115,17 @@ impl Ring<u64> {
                 let i_rev: usize = 2 * i.reverse_bits_msb(log_nth_root_half) + 1;
                 let gal_el_i: usize = (((gal_el * i_rev) & mask) - 1) >> 1;
                 let idx: usize = gal_el_i.reverse_bits_msb(log_nth_root_half);
-                match MOD{
-                    0 =>{b_vec[idx] = *ai}
-                    1=>{self.modulus.sa_add_sb_into_sb::<REDUCE>(ai, &mut b_vec[idx])}
-                    2=>{self.modulus.sa_sub_sb_into_sa::<1, REDUCE>(ai, &mut b_vec[idx])}
-                    _=>{panic!("invalid const MOD should be 0, 1, or 2 but is {}", MOD)}
+                match MOD {
+                    0 => b_vec[idx] = *ai,
+                    1 => self
+                        .modulus
+                        .sa_add_sb_into_sb::<REDUCE>(ai, &mut b_vec[idx]),
+                    2 => self
+                        .modulus
+                        .sa_sub_sb_into_sa::<1, REDUCE>(ai, &mut b_vec[idx]),
+                    _ => {
+                        panic!("invalid const MOD should be 0, 1, or 2 but is {}", MOD)
+                    }
                 }
             });
         } else {
@@ -133,11 +138,17 @@ impl Ring<u64> {
                 let sign: u64 = ((gal_el_i >> log_n) & 1) as u64;
                 let i_out: usize = gal_el_i & mask;
                 let v: u64 = ai * (sign ^ 1) | (q - ai) * sign;
-                match MOD{
-                    0 =>{b_vec[i_out] = v}
-                    1=>{self.modulus.sa_add_sb_into_sb::<REDUCE>(&v, &mut b_vec[i_out])}
-                    2=>{self.modulus.sa_sub_sb_into_sa::<1, REDUCE>(&v, &mut b_vec[i_out])}
-                    _=>{panic!("invalid const MOD should be 0, 1, or 2 but is {}", MOD)}
+                match MOD {
+                    0 => b_vec[i_out] = v,
+                    1 => self
+                        .modulus
+                        .sa_add_sb_into_sb::<REDUCE>(&v, &mut b_vec[i_out]),
+                    2 => self
+                        .modulus
+                        .sa_sub_sb_into_sa::<1, REDUCE>(&v, &mut b_vec[i_out]),
+                    _ => {
+                        panic!("invalid const MOD should be 0, 1, or 2 but is {}", MOD)
+                    }
                 }
             });
         }
@@ -149,32 +160,38 @@ impl Ring<u64> {
         a: &Poly<u64>,
         idx: &[usize],
         b: &mut Poly<u64>,
-    ){
+    ) {
         self.automorphism_from_index_core::<0, ONCE, NTT>(a, idx, b)
     }
 
     // b <- REDUCEMOD(b + auto(a))
-    pub fn a_apply_automorphism_from_index_add_b_into_b<const REDUCE:REDUCEMOD, const NTT: bool>(
+    pub fn a_apply_automorphism_from_index_add_b_into_b<
+        const REDUCE: REDUCEMOD,
+        const NTT: bool,
+    >(
         &self,
         a: &Poly<u64>,
         idx: &[usize],
         b: &mut Poly<u64>,
-    ){
+    ) {
         self.automorphism_from_index_core::<1, REDUCE, NTT>(a, idx, b)
     }
 
     // b <- REDUCEMOD(b - auto(a))
-    pub fn a_apply_automorphism_from_index_sub_b_into_b<const REDUCE:REDUCEMOD, const NTT:bool>(
-    &self,
-    a: &Poly<u64>,
-    idx: &[usize],
-    b: &mut Poly<u64>,
-    ){
+    pub fn a_apply_automorphism_from_index_sub_b_into_b<
+        const REDUCE: REDUCEMOD,
+        const NTT: bool,
+    >(
+        &self,
+        a: &Poly<u64>,
+        idx: &[usize],
+        b: &mut Poly<u64>,
+    ) {
         self.automorphism_from_index_core::<2, REDUCE, NTT>(a, idx, b)
     }
 
     // b <- auto(a) if OVERWRITE else b <- REDUCEMOD(b + auto(a))
-    fn automorphism_from_index_core<const MOD:u8, const REDUCE:REDUCEMOD, const NTT: bool>(
+    fn automorphism_from_index_core<const MOD: u8, const REDUCE: REDUCEMOD, const NTT: bool>(
         &self,
         a: &Poly<u64>,
         idx: &[usize],
@@ -191,12 +208,16 @@ impl Ring<u64> {
         let a_vec: &Vec<u64> = &a.0;
 
         if NTT {
-            a_vec.iter().enumerate().for_each(|(i, ai)| {
-                match MOD{
-                    0 =>{ b_vec[idx[i]] = *ai}
-                    1 =>{self.modulus.sa_add_sb_into_sb::<REDUCE>(ai, &mut b_vec[idx[i]])}
-                    2=>{self.modulus.sa_sub_sb_into_sa::<1, REDUCE>(ai, &mut b_vec[idx[i]])}
-                    _=>{panic!("invalid const MOD should be 0, 1, or 2 but is {}", MOD)}
+            a_vec.iter().enumerate().for_each(|(i, ai)| match MOD {
+                0 => b_vec[idx[i]] = *ai,
+                1 => self
+                    .modulus
+                    .sa_add_sb_into_sb::<REDUCE>(ai, &mut b_vec[idx[i]]),
+                2 => self
+                    .modulus
+                    .sa_sub_sb_into_sa::<1, REDUCE>(ai, &mut b_vec[idx[i]]),
+                _ => {
+                    panic!("invalid const MOD should be 0, 1, or 2 but is {}", MOD)
                 }
             });
         } else {
@@ -206,11 +227,17 @@ impl Ring<u64> {
             a_vec.iter().enumerate().for_each(|(i, ai)| {
                 let sign: u64 = (idx[i] >> usize::BITS - 1) as u64;
                 let v: u64 = ai * (sign ^ 1) | (q - ai) * sign;
-                match MOD{
-                    0 =>{b_vec[idx[i] & mask] = v}
-                    1 =>{self.modulus.sa_add_sb_into_sb::<REDUCE>(&v, &mut b_vec[idx[i] & mask])}
-                    2=>{self.modulus.sa_sub_sb_into_sa::<1, REDUCE>(&v, &mut b_vec[idx[i] & mask])}
-                    _=>{panic!("invalid const MOD should be 0, 1, or 2 but is {}", MOD)}
+                match MOD {
+                    0 => b_vec[idx[i] & mask] = v,
+                    1 => self
+                        .modulus
+                        .sa_add_sb_into_sb::<REDUCE>(&v, &mut b_vec[idx[i] & mask]),
+                    2 => self
+                        .modulus
+                        .sa_sub_sb_into_sa::<1, REDUCE>(&v, &mut b_vec[idx[i] & mask]),
+                    _ => {
+                        panic!("invalid const MOD should be 0, 1, or 2 but is {}", MOD)
+                    }
                 }
             });
         }
diff --git a/math/src/ring/impl_u64/mod.rs b/math/src/ring/impl_u64/mod.rs
index 00b6dfa..6a426ff 100644
--- a/math/src/ring/impl_u64/mod.rs
+++ b/math/src/ring/impl_u64/mod.rs
@@ -1,8 +1,8 @@
 pub mod automorphism;
 pub mod packing;
-pub mod trace;
 pub mod rescaling_rns;
 pub mod ring;
 pub mod ring_rns;
 pub mod sampling;
-pub mod utils;
\ No newline at end of file
+pub mod trace;
+pub mod utils;
diff --git a/math/src/ring/impl_u64/packing.rs b/math/src/ring/impl_u64/packing.rs
index 8b1002f..2bccd80 100644
--- a/math/src/ring/impl_u64/packing.rs
+++ b/math/src/ring/impl_u64/packing.rs
@@ -7,7 +7,6 @@ use std::cmp::min;
 use std::rc::Rc;
 
 impl Ring<u64> {
-
     pub fn pack<const ZEROGARBAGE: bool, const NTT: bool>(
         &self,
         polys: &mut Vec<Option<Poly<u64>>>,
@@ -78,9 +77,16 @@ impl Ring<u64> {
                     let gal_el: usize = self.galois_element((1 << i) >> 1, i == 0, log_nth_root);
 
                     if !polys_hi[j].is_none() {
-                        self.a_apply_automorphism_add_b_into_b::<ONCE, true>(&tmpa, gal_el, 2 << self.log_n(), poly_lo);
+                        self.a_apply_automorphism_add_b_into_b::<ONCE, true>(
+                            &tmpa,
+                            gal_el,
+                            2 << self.log_n(),
+                            poly_lo,
+                        );
                     } else {
-                        self.a_apply_automorphism_into_b::<true>(poly_lo, gal_el, nth_root, &mut tmpa);
+                        self.a_apply_automorphism_into_b::<true>(
+                            poly_lo, gal_el, nth_root, &mut tmpa,
+                        );
                         self.a_add_b_into_b::<ONCE>(&tmpa, poly_lo);
                     }
                 } else if let Some(poly_hi) = polys_hi[j].as_mut() {
@@ -102,14 +108,16 @@ impl Ring<u64> {
     }
 }
 
-
 // Returns the largest gap between two values in an ordered array of distinct values.
 // Panics if the array is not ordered or values are not distincts.
 fn max_gap(vec: &[usize]) -> usize {
     let mut gap: usize = usize::MAX;
     for i in 1..vec.len() {
         let (l, r) = (vec[i - 1], vec[i]);
-        assert!(r > l, "invalid input vec: not sorted or collision between indices");
+        assert!(
+            r > l,
+            "invalid input vec: not sorted or collision between indices"
+        );
         gap = min(gap, r - l);
         if gap == 1 {
             break;
@@ -118,111 +126,193 @@ fn max_gap(vec: &[usize]) -> usize {
     gap
 }
 
-
-pub struct StreamRepacker{
+pub struct StreamRepacker {
     accumulators: Vec<Accumulator>,
-    buf0: Poly<u64>,
-    buf1: Poly<u64>,
-    buf_auto: Poly<u64>,
+    tmp_a: Poly<u64>,
+    tmp_b: Poly<u64>,
     x_pow_2: Vec<Poly<Montgomery<u64>>>,
     n_inv: Barrett<u64>,
+    pub results: Vec<Poly<u64>>,
     counter: usize,
 }
 
-pub struct Accumulator{
-    buf: [Option<Rc<Poly<u64>>>; 2],
+pub struct Accumulator {
+    buf: Poly<u64>,
+    value: bool,
     control: bool,
 }
 
-impl Accumulator{
-    pub fn new(r: &Ring<u64>) -> Self{
-        Self { buf: [Some(Rc::new(r.new_poly())), None], control: false }
+impl Accumulator {
+    pub fn new(r: &Ring<u64>) -> Self {
+        Self {
+            buf: r.new_poly(),
+            value: false,
+            control: false,
+        }
     }
 }
 
-impl StreamRepacker{
-    pub fn new(r: &Ring<u64>) -> Self{
-
+impl StreamRepacker {
+    pub fn new(r: &Ring<u64>) -> Self {
         let mut accumulators: Vec<Accumulator> = Vec::<Accumulator>::new();
 
-        (0..r.log_n()).for_each(|_|
-            accumulators.push(Accumulator::new(r))
-        );
+        (0..r.log_n()).for_each(|_| accumulators.push(Accumulator::new(r)));
 
-        Self{
+        Self {
             accumulators: accumulators,
-            buf0: r.new_poly(),
-            buf1: r.new_poly(),
-            buf_auto: r.new_poly(),
+            tmp_a: r.new_poly(),
+            tmp_b: r.new_poly(),
             x_pow_2: r.gen_x_pow_2::<true, false>(r.log_n()),
             n_inv: r.modulus.barrett.prepare(r.modulus.inv(r.n() as u64)),
-            counter:0,
+            results: Vec::<Poly<u64>>::new(),
+            counter: 0,
         }
     }
 
-    fn merge_ab(&mut self, r: &Ring<u64>, a: &Poly<u64>, b: &Poly<u64>, i: usize) -> &Poly<u64>{
-
-        let tmp_a: &mut Poly<u64> = &mut self.buf0;
-        let tmp_b: &mut Poly<u64> = &mut self.buf1;
-
-        r.a_mul_b_montgomery_into_c::<ONCE>(a, &self.x_pow_2[r.log_n()-i-1], tmp_a);
-        r.a_sub_b_into_c::<1, ONCE>(a, tmp_a, tmp_b);
-        r.a_add_b_into_b::<ONCE>(a, tmp_a);
-
-        if i == 0{
-            r.a_mul_b_scalar_barrett_into_a::<ONCE>(&self.n_inv, tmp_a);
-            r.a_mul_b_scalar_barrett_into_a::<ONCE>(&self.n_inv, tmp_b);
+    pub fn reset(&mut self) {
+        for i in 0..self.accumulators.len() {
+            self.accumulators[i].value = false;
+            self.accumulators[i].control = false;
         }
-
-        let log_nth_root = r.log_n()+1;
-        let nth_root = 1<<log_nth_root;
-            
-        let gal_el: usize = r.galois_element((1 << i) >> 1, i == 0, log_nth_root);
-
-        r.a_apply_automorphism_add_b_into_b::<ONCE, true>(tmp_b, gal_el, nth_root, tmp_a);
-
-        tmp_a
+        self.counter = 0;
     }
 
-    fn merge_a(&mut self, r: &Ring<u64>, a: &Poly<u64>, i: usize) -> &Poly<u64>{
-
-        let tmp_a: &mut Poly<u64> = &mut self.buf0;
- 
-        let log_nth_root = r.log_n()+1;
-        let nth_root = 1<<log_nth_root;
-        let gal_el: usize = r.galois_element((1 << i) >> 1, i == 0, log_nth_root);
-
-        if i == 0{
-            r.a_mul_b_scalar_barrett_into_a::<ONCE>(&self.n_inv, tmp_a);
-            r.a_apply_automorphism_into_b::<true>(tmp_a, gal_el, nth_root, &mut self.buf_auto)
-            r.a_add_b_into_c::<ONCE>(&self.buf_auto, a, tmp_a);
-        }else{
-            r.a_apply_automorphism_into_b::<true>(a, gal_el, nth_root, tmp_a);
-            r.a_add_b_into_b::<ONCE>(a, tmp_a);
+    pub fn add<const NTT: bool>(&mut self, r: &Ring<u64>, a: Option<&Poly<u64>>) {
+        assert!(NTT, "invalid parameterization: const NTT must be true");
+        pack_core::<NTT>(
+            r,
+            a, // `None` stands for an input slot without a polynomial
+            &mut self.accumulators,
+            &self.n_inv,
+            &self.x_pow_2,
+            &mut self.tmp_a, // scratch buffer
+            &mut self.tmp_b, // scratch buffer
+            0, // always enter the accumulator chain at level 0
+        );
+        self.counter += 1; // counts every call, `None` inputs included
+        if self.counter == r.n() { // r.n() inputs received: keep a copy of the last accumulator
+            self.results
+                .push(self.accumulators[r.log_n() - 1].buf.clone());
+            self.reset(); // clear the flags and the counter for the next batch
         }
-
-        tmp_a
     }
 
-    fn merge_b(&mut self, r: &Ring<u64>, b: &Poly<u64>, i: usize) -> &Poly<u64>{
+    // Pads a partially filled batch with `None` so that `add` emits its result.
+    pub fn flush<const NTT: bool>(&mut self, r: &Ring<u64>) {
+        assert!(NTT, "invalid parameterization: const NTT must be true");
+        // `add` pushes to `results` and zeroes `counter` on the r.n()-th input.
+        while self.counter != 0 {
+            self.add::<NTT>(r, None);
+        }
+    }
+}
 
-        let tmp_a: &mut Poly<u64> = &mut self.buf0;
-        let tmp_b: &mut Poly<u64> = &mut self.buf1;
+// Feeds `a` (a polynomial, or `None` for an absent input) into level `i` of
+// the accumulator chain. `accumulators[0]` is the accumulator of level `i` and
+// the rest of the slice holds the levels above it.
+//
+// A level whose `control` flag is unset stores the input and returns. A level
+// whose flag is set is merged with the input by `combine`, has its flag
+// cleared, and hands its buffer (or `None` if it holds no value) to level
+// `i + 1`. `tmp_a` and `tmp_b` are scratch polynomials passed to `combine`.
+fn pack_core<const NTT: bool>(
+    r: &Ring<u64>,
+    a: Option<&Poly<u64>>,
+    accumulators: &mut [Accumulator],
+    n_inv: &Barrett<u64>,
+    x_pow_2: &[Poly<u64>],
+    tmp_a: &mut Poly<u64>,
+    tmp_b: &mut Poly<u64>,
+    i: usize,
+) {
+    // Recursion ends once every level has been visited.
+    if i == r.log_n() {
+        return;
+    }
 
-        let log_nth_root = r.log_n()+1;
-        let nth_root = 1<<log_nth_root;
-        let gal_el: usize = r.galois_element((1 << i) >> 1, i == 0, log_nth_root);
+    // Split so the current level and the upper levels can be borrowed mutably at once.
+    let (head, tail) = accumulators.split_at_mut(1);
+    let acc: &mut Accumulator = &mut head[0];
 
-        if i == 0{
-            r.a_mul_b_scalar_barrett_into_c::<ONCE>(&self.n_inv, b, tmp_b);
-            r.a_mul_b_montgomery_into_a::<ONCE>(&self.x_pow_2[r.log_n()-i-1], tmp_b);
-        }else{
-            r.a_mul_b_montgomery_into_c::<ONCE>(b, &self.x_pow_2[r.log_n()-i-1], tmp_b);
+    if !acc.control {
+        // First input of a pair: keep it and wait for the second one.
+        if let Some(a_ref) = a {
+            acc.buf.copy_from(a_ref);
+        }
+        acc.value = a.is_some();
+        acc.control = true;
+        return;
+    }
+
+    // Second input of a pair: merge it into the stored one.
+    // The caller's NTT flag is forwarded so `combine` applies the matching automorphism.
+    combine::<NTT>(r, acc, a, n_inv, x_pow_2, tmp_a, tmp_b, i);
+    acc.control = false;
+
+    // Hand the merged buffer, or `None` if this level holds no value, to the next level.
+    let carry: Option<&Poly<u64>> = if acc.value { Some(&acc.buf) } else { None };
+    pack_core::<NTT>(r, carry, tail, n_inv, x_pow_2, tmp_a, tmp_b, i + 1);
+}
+
+// Merges `b` into the operand stored in `acc.buf` (called `a` below) at level `i`.
+// `a` is present when `acc.value` is set. `X^t` denotes `x_pow_2[log_n - i - 1]`,
+// `phi` the automorphism given by `gal_el`; at `i == 0` operands are scaled by `n_inv`.
+//
+// - `a` and `b` present: a <- a + b * X^t + phi(a - b * X^t)
+// - only `a` present:    a <- a + phi(a)
+// - only `b` present:    a <- b * X^t - phi(b * X^t), and `acc.value` is set
+// - neither present:     nothing is written
+// `tmp_a` and `tmp_b` are overwritten as scratch space.
+fn combine<const NTT: bool>(
+    r: &Ring<u64>,
+    acc: &mut Accumulator,
+    b: Option<&Poly<u64>>,
+    n_inv: &Barrett<u64>,
+    x_pow_2: &[Poly<u64>],
+    tmp_a: &mut Poly<u64>,
+    tmp_b: &mut Poly<u64>,
+    i: usize,
+) {
+    let log_n = r.log_n();
+    let log_nth_root = log_n + 1;
+    let nth_root = 1 << log_nth_root;
+    let gal_el: usize = r.galois_element((1 << i) >> 1, i == 0, log_nth_root);
+    let a: &mut Poly<u64> = &mut acc.buf;
+
+    if acc.value {
+        if i == 0 {
+            r.a_mul_b_scalar_barrett_into_a::<ONCE>(n_inv, a);
         }
 
-        r.a_apply_automorphism_into_b::<true>(tmp_b, gal_el, nth_root, &mut self.buf_auto);
-        r.a_sub_b_into_a::<1, ONCE>(&self.buf_auto, tmp_b);
+        if let Some(b) = b {
+            // tmp_a = b * X^t
+            r.a_mul_b_montgomery_into_c::<ONCE>(b, &x_pow_2[log_n - i - 1], tmp_a);
 
-        tmp_b
+            if i == 0 {
+                r.a_mul_b_scalar_barrett_into_a::<ONCE>(n_inv, tmp_a);
+            }
+            // tmp_b = a - b * X^t
+            r.a_sub_b_into_c::<1, ONCE>(a, tmp_a, tmp_b);
+            // a = a + b * X^t
+            r.a_add_b_into_b::<ONCE>(tmp_a, a);
+            // a = a + b * X^t + phi(a - b * X^t)
+            r.a_apply_automorphism_add_b_into_b::<ONCE, NTT>(tmp_b, gal_el, nth_root, a);
+        } else {
+            // tmp_a = phi(a)
+            r.a_apply_automorphism_into_b::<NTT>(a, gal_el, nth_root, tmp_a);
+            // a = a + phi(a)
+            r.a_add_b_into_b::<ONCE>(tmp_a, a);
+        }
+    } else if let Some(b) = b {
+        // tmp_b = b * X^t
+        r.a_mul_b_montgomery_into_c::<ONCE>(b, &x_pow_2[log_n - i - 1], tmp_b);
+        if i == 0 {
+            r.a_mul_b_scalar_barrett_into_a::<ONCE>(n_inv, tmp_b);
+        }
+        // tmp_a = phi(b * X^t)
+        r.a_apply_automorphism_into_b::<NTT>(tmp_b, gal_el, nth_root, tmp_a);
+        // a = b * X^t - phi(b * X^t)
+        r.a_sub_b_into_c::<1, ONCE>(tmp_b, tmp_a, a);
+        acc.value = true;
     }
-}
\ No newline at end of file
+}
diff --git a/math/src/ring/impl_u64/trace.rs b/math/src/ring/impl_u64/trace.rs
index 78e0a56..4740dfc 100644
--- a/math/src/ring/impl_u64/trace.rs
+++ b/math/src/ring/impl_u64/trace.rs
@@ -1,24 +1,31 @@
-use crate::ring::Ring;
-use crate::poly::Poly;
 use crate::modulus::barrett::Barrett;
 use crate::modulus::ONCE;
+use crate::poly::Poly;
+use crate::ring::Ring;
 
-impl Ring<u64>{
-    pub fn trace_inplace<const NTT:bool>(&self, step_start: usize, a: &mut Poly<u64>){
-        assert!(step_start <= self.log_n(), "invalid argument step_start: step_start={} > self.log_n()={}", step_start, self.log_n());
+impl Ring<u64> {
+    pub fn trace_inplace<const NTT: bool>(&self, step_start: usize, a: &mut Poly<u64>) {
+        assert!(
+            step_start <= self.log_n(),
+            "invalid argument step_start: step_start={} > self.log_n()={}",
+            step_start,
+            self.log_n()
+        );
 
         let log_steps: usize = self.log_n() - step_start;
-        let log_nth_root = self.log_n()+1;
-        let nth_root: usize= 1<<log_nth_root;
+        let log_nth_root = self.log_n() + 1;
+        let nth_root: usize = 1 << log_nth_root;
 
         if log_steps > 0 {
-            let n_inv: Barrett<u64> = self.modulus.barrett.prepare(self.modulus.inv(1<<log_steps));
+            let n_inv: Barrett<u64> = self
+                .modulus
+                .barrett
+                .prepare(self.modulus.inv(1 << log_steps));
             self.a_mul_b_scalar_barrett_into_a::<ONCE>(&n_inv, a);
 
             let mut tmp: Poly<u64> = self.new_poly();
 
-            (step_start..self.log_n()).for_each(|i|{
-
+            (step_start..self.log_n()).for_each(|i| {
                 let gal_el: usize = self.galois_element((1 << i) >> 1, i == 0, log_nth_root);
 
                 self.a_apply_automorphism_into_b::<NTT>(a, gal_el, nth_root, &mut tmp);
@@ -26,4 +33,4 @@ impl Ring<u64>{
             });
         }
     }
-}
\ No newline at end of file
+}
diff --git a/math/src/ring/impl_u64/utils.rs b/math/src/ring/impl_u64/utils.rs
index 9a5bde2..62f207e 100644
--- a/math/src/ring/impl_u64/utils.rs
+++ b/math/src/ring/impl_u64/utils.rs
@@ -1,8 +1,8 @@
-use crate::ring::Ring;
-use crate::poly::Poly;
 use crate::modulus::ONCE;
+use crate::poly::Poly;
+use crate::ring::Ring;
 
-impl Ring<u64>{
+impl Ring<u64> {
     // Generates a vector storing {X^{2^0}, X^{2^1}, .., X^{2^log_n}}.
     pub fn gen_x_pow_2<const NTT: bool, const INV: bool>(&self, log_n: usize) -> Vec<Poly<u64>> {
         let mut x_pow: Vec<Poly<u64>> = Vec::<Poly<u64>>::with_capacity(log_n);
@@ -35,4 +35,4 @@ impl Ring<u64>{
 
         x_pow
     }
-}
\ No newline at end of file
+}
diff --git a/math/tests/automorphism.rs b/math/tests/automorphism.rs
index dee2aa6..b39abbe 100644
--- a/math/tests/automorphism.rs
+++ b/math/tests/automorphism.rs
@@ -1,5 +1,7 @@
 use itertools::izip;
+use math::modulus::WordOps;
 use math::poly::Poly;
+use math::ring::impl_u64::packing::StreamRepacker;
 use math::ring::Ring;
 
 #[test]
@@ -135,7 +137,62 @@ fn test_packing_sparse_u64<const NTT: bool>(ring: &Ring<u64>) {
     }
 }
 
+// Entry point for the streaming packer test: builds a ring with n = 2^5,
+// q_base = 65537 and q_power = 1, then runs the sub-test with NTT = true.
+#[test]
+fn packing_streaming_u64() {
+    let log_n: usize = 5;
+    let ring: Ring<u64> = Ring::new(1 << log_n, 65537u64, 1usize);
 
+    sub_test("test_packing_streaming_dense_u64::<NTT:true>", || {
+        test_packing_streaming_dense_u64::<true>(&ring)
+    });
+}
+
+// Streams `n` inputs into a `StreamRepacker`. The i-th input belongs to slot
+// `i_rev` (the bit-reversal of `i`): it is a polynomial filled with
+// `values[i_rev]` when `i_rev` is a multiple of `gap`, and `None` otherwise.
+// The first packed result must hold `values[i]` at multiples of `gap`, else 0.
+fn test_packing_streaming_dense_u64<const NTT: bool>(ring: &Ring<u64>) {
+    let n: usize = ring.n();
+    let gap: usize = 3;
+
+    // values[i] = i + 1
+    let values: Vec<u64> = (0..n).map(|i| (i + 1) as u64).collect();
+
+    let mut packer = StreamRepacker::new(ring);
+    let mut poly: Poly<u64> = ring.new_poly();
+
+    for i in 0..n {
+        let i_rev: usize = i.reverse_bits_msb(ring.log_n() as u32);
+
+        if i_rev % gap != 0 {
+            // Slot without a polynomial.
+            packer.add::<NTT>(ring, None);
+            continue;
+        }
+
+        poly.fill(&values[i_rev]);
+        if NTT {
+            ring.ntt_inplace::<false>(&mut poly);
+        }
+        packer.add::<NTT>(ring, Some(&poly));
+    }
+
+    // The `n` calls above already completed one batch, so nothing is pending here.
+    packer.flush::<NTT>(ring);
+
+    let result: &mut Poly<u64> = &mut packer.results[0];
+    if NTT {
+        ring.intt_inplace::<false>(result);
+    }
+
+    // Compare every coefficient of the packed result with its expected value.
+    for (i, x) in result.0.iter().enumerate() {
+        let expected: u64 = if i % gap == 0 { values[i] } else { 0u64 };
+        assert_eq!(*x, expected);
+    }
+}
 
 #[test]
 fn trace_u64() {
@@ -144,24 +201,21 @@ fn trace_u64() {
     let q_power: usize = 1usize;
     let ring: Ring<u64> = Ring::new(n, q_base, q_power);
 
-    sub_test("test_trace::<NTT:false>", || {
-        test_trace_u64::<false>(&ring)
-    });
-    sub_test("test_trace::<NTT:true>", || {
-        test_trace_u64::<true>(&ring)
-    });
+    sub_test("test_trace::<NTT:false>", || test_trace_u64::<false>(&ring));
+    sub_test("test_trace::<NTT:true>", || test_trace_u64::<true>(&ring));
 }
 
 fn test_trace_u64<const NTT: bool>(ring: &Ring<u64>) {
     let n: usize = ring.n();
 
     let mut poly: Poly<u64> = ring.new_poly();
-    
-    poly.0.iter_mut().enumerate().for_each(|(i, x)|{
-        *x = (i+1) as u64
-    });
 
-    if NTT{
+    poly.0
+        .iter_mut()
+        .enumerate()
+        .for_each(|(i, x)| *x = (i + 1) as u64);
+
+    if NTT {
         ring.ntt_inplace::<false>(&mut poly);
     }
 
@@ -169,11 +223,11 @@ fn test_trace_u64<const NTT: bool>(ring: &Ring<u64>) {
 
     ring.trace_inplace::<NTT>(step_start, &mut poly);
 
-    if NTT{
+    if NTT {
         ring.intt_inplace::<false>(&mut poly);
     }
 
-    let gap: usize = 1<<(ring.log_n() - step_start);
+    let gap: usize = 1 << (ring.log_n() - step_start);
 
     poly.0.iter().enumerate().for_each(|(i, x)| {
         if i % gap == 0 {
@@ -182,4 +236,4 @@ fn test_trace_u64<const NTT: bool>(ring: &Ring<u64>) {
             assert_eq!(*x, 0u64)
         }
     });
-}
\ No newline at end of file
+}