diff --git a/Cargo.toml b/Cargo.toml
index 9bfc91a..fdc7dd2 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -13,4 +13,8 @@ criterion = "0.5.1"
 
 [[bench]]
 name = "ntt"
+harness = false
+
+[[bench]]
+name = "operations"
 harness = false
\ No newline at end of file
diff --git a/benches/operations.rs b/benches/operations.rs
new file mode 100644
index 0000000..8b20715
--- /dev/null
+++ b/benches/operations.rs
@@ -0,0 +1,40 @@
+use criterion::{criterion_group, criterion_main, BenchmarkId, Criterion};
+use math::ring::Ring;
+use math::modulus::Operations;
+
+fn add_vec_unary(c: &mut Criterion) {
+    fn runner(r: Ring<u64>) -> Box<dyn FnMut()> {
+
+        let mut p0: math::poly::Poly<u64> = r.new_poly();
+        let mut p1: math::poly::Poly<u64> = r.new_poly();
+        for i in 0..p0.n(){
+            p0.0[i] = i as u64;
+            p1.0[i] = i as u64;
+        }
+        println!("{}", r.n());
+        Box::new(move || {
+            r.modulus.add_vec_unary_assign::<8>(&p0.0, &mut p1.0);
+        })
+    }
+
+    let mut b: criterion::BenchmarkGroup<'_, criterion::measurement::WallTime> = c.benchmark_group("add_vec_unary");
+    for log_n in 11..17 {
+
+        let n: usize = 1<<log_n;
+        let r: Ring<u64> = Ring::<u64>::new(n, q_base, q_power);
+        let runners = [
+            ("prime", {
+                runner(r)
+            }),
+        ];
+        for (name, mut runner) in runners {
+            let id = BenchmarkId::new(name, n);
+            b.bench_with_input(id, &(), |b, _| b.iter(&mut runner));
+        }
+    }
+}
+
+criterion_group!(benches, add_vec_unary);
+criterion_main!(benches);
diff --git a/src/lib.rs b/src/lib.rs
index 89beb36..1306749 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -4,4 +4,115 @@ pub mod modulus;
 
 pub mod dft;
 pub mod ring;
-pub mod poly;
\ No newline at end of file
+pub mod poly;
+
+pub mod macros{
+
+    #[macro_export]
+    macro_rules! apply_unary {
+
+        ($self:expr, $f:expr, $a:expr, $CHUNK:expr) => {
+
+            match $CHUNK{
+                8 => {
+
+                    $a.chunks_exact_mut(8).for_each(|a| {
+                        $f(&$self, &mut a[0]);
+                        $f(&$self, &mut a[1]);
+                        $f(&$self, &mut a[2]);
+                        $f(&$self, &mut a[3]);
+                        $f(&$self, &mut a[4]);
+                        $f(&$self, &mut a[5]);
+                        $f(&$self, &mut a[6]);
+                        $f(&$self, &mut a[7]);
+                    });
+
+                    let n: usize = $a.len();
+                    let m = n - (n&($CHUNK-1));
+                    $a[m..].iter_mut().for_each(|a| {
+                        $f(&$self, a);
+                    });
+                },
+                _=>{
+                    $a.iter_mut().for_each(|a| {
+                        $f(&$self, a);
+                    });
+                }
+            }
+        };
+    }
+
+    #[macro_export]
+    macro_rules! apply_binary {
+
+        ($self:expr, $f:expr, $a:expr, $b:expr, $CHUNK:expr) => {
+
+            let n: usize = $a.len();
+            debug_assert!($b.len() == n, "invalid argument b: b.len() = {} != a.len() = {}", $b.len(), n);
+            debug_assert!($CHUNK&($CHUNK-1) == 0, "invalid CHUNK const: not a power of two");
+
+            match $CHUNK{
+                8 => {
+
+                    izip!($a.chunks_exact(8), $b.chunks_exact_mut(8)).for_each(|(a, b)| {
+                        $f(&$self, &a[0], &mut b[0]);
+                        $f(&$self, &a[1], &mut b[1]);
+                        $f(&$self, &a[2], &mut b[2]);
+                        $f(&$self, &a[3], &mut b[3]);
+                        $f(&$self, &a[4], &mut b[4]);
+                        $f(&$self, &a[5], &mut b[5]);
+                        $f(&$self, &a[6], &mut b[6]);
+                        $f(&$self, &a[7], &mut b[7]);
+                    });
+
+                    let m = n - (n&($CHUNK-1));
+                    izip!($a[m..].iter(), $b[m..].iter_mut()).for_each(|(a, b)| {
+                        $f(&$self, a, b);
+                    });
+                },
+                _=>{
+                    izip!($a.iter(), $b.iter_mut()).for_each(|(a, b)| {
+                        $f(&$self, a, b);
+                    });
+                }
+            }
+        };
+    }
+
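+    /// Applies `$f(&$self, &a[i], &b[i], &mut c[i])` element-wise over three slices.
+    /// With `$CHUNK == 8` the loop body is manually unrolled in blocks of eight and
+    /// the remaining `len % 8` elements are handled by a scalar tail loop; any other
+    /// `$CHUNK` falls back to a plain element-wise loop.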
+    #[macro_export]
+    macro_rules! apply_ternary {
+
+        ($self:expr, $f:expr, $a:expr, $b:expr, $c:expr, $CHUNK:expr) => {
+
+            let n: usize = $a.len();
+            debug_assert!($b.len() == n, "invalid argument b: b.len() = {} != a.len() = {}", $b.len(), n);
+            debug_assert!($CHUNK&($CHUNK-1) == 0, "invalid CHUNK const: not a power of two");
+
+            match $CHUNK{
+                8 => {
+
+                    izip!($a.chunks_exact(8), $b.chunks_exact(8), $c.chunks_exact_mut(8)).for_each(|(a, b, c)| {
+                        $f(&$self, &a[0], &b[0], &mut c[0]);
+                        $f(&$self, &a[1], &b[1], &mut c[1]);
+                        $f(&$self, &a[2], &b[2], &mut c[2]);
+                        $f(&$self, &a[3], &b[3], &mut c[3]);
+                        $f(&$self, &a[4], &b[4], &mut c[4]);
+                        $f(&$self, &a[5], &b[5], &mut c[5]);
+                        $f(&$self, &a[6], &b[6], &mut c[6]);
+                        $f(&$self, &a[7], &b[7], &mut c[7]);
+                    });
+
+                    let m = n - (n&7);
+                    izip!($a[m..].iter(), $b[m..].iter(), $c[m..].iter_mut()).for_each(|(a, b, c)| {
+                        $f(&$self, a, b, c);
+                    });
+                },
+                _=>{
+                    izip!($a.iter(), $b.iter(), $c.iter_mut()).for_each(|(a, b, c)| {
+                        $f(&$self, a, b, c);
+                    });
+                }
+            }
+        };
+    }
+}
\ No newline at end of file
diff --git a/src/modulus.rs b/src/modulus.rs
index 2033493..3fc01c0 100644
--- a/src/modulus.rs
+++ b/src/modulus.rs
@@ -4,6 +4,8 @@ pub mod montgomery;
 pub mod shoup;
 pub mod impl_u64;
 
+
+
 pub trait WordOps<O>{
     fn log2(self) -> O;
     fn reverse_bits_msb(self, n:u32) -> O;
@@ -71,3 +73,38 @@ impl ReduceOnce for u64{
         (*self).min(self.wrapping_sub(q))
     }
 }
+
+
+pub trait Operations<O>{
+    // Assigns a + b to c.
+    fn add_binary_assign(&self, a: &O, b:&O, c: &mut O);
+
+    // Assigns a + b to b.
+    fn add_unary_assign(&self, a: &O, b: &mut O);
+
+    // Assigns a[i] + b[i] to c[i]
+    fn add_vec_binary_assign<const CHUNK: usize>(&self, a: &[O], b:&[O], c: &mut [O]);
+
+    // Assigns a[i] + b[i] to b[i]
+    fn add_vec_unary_assign<const CHUNK: usize>(&self, a: &[O], b: &mut [O]);
+
+    // Assigns a - b to c.
+    fn sub_binary_assign(&self, a: &O, b:&O, c: &mut O);
+
+    // Assigns a - b to b.
+    fn sub_unary_assign(&self, a: &O, b: &mut O);
+
+    // Assigns a[i] - b[i] to c[i]
+    fn sub_vec_binary_assign<const CHUNK: usize>(&self, a: &[O], b:&[O], c: &mut [O]);
+
+    // Assigns a[i] - b[i] to b[i]
+    fn sub_vec_unary_assign<const CHUNK: usize>(&self, a: &[O], b: &mut [O]);
+
+    // Assigns -a to a.
+    fn neg_assign(&self, a:&mut O);
+
+    // Assigns -a[i] to a[i].
+    fn neg_vec_assign<const CHUNK: usize>(&self, a: &mut [O]);
+}
+
+
diff --git a/src/modulus/impl_u64/mod.rs b/src/modulus/impl_u64/mod.rs
index 61322a4..4394599 100644
--- a/src/modulus/impl_u64/mod.rs
+++ b/src/modulus/impl_u64/mod.rs
@@ -1,4 +1,5 @@
 pub mod prime;
 pub mod barrett;
 pub mod montgomery;
-pub mod shoup;
\ No newline at end of file
+pub mod shoup;
+pub mod operations;
\ No newline at end of file
diff --git a/src/modulus/impl_u64/operations.rs b/src/modulus/impl_u64/operations.rs
new file mode 100644
index 0000000..05215cd
--- /dev/null
+++ b/src/modulus/impl_u64/operations.rs
@@ -0,0 +1,60 @@
+
+use crate::modulus::Operations;
+use crate::modulus::prime::Prime;
+use crate::modulus::ReduceOnce;
+use crate::{apply_unary, apply_binary, apply_ternary};
+use itertools::izip;
+
+impl Operations<u64> for Prime<u64>{
+
+    #[inline(always)]
+    fn add_binary_assign(&self, a: &u64, b: &u64, c: &mut u64){
+        *c = a.wrapping_add(*b).reduce_once(self.q);
+    }
+
+    #[inline(always)]
+    fn add_unary_assign(&self, a: &u64, b: &mut u64){
+        *b = a.wrapping_add(*b).reduce_once(self.q);
+    }
+
+    #[inline(always)]
+    fn add_vec_binary_assign<const CHUNK: usize>(&self, a: &[u64], b:&[u64], c:&mut [u64]){
+        apply_ternary!(self, Self::add_binary_assign, a, b, c, CHUNK);
+    }
+
+    #[inline(always)]
+    fn add_vec_unary_assign<const CHUNK: usize>(&self, a: &[u64], b:&mut [u64]){
+        apply_binary!(self, Self::add_unary_assign, a, b, CHUNK);
+    }
+
+    #[inline(always)]
+    fn sub_binary_assign(&self, a: &u64, b: &u64, c: &mut u64){
+        *c = a.wrapping_add(self.q.wrapping_sub(*b)).reduce_once(self.q);
+    }
+
+    #[inline(always)]
+    fn sub_unary_assign(&self, a: &u64, b: &mut u64){
+        *b = a.wrapping_add(self.q.wrapping_sub(*b)).reduce_once(self.q);
+    }
+
+    #[inline(always)]
+    fn sub_vec_binary_assign<const CHUNK: usize>(&self, a: &[u64], b:&[u64], c:&mut [u64]){
+        apply_ternary!(self, Self::sub_binary_assign, a, b, c, CHUNK);
+    }
+
+    #[inline(always)]
+    fn sub_vec_unary_assign<const CHUNK: usize>(&self, a: &[u64], b:&mut [u64]){
+        apply_binary!(self, Self::sub_unary_assign, a, b, CHUNK);
+    }
+
+    #[inline(always)]
+    fn neg_assign(&self, a: &mut u64){
+        *a = self.q.wrapping_sub(*a);
+    }
+
+    #[inline(always)]
+    fn neg_vec_assign<const CHUNK: usize>(&self, a: &mut [u64]){
+        apply_unary!(self, Self::neg_assign, a, CHUNK);
+    }
+}
+
diff --git a/src/poly/poly.rs b/src/poly/poly.rs
index e69de29..8b13789 100644
--- a/src/poly/poly.rs
+++ b/src/poly/poly.rs
@@ -0,0 +1 @@
+
diff --git a/src/ring/impl_u64/automorphism.rs b/src/ring/impl_u64/automorphism.rs
index c78b2ce..739cd35 100644
--- a/src/ring/impl_u64/automorphism.rs
+++ b/src/ring/impl_u64/automorphism.rs
@@ -33,11 +33,12 @@ impl Ring<u64>{
         let b_vec: &mut _ = &mut b.0;
         let a_vec: &_ = &a.0;
 
-        for i in 0..n{
+        a_vec.iter().enumerate().for_each(|(i, ai)|{
             let gal_el_i: u64 = i as u64 * gal_el;
-            let i_out: u64 = gal_el_i & mask;
             let sign: u64 = (gal_el_i>>log_n) & 1;
-            b_vec[i_out as usize] = a_vec[i] * (sign^1) | (q - a_vec[i]) * sign
-        }
+            let i_out: u64 = gal_el_i & mask;
+            b_vec[i_out as usize] = ai * (sign^1) | (q - ai) * sign
+        });
+
     }
 }
\ No newline at end of file
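
A minimal usage sketch of the new Operations trait, mirroring the call made in benches/operations.rs; the helper name and the generic bound are illustrative, only the trait, the add_vec_unary_assign method and the const-generic chunk size of 8 come from this patch:

    use math::modulus::Operations;

    // acc[i] = a[i] + acc[i] mod q, processed in unrolled chunks of 8
    // (any other chunk size falls back to a plain element-wise loop).
    fn accumulate<M: Operations<u64>>(m: &M, a: &[u64], acc: &mut [u64]) {
        m.add_vec_unary_assign::<8>(a, acc);
    }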