You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
 
 

342 lines
7.7 KiB

<%
//////////////////////
// montgomeryTemplate
//////////////////////
// This function creates functions with the montgomery transformation
// applied
// the round hook allows to add diferent code in the iteration
//
// All the montgomery functions modifies:
// r8, r9, 10, r11, rax, rcx
//////////////////////
function montgomeryTemplate(fnName, round) {
let r0, r1, r2;
function setR(step) {
if ((step % 3) == 0) {
r0 = "r8";
r1 = "r9";
r2 = "r10";
} else if ((step % 3) == 1) {
r0 = "r9";
r1 = "r10";
r2 = "r8";
} else {
r0 = "r10";
r1 = "r8";
r2 = "r9";
}
}
const base = bigInt.one.shiftLeft(64);
const np64 = base.minus(q.modInv(base));
%>
<%=fnName%>:
sub rsp, <%= n64*8 %> ; Reserve space for ms
mov rcx, rdx ; rdx is needed for multiplications so keep it in cx
mov r11, 0x<%= np64.toString(16) %> ; np
xor r8,r8
xor r9,r9
xor r10,r10
<%
// Main loop
for (let i=0; i<n64*2; i++) {
setR(i);
round(i, r0, r1, r2);
%>
<%
for (let j=i-1; j>=0; j--) { // All ms
if (((i-j)<n64)&&(j<n64)) {
%>
mov rax, [rsp + <%= j*8 %>]
mul qword [q + <%= (i-j)*8 %>]
add <%= r0 %>, rax
adc <%= r1 %>, rdx
adc <%= r2 %>, 0x0
<%
}
} // ms
%>
<%
if (i<n64) {
%>
mov rax, <%= r0 %>
mul r11
mov [rsp + <%= i*8 %>], rax
mul qword [q]
add <%= r0 %>, rax
adc <%= r1 %>, rdx
adc <%= r2 %>, 0x0
<%
} else {
%>
mov [rdi + <%= (i-n64)*8 %> ], <%= r0 %>
xor <%= r0 %>,<%= r0 %>
<%
}
%>
<%
} // Main Loop
%>
test <%= r1 %>, <%= r1 %>
jnz <%=fnName%>_mulM_sq
; Compare with q
<%
for (let i=0; i<n64; i++) {
%>
mov rax, [rdi + <%= (n64-i-1)*8 %>]
cmp rax, [q + <%= (n64-i-1)*8 %>]
jc <%=fnName%>_mulM_done ; q is bigget so done.
jnz <%=fnName%>_mulM_sq ; q is lower
<%
}
%>
; If equal substract q
<%=fnName%>_mulM_sq:
<%
for (let i=0; i<n64; i++) {
%>
mov rax, [q + <%= i*8 %>]
<%= i==0 ? "sub" : "sbb" %> [rdi + <%= i*8 %>], rax
<%
}
%>
<%=fnName%>_mulM_done:
mov rdx, rcx ; recover rdx to its original place.
add rsp, <%= n64*8 %> ; recover rsp
ret
<%
} // Template
%>
;;;;;;;;;;;;;;;;;;;;;;
; rawMontgomeryMul
;;;;;;;;;;;;;;;;;;;;;;
; Multiply two elements in montgomery form
; Params:
; rsi <= Pointer to the long data of element 1
; rdx <= Pointer to the long data of element 2
; rdi <= Pointer to the long data of result
; Modified registers:
; r8, r9, 10, r11, rax, rcx
;;;;;;;;;;;;;;;;;;;;;;
<%
montgomeryTemplate("rawMontgomeryMul", function(i, r0, r1, r2) {
// Same Digit
for (let o1=Math.max(0, i-n64+1); (o1<=i)&&(o1<n64); o1++) {
const o2= i-o1;
%>
mov rax, [rsi + <%= 8*o1 %>]
mul qword [rcx + <%= 8*o2 %>]
add <%= r0 %>, rax
adc <%= r1 %>, rdx
adc <%= r2 %>, 0x0
<%
} // Same digit
})
%>
;;;;;;;;;;;;;;;;;;;;;;
; rawMontgomerySquare
;;;;;;;;;;;;;;;;;;;;;;
; Square an element
; Params:
; rsi <= Pointer to the long data of element 1
; rdi <= Pointer to the long data of result
; Modified registers:
; r8, r9, 10, r11, rax, rcx
;;;;;;;;;;;;;;;;;;;;;;
<%
montgomeryTemplate("rawMontgomerySquare", function(i, r0, r1, r2) {
// Same Digit
for (let o1=Math.max(0, i-n64+1); (o1<((i+1)>>1) )&&(o1<n64); o1++) {
const o2= i-o1;
%>
mov rax, [rsi + <%= 8*o1 %>]
mul qword [rsi + <%= 8*o2 %>]
add <%= r0 %>, rax
adc <%= r1 %>, rdx
adc <%= r2 %>, 0x0
add <%= r0 %>, rax
adc <%= r1 %>, rdx
adc <%= r2 %>, 0x0
<%
} // Same digit
%>
<% if (i%2 == 0) { %>
mov rax, [rsi + <%= 8*(i/2) %>]
mul rax
add <%= r0 %>, rax
adc <%= r1 %>, rdx
adc <%= r2 %>, 0x0
<% } %>
<%
})
%>
;;;;;;;;;;;;;;;;;;;;;;
; rawMontgomeryMul1
;;;;;;;;;;;;;;;;;;;;;;
; Multiply two elements in montgomery form
; Params:
; rsi <= Pointer to the long data of element 1
; rdx <= second operand
; rdi <= Pointer to the long data of result
; Modified registers:
; r8, r9, 10, r11, rax, rcx
;;;;;;;;;;;;;;;;;;;;;;
<%
montgomeryTemplate("rawMontgomeryMul1", function(i, r0, r1, r2) {
// Same Digit
if (i<n64) {
%>
mov rax, [rsi + <%= 8*i %>]
mul rcx
add <%= r0 %>, rax
adc <%= r1 %>, rdx
adc <%= r2 %>, 0x0
<%
} // Same digit
})
%>
;;;;;;;;;;;;;;;;;;;;;;
; rawFromMontgomery
;;;;;;;;;;;;;;;;;;;;;;
; Multiply two elements in montgomery form
; Params:
; rsi <= Pointer to the long data of element 1
; rdi <= Pointer to the long data of result
; Modified registers:
; r8, r9, 10, r11, rax, rcx
;;;;;;;;;;;;;;;;;;;;;;
<%
montgomeryTemplate("rawFromMontgomery", function(i, r0, r1, r2) {
// Same Digit
if (i<n64) {
%>
add <%= r0 %>, [rdi + <%= 8*i %>]
adc <%= r1 %>, 0x0
adc <%= r2 %>, 0x0
<%
} // Same digit
})
%>
;;;;;;;;;;;;;;;;;;;;;;
; toMontgomery
;;;;;;;;;;;;;;;;;;;;;;
; Convert a number to Montgomery
; rdi <= Pointer element to convert
; Modified registers:
; r8, r9, 10, r11, rax, rcx
;;;;;;;;;;;;;;;;;;;;
<%=name%>_toMontgomery:
mov rax, [rdi]
bts rax, 62 ; check if montgomery
jc toMontgomery_doNothing
bts rax, 63
jc toMontgomeryLong
toMontgomeryShort:
mov [rdi], rax
add rdi, 8
push rsi
lea rsi, [R2]
movsx rdx, eax
cmp rdx, 0
js negMontgomeryShort
posMontgomeryShort:
call rawMontgomeryMul1
pop rsi
sub rdi, 8
ret
negMontgomeryShort:
neg rdx ; Do the multiplication positive and then negate the result.
call rawMontgomeryMul1
mov rsi, rdi
call rawNegL
pop rsi
sub rdi, 8
ret
toMontgomeryLong:
mov [rdi], rax
add rdi, 8
push rsi
mov rdx, rdi
lea rsi, [R2]
call rawMontgomeryMul
pop rsi
sub rdi, 8
toMontgomery_doNothing:
ret
;;;;;;;;;;;;;;;;;;;;;;
; toNormal
;;;;;;;;;;;;;;;;;;;;;;
; Convert a number from Montgomery
; rdi <= Pointer element to convert
; Modified registers:
; r8, r9, 10, r11, rax, rcx
;;;;;;;;;;;;;;;;;;;;
<%=name%>_toNormal:
mov rax, [rdi]
btc rax, 62 ; check if montgomery
jnc toNormal_doNothing
bt rax, 63 ; if short, it means it's converted
jnc toNormal_doNothing
toNormalLong:
mov [rdi], rax
add rdi, 8
call rawFromMontgomery
sub rdi, 8
toNormal_doNothing:
ret
;;;;;;;;;;;;;;;;;;;;;;
; toLongNormal
;;;;;;;;;;;;;;;;;;;;;;
; Convert a number to long normal
; rdi <= Pointer element to convert
; Modified registers:
; r8, r9, 10, r11, rax, rcx
;;;;;;;;;;;;;;;;;;;;
<%=name%>_toLongNormal:
mov rax, [rdi]
bt rax, 62 ; check if montgomery
jc toLongNormal_fromMontgomery
bt rax, 63 ; check if long
jnc toLongNormal_fromShort
ret ; It is already long
toLongNormal_fromMontgomery:
add rdi, 8
call rawFromMontgomery
sub rdi, 8
ret
toLongNormal_fromShort:
mov r8, rsi ; save rsi
movsx rsi, eax
call rawCopyS2L
mov rsi, r8 ; recover rsi
ret