; ---------------------------------------------------------------------------
; Template helpers used by <name>_mul below. Each helper emits an instruction
; sequence inline at the point where it is expanded; none of them emits the
; final `ret` of the routine (mulS1S2 emits an early `ret` for its fast path).
;
; Register contract shared by all helpers (set up by <name>_mul):
;   rdi = pointer to result, rsi = pointer to element 1, rdx = pointer to
;   element 2, r8 = first qword of element 1, r9 = first qword of element 2.
; ---------------------------------------------------------------------------

; mulS1S2: both operands are short (32-bit signed values in r8d / r9d).
; Tries a 32-bit signed multiply first; on signed overflow redoes the product
; in 64 bits and hands it to rawCopyS2L (defined elsewhere; presumably stores
; a 64-bit integer in long form -- confirm against its definition).
; Uses the fixed label mul_manageOverflow, so it must be expanded only once.
<% function mulS1S2() { %>
        xor     rax, rax
        mov     eax, r8d
        imul    r9d                     ; edx:eax = eax * r9d, OF set if it does not fit in eax
        jo      mul_manageOverflow      ; 32-bit overflow: redo the product in 64 bits

        ; The 32-bit write to eax zero-extends into rax, so bits 63 and 62 of
        ; the stored qword are clear, which the dispatcher reads as
        ; "short, not Montgomery".
        mov     [rdi], rax              ; not necessary to adjust so just save and return
        ret

mul_manageOverflow:                     ; Do the operation in 64 bits
        push    rsi
        movsx   rax, r8d
        movsx   rcx, r9d
        imul    rcx
        mov     rsi, rax
        call    rawCopyS2L
        pop     rsi
<% } %>

; mulL1S2: element 1 is long, element 2 is short.
; Calls rawMontgomeryMul1 with rdi/rsi advanced past the 8-byte header qword
; and rdx = |short value|. When the short value is negative, rawNegL is then
; called on the result (rsi = rdi). rsi and rdi are restored on exit.
; The parameter `t` is unused; it is kept so existing call sites stay valid.
<% function mulL1S2(t) { %>
        push    rsi
        add     rsi, 8
        movsx   rdx, r9d                ; sign-extended short operand
        add     rdi, 8
        cmp     rdx, 0
        <% const rawPositiveLabel = global.tmpLabel() %>
        jns     <%= rawPositiveLabel %>
        neg     rdx                     ; multiply by the magnitude, fix the sign afterwards
        call    rawMontgomeryMul1
        mov     rsi, rdi
        call    rawNegL
        sub     rdi, 8
        pop     rsi
        <% const done = global.tmpLabel() %>
        jmp     <%= done %>
<%= rawPositiveLabel %>:
        call    rawMontgomeryMul1
        sub     rdi, 8
        pop     rsi
<%= done %>:
<% } %>

; mulS1L2: element 1 is short, element 2 is long.
; Mirror image of mulL1S2: the long data pointer comes from rdx + 8 and the
; short value from r8d. rsi and rdi are restored on exit.
<% function mulS1L2() { %>
        push    rsi
        lea     rsi, [rdx + 8]
        movsx   rdx, r8d                ; sign-extended short operand
        add     rdi, 8
        cmp     rdx, 0
        <% const rawPositiveLabel = global.tmpLabel() %>
        jns     <%= rawPositiveLabel %>
        neg     rdx                     ; multiply by the magnitude, fix the sign afterwards
        call    rawMontgomeryMul1
        mov     rsi, rdi
        call    rawNegL
        sub     rdi, 8
        pop     rsi
        <% const done = global.tmpLabel() %>
        jmp     <%= done %>
<%= rawPositiveLabel %>:
        call    rawMontgomeryMul1
        sub     rdi, 8
        pop     rsi
<%= done %>:
<% } %>

; mulL1L2: both operands are passed to rawMontgomeryMul as long data.
; All three pointers are advanced past the 8-byte header qword for the call;
; rdi and rsi are restored afterwards, rdx is NOT restored.
<% function mulL1L2() { %>
        add     rdi, 8
        add     rsi, 8
        add     rdx, 8
        call    rawMontgomeryMul
        sub     rdi, 8
        sub     rsi, 8
<% } %>

; mulR3: multiplies the result in place by the constant at label R3 using
; rawMontgomeryMul (source and destination are both rdi + 8).
; It is expanded only on the paths where neither operand is Montgomery.
; NOTE(review): presumably R3 = R^3 mod q, so that this converts the product
; to Montgomery form -- confirm against the definition of R3.
<% function mulR3() { %>
        push    rsi
        add     rdi, 8
        mov     rsi, rdi
        lea     rdx, [R3]
        call    rawMontgomeryMul
        sub     rdi, 8
        pop     rsi
<% } %>

;;;;;;;;;;;;;;;;;;;;;;
; mul
;;;;;;;;;;;;;;;;;;;;;;
; Multiplies two elements of any kind
; Params:
;   rsi <= Pointer to element 1
;   rdx <= Pointer to element 2
;   rdi <= Pointer to result
;   [rdi] = [rsi] * [rdx]
; Modified Registers:
;    r8, r9, r10, r11, rax, rcx, rdx
;
; Dispatch is on two flag bits of each operand's first qword:
;   bit 63 set => long form,  bit 62 set => Montgomery form.
; Label naming: s/l = short/long, 1/2 = operand, n/m = normal/Montgomery.
; The argument of global.setTypeDest is the type tag written to the result
; (presumably 0x80 = long normal, 0xC0 = long Montgomery -- confirm against
; setTypeDest).
;;;;;;;;;;;;;;;;;;;;;;
<%=name%>_mul:
        mov     r8, [rsi]
        mov     r9, [rdx]
        bt      r8, 63                  ; Check if is short first operand
        jc      mul_l1
        bt      r9, 63                  ; Check if is short second operand
        jc      mul_s1l2

mul_s1s2:                               ; Both operands are short
        <%= mulS1S2() %>
        ret

mul_l1:
        bt      r9, 63                  ; Check if is short second operand
        jc      mul_l1l2

;;;;;;;;
mul_l1s2:
        bt      r8, 62                  ; check if montgomery first
        jc      mul_l1ms2
mul_l1ns2:
        bt      r9, 62                  ; check if montgomery second
        jc      mul_l1ns2m
mul_l1ns2n:
        <%= global.setTypeDest("0xC0"); %>
        <%= mulL1S2() %>
        <%= mulR3() %>
        ret

mul_l1ns2m:
        <%= global.setTypeDest("0x80"); %>
        <%= mulL1L2() %>
        ret

mul_l1ms2:
        bt      r9, 62                  ; check if montgomery second
        jc      mul_l1ms2m
mul_l1ms2n:
        <%= global.setTypeDest("0x80"); %>
        <%= mulL1S2() %>
        ret

mul_l1ms2m:
        <%= global.setTypeDest("0xC0"); %>
        <%= mulL1L2() %>
        ret

;;;;;;;;
mul_s1l2:
        bt      r8, 62                  ; check if montgomery first
        jc      mul_s1ml2
mul_s1nl2:
        bt      r9, 62                  ; check if montgomery second
        jc      mul_s1nl2m
mul_s1nl2n:
        <%= global.setTypeDest("0xC0"); %>
        <%= mulS1L2() %>
        <%= mulR3() %>
        ret

mul_s1nl2m:
        <%= global.setTypeDest("0x80"); %>
        <%= mulS1L2(); %>
        ret

mul_s1ml2:
        bt      r9, 62                  ; check if montgomery second
        jc      mul_s1ml2m
mul_s1ml2n:
        <%= global.setTypeDest("0x80"); %>
        <%= mulL1L2() %>
        ret

mul_s1ml2m:
        <%= global.setTypeDest("0xC0"); %>
        <%= mulL1L2() %>
        ret

;;;;
mul_l1l2:
        bt      r8, 62                  ; check if montgomery first
        jc      mul_l1ml2
mul_l1nl2:
        bt      r9, 62                  ; check if montgomery second
        jc      mul_l1nl2m
mul_l1nl2n:
        <%= global.setTypeDest("0xC0"); %>
        <%= mulL1L2() %>
        <%= mulR3() %>
        ret

mul_l1nl2m:
        <%= global.setTypeDest("0x80"); %>
        <%= mulL1L2() %>
        ret

mul_l1ml2:
        bt      r9, 62                  ; check if montgomery second
        jc      mul_l1ml2m
mul_l1ml2n:
        <%= global.setTypeDest("0x80"); %>
        <%= mulL1L2() %>
        ret

mul_l1ml2m:
        <%= global.setTypeDest("0xC0"); %>
        <%= mulL1L2() %>
        ret