Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
pqcrypto
faster-csidh
Commits
c00c37e0
Commit
c00c37e0
authored
Aug 23, 2018
by
Michael Meyer
Browse files
c-implementation
parents
Changes
14
Hide whitespace changes
Inline
Side-by-side
Makefile
0 → 100644
View file @
c00c37e0
# None of these targets produces a file named after itself, so mark them
# phony; otherwise a stray file called "all", "debug" or "clean" would make
# the corresponding target appear up to date.
.PHONY: all debug clean

# Default build of the `main` binary.
all:
	@gcc \
		-Wall -Wextra \
		-O0 -funroll-loops \
		rng.c \
		u512.s fp.s \
		mont.c \
		csidh.c \
		main.c \
		-o main

# Same sources, built with debug information.
debug:
	gcc \
		-Wall -Wextra \
		-g \
		rng.c \
		u512.s fp.s \
		mont.c \
		csidh.c \
		main.c \
		-o main

# Remove the built binary.
clean:
	rm -f main
bench.c
0 → 100644
View file @
c00c37e0
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <time.h>
#include <assert.h>
#include "u512.h"
#include "fp.h"
#include "mont.h"
#include "csidh.h"
#include <inttypes.h>
/* Execute the x86 RDTSC instruction and return the counter as one 64-bit
 * value: the EDX output forms the upper 32 bits, the EAX output the lower. */
static __inline__ uint64_t rdtsc(void)
{
    uint32_t low_half, high_half;

    __asm__ __volatile__("rdtsc" : "=a"(low_half), "=d"(high_half));

    uint64_t stamp = high_half;
    stamp <<= 32;
    stamp |= low_half;
    return stamp;
}
/* Number of benchmark iterations; the printed figures are per-iteration
 * averages over this many runs. */
unsigned long its = 10000;

/*
 * Benchmark driver.
 *
 * Starting from the public key `base`, each iteration samples a fresh
 * private key (not timed), then times validate() followed by action() on
 * the running public key.  Both the rdtsc() cycle count and the clock()
 * delta are accumulated and reported as averages.
 */
int main(void)
{
    /* Named total_clocks rather than `time` so the <time.h> function of
     * that name is not shadowed. */
    clock_t t0, t1, total_clocks = 0;
    uint64_t c0, c1, cycles = 0;

    private_key priv;
    public_key pub = base;

    for (unsigned long i = 0; i < its; ++i) {

        csidh_private(&priv);

        t0 = clock();
        c0 = rdtsc();

        /**************************************/

        /* validate() is called outside assert() on purpose: an expression
         * inside assert() disappears when NDEBUG is defined, which would
         * silently remove the validation from the timed region. */
        const bool valid = validate(&pub);
        assert(valid);
        (void) valid; /* silence unused-variable warnings under NDEBUG */

        action(&pub, &pub, &priv);

        /**************************************/

        c1 = rdtsc();
        t1 = clock();
        cycles += c1 - c0;
        total_clocks += t1 - t0;
    }

    printf("iterations: %lu\n", its);
    /* Cast the quotient so the argument is uint64_t whatever the width of
     * unsigned long is. */
    printf("clock cycles: %" PRIu64 "\n", (uint64_t) (cycles / its));
    printf("wall-clock time: %.3lf ms\n", 1000. * total_clocks / CLOCKS_PER_SEC / its);

    return 0;
}
csidh.c
0 → 100644
View file @
c00c37e0
#include <string.h>
#include <assert.h>
#include "csidh.h"
#include "rng.h"
/* specific to p, should perhaps be somewhere else */
/* Table of the num_primes small odd primes used by the routines in this
 * file: the odd primes from 3 up to 373, followed by 587.
 * NOTE(review): presumably p + 1 = 4 * (product of these primes), matching
 * the "maximal 2-power in p+1" comments elsewhere in this file - confirm. */
const unsigned primes[num_primes] = {
      3,   5,   7,  11,  13,  17,  19,  23,  29,  31,
     37,  41,  43,  47,  53,  59,  61,  67,  71,  73,
     79,  83,  89,  97, 101, 103, 107, 109, 113, 127,
    131, 137, 139, 149, 151, 157, 163, 167, 173, 179,
    181, 191, 193, 197, 199, 211, 223, 227, 229, 233,
    239, 241, 251, 257, 263, 269, 271, 277, 281, 283,
    293, 307, 311, 313, 317, 331, 337, 347, 349, 353,
    359, 367, 373, 587,
};
/* Bound that validate() subtracts the accumulated point order from; a
 * borrow there means the order exceeds this value.  Only four initialisers
 * are listed, so any further elements of the u512 are zero-initialised.
 * NOTE(review): the name suggests the value is 4*sqrt(p) - confirm against
 * the prime in use. */
const u512 four_sqrt_p = {{
    0x85e2579c786882cf, 0x4e3433657e18da95, 0x850ae5507965a0b3, 0xa15bc4e676475964,
}};

/* Starting public key: the curve with Montgomery coefficient zero. */
const public_key base = {0}; /* A = 0 */
/*
 * Sample a private key into *priv.
 *
 * The key holds num_primes exponents packed two per byte as 4-bit values.
 * Random bytes are drawn 64 at a time via randombytes(); a byte is kept
 * only if, read as int8_t, it lies in [-max_exponent, max_exponent], and is
 * discarded otherwise.  Accepted values are OR-ed into the zeroed array, so
 * the k-th accepted value goes to byte k/2, shifted by 4 bits when k is odd.
 */
void csidh_private(private_key *priv)
{
    size_t filled = 0;

    memset(&priv->e, 0, sizeof(priv->e));

    while (filled < num_primes) {

        int8_t pool[64];
        randombytes(pool, sizeof(pool));

        for (size_t pos = 0; pos < sizeof(pool) && filled < num_primes; ++pos) {

            const int8_t candidate = pool[pos];

            /* out-of-range byte: reject and try the next one */
            if (candidate > max_exponent || candidate < -max_exponent)
                continue;

            const size_t shift = (filled % 2) * 4;
            priv->e[filled / 2] |= (candidate & 0xf) << shift;
            ++filled;
        }
    }
}
/* compute [(p+1)/l] P for all l in our list of primes. */
/* divide and conquer is much faster than doing it naively,
 * but uses more memory. */
/*
 * Works in place on P[lower..upper).  On entry P[lower] holds the point for
 * the whole index range.  The range is split at `mid`: P[mid] receives
 * P[lower] multiplied by the primes of the lower half, P[lower] is then
 * multiplied by the primes of the upper half, and both halves are processed
 * recursively.  A range of length one is left untouched.
 */
static void cofactor_multiples(proj *P, const proj *A, size_t lower, size_t upper)
{
    assert(lower < upper);

    const size_t width = upper - lower;
    if (width == 1)
        return;

    const size_t mid = lower + (width + 1) / 2;

    u512 upper_primes = u512_1;   /* product of primes[mid..upper)  */
    u512 lower_primes = u512_1;   /* product of primes[lower..mid)  */

    size_t idx = lower;
    while (idx < mid) {
        u512_mul3_64(&lower_primes, &lower_primes, primes[idx]);
        ++idx;
    }
    while (idx < upper) {
        u512_mul3_64(&upper_primes, &upper_primes, primes[idx]);
        ++idx;
    }

    /* P[mid] must be derived first: the second call overwrites P[lower]. */
    xMUL(&P[mid], A, &P[lower], &lower_primes);
    xMUL(&P[lower], A, &P[lower], &upper_primes);

    cofactor_multiples(P, A, lower, mid);
    cofactor_multiples(P, A, mid, upper);
}
/* never accepts invalid keys. */
/*
 * Check the curve with Montgomery coefficient in->A.
 *
 * Repeatedly picks a random x-coordinate, doubles the point twice and
 * computes its cofactor multiples for every prime in the table.  Walking
 * the table from the last prime to the first, each non-zero multiple is
 * multiplied by its prime: if the result is still non-zero the key is
 * rejected; otherwise the prime is folded into `order`.  The key is
 * accepted as soon as four_sqrt_p - order borrows.  If a point runs out of
 * primes first, a new random point is tried.
 */
bool validate(public_key const *in)
{
    const proj A = {in->A, fp_1};

    for (;;) {

        proj P[num_primes];
        fp_random(&P->x);
        P->z = fp_1;

        /* maximal 2-power in p+1 */
        xDBL(P, &A, P);
        xDBL(P, &A, P);

        cofactor_multiples(P, &A, 0, num_primes);

        u512 order = u512_1;

        for (size_t n = num_primes; n-- > 0; ) {

            proj *Q = &P[n];

            /* we only gain information if [(p+1)/l] P is non-zero */
            if (!memcmp(&Q->z, &fp_0, sizeof(fp)))
                continue;

            u512 tmp;
            u512_set(&tmp, primes[n]);
            xMUL(Q, &A, Q, &tmp);

            /* P does not have order dividing p+1. */
            if (memcmp(&Q->z, &fp_0, sizeof(fp)))
                return false;

            u512_mul3_64(&order, &order, primes[n]);

            /* u512_sub3 returns borrow */
            /* order > 4 sqrt(p), hence definitely supersingular */
            if (u512_sub3(&tmp, &four_sqrt_p, &order))
                return true;
        }

        /* P didn't have big enough order to prove supersingularity. */
    }
}
/* compute x^3 + Ax^2 + x */
/* The call sequence follows the factored form x * (x^2 + A*x + 1); the
 * result is written to *rhs. */
static void montgomery_rhs(fp *rhs, fp const *A, fp const *x)
{
    fp a_times_x;

    /* start from a copy of x and build the bracketed factor in *rhs */
    *rhs = *x;
    fp_sq1(rhs);
    fp_mul3(&a_times_x, A, x);
    fp_add2(rhs, &a_times_x);
    fp_add2(rhs, &fp_1);

    /* final multiplication by x */
    fp_mul2(rhs, x);
}
/* totally not constant-time. */
/*
 * Apply the private key `priv` to the public key `in` and store the
 * resulting Montgomery coefficient in out->A.  `out` may alias `in`
 * (bench.c calls it that way): `in` is only read before the main loop.
 *
 * Bookkeeping:
 *   e[0][i] / e[1][i]  remaining isogeny steps of degree primes[i] for
 *                      positive / negative exponents respectively;
 *   k[s]               4 times the product of the primes that need no
 *                      (further) step in direction s; random points are
 *                      multiplied by it before use;
 *   done[s]            set once every e[s][i] has reached zero.
 */
void action(public_key *out, public_key const *in, private_key const *priv)
{
    u512 k[2];
    u512_set(&k[0], 4); /* maximal 2-power in p+1 */
    u512_set(&k[1], 4); /* maximal 2-power in p+1 */

    uint8_t e[2][num_primes];

    for (size_t i = 0; i < num_primes; ++i) {

        /* Unpack one signed 4-bit exponent.  The byte is shifted as uint8_t:
         * the original shifted the promoted int8_t directly, and left-shifting
         * a negative int is undefined behaviour.  The bits kept by the int8_t
         * cast are unchanged.
         * NOTE(review): for even i this reads the high nibble, whereas
         * csidh_private() writes even indices to the low nibble, so the two
         * slots of each byte are effectively swapped.  Harmless for randomly
         * sampled keys, but confirm before relying on a fixed key layout. */
        int8_t t = (int8_t) ((uint8_t) priv->e[i / 2] << (i % 2 * 4)) >> 4;

        if (t > 0) {
            e[0][i] = t;
            e[1][i] = 0;
            u512_mul3_64(&k[1], &k[1], primes[i]);
        }
        else if (t < 0) {
            e[1][i] = -t;
            e[0][i] = 0;
            u512_mul3_64(&k[0], &k[0], primes[i]);
        }
        else {
            e[0][i] = 0;
            e[1][i] = 0;
            u512_mul3_64(&k[0], &k[0], primes[i]);
            u512_mul3_64(&k[1], &k[1], primes[i]);
        }
    }

    proj A = {in->A, fp_1};

    bool done[2] = {false, false};

    do {

        /* A is kept normalised (z == 1) between rounds. */
        assert(!memcmp(&A.z, &fp_1, sizeof(fp)));

        proj P;
        fp_random(&P.x);
        P.z = fp_1;

        /* The direction handled this round depends on whether the curve
         * equation's right-hand side at P.x is a square. */
        fp rhs;
        montgomery_rhs(&rhs, &A.x, &P.x);
        bool sign = !fp_issquare(&rhs);

        /* Nothing left to do in this direction: draw another point.
         * (`continue` jumps to the loop condition, skipping the
         * normalisation below, which is fine because A is untouched.) */
        if (done[sign])
            continue;

        xMUL(&P, &A, &P, &k[sign]);

        /* Optimistically assume completion; cleared again below for every
         * prime that still has steps outstanding. */
        done[sign] = true;

        for (size_t i = num_primes - 1; i < num_primes; --i) {  //changed loop direction

            if (e[sign][i]) {

                /* cofactor: product of the still-active primes below i */
                u512 cof = u512_1;
                for (size_t j = i - 1; j < num_primes; --j)   //changed loop direction
                    if (e[sign][j])
                        u512_mul3_64(&cof, &cof, primes[j]);

                proj K;
                xMUL(&K, &A, &P, &cof);

                /* Only a non-zero K is usable as an isogeny kernel. */
                if (memcmp(&K.z, &fp_0, sizeof(fp))) {

                    xISOG(&A, &P, &K, primes[i]);

                    /* Exponent exhausted: fold the prime into k[sign]. */
                    if (!--e[sign][i])
                        u512_mul3_64(&k[sign], &k[sign], primes[i]);
                }
            }

            done[sign] &= !e[sign][i];
        }

        /* Bring A back to z == 1 for the next round. */
        fp_inv(&A.z);
        fp_mul2(&A.x, &A.z);
        A.z = fp_1;

    } while (!(done[0] && done[1]));

    out->A = A.x;
}
/* includes public-key validation. */
/*
 * Validate `in`; when it passes, apply the private key and return true.
 * When it fails, out->A is filled by fp_random() and false is returned.
 */
bool csidh(public_key *out, public_key const *in, private_key const *priv)
{
    const bool valid = validate(in);

    if (valid)
        action(out, in, priv);
    else
        fp_random(&out->A);

    return valid;
}
csidh.h
0 → 100644
View file @
c00c37e0
#ifndef CSIDH_H
#define CSIDH_H
#include "u512.h"
#include "fp.h"
#include "mont.h"
/* specific to p, should perhaps be somewhere else */
#define num_primes 74
#define max_exponent 5
/* (2*5+1)^74 is roughly 2^256 */
/* Private key: num_primes exponents stored as 4-bit values, two per byte,
 * hence (num_primes + 1) / 2 bytes. */
typedef struct private_key {
    int8_t e[(num_primes + 1) / 2]; /* packed int4_t */
} private_key;
/* Public key: a single field element identifying a Montgomery curve. */
typedef struct public_key {
    fp A; /* Montgomery coefficient: represents y^2 = x^3 + Ax^2 + x */
} public_key;
/* Starting public key (A = 0), defined in csidh.c. */
extern const public_key base;

/* Sample a fresh private key into *priv. */
void csidh_private(private_key *priv);

/* Validate `in` and, if it is valid, apply `priv` to it and store the
 * result in *out.  Returns false (with *out randomised) for invalid input. */
bool csidh(public_key *out, public_key const *in, private_key const *priv);

/* Lower-level entry points.  They are defined with external linkage in
 * csidh.c and called directly by bench.c, so they need prototypes here;
 * without them the calls rely on implicit declarations. */
bool validate(public_key const *in);
void action(public_key *out, public_key const *in, private_key const *priv);
#endif
cycle.h
0 → 100644
View file @
c00c37e0
/*
* Copyright (c) 2003, 2007-8 Matteo Frigo
* Copyright (c) 2003, 2007-8 Massachusetts Institute of Technology
*
* Permission is hereby granted, free of charge, to any person obtaining
* a copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sublicense, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
* LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
* OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
* WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*
*/
/* machine-dependent cycle counters code. Needs to be inlined. */
/***************************************************************************/
/* To use the cycle counters in your code, simply #include "cycle.h" (this
file), and then use the functions/macros:
ticks getticks(void);
ticks is an opaque typedef defined below, representing the current time.
You extract the elapsed time between two calls to getticks() via:
double elapsed(ticks t1, ticks t0);
which returns a double-precision variable in arbitrary units. You
are not expected to convert this into human units like seconds; it
is intended only for *comparisons* of time intervals.
(In order to use some of the OS-dependent timer routines like
Solaris' gethrtime, you need to paste the autoconf snippet below
into your configure.ac file and #include "config.h" before cycle.h,
or define the relevant macros manually if you are not using autoconf.)
*/
/***************************************************************************/
/* This file uses macros like HAVE_GETHRTIME that are assumed to be
defined according to whether the corresponding function/type/header
is available on your system. The necessary macros are most
conveniently defined if you are using GNU autoconf, via the tests:
dnl ---------------------------------------------------------------------
AC_C_INLINE
AC_HEADER_TIME
AC_CHECK_HEADERS([sys/time.h c_asm.h intrinsics.h mach/mach_time.h])
AC_CHECK_TYPE([hrtime_t],[AC_DEFINE(HAVE_HRTIME_T, 1, [Define to 1 if hrtime_t is defined in <sys/time.h>])],,[#if HAVE_SYS_TIME_H
#include <sys/time.h>
#endif])
AC_CHECK_FUNCS([gethrtime read_real_time time_base_to_time clock_gettime mach_absolute_time])
dnl Cray UNICOS _rtc() (real-time clock) intrinsic
AC_MSG_CHECKING([for _rtc intrinsic])
rtc_ok=yes
AC_TRY_LINK([#ifdef HAVE_INTRINSICS_H
#include <intrinsics.h>
#endif], [_rtc()], [AC_DEFINE(HAVE__RTC,1,[Define if you have the UNICOS _rtc() intrinsic.])], [rtc_ok=no])
AC_MSG_RESULT($rtc_ok)
dnl ---------------------------------------------------------------------
*/
/***************************************************************************/
#if TIME_WITH_SYS_TIME
# include <sys/time.h>
# include <time.h>
#else
# if HAVE_SYS_TIME_H
# include <sys/time.h>
# else
# include <time.h>
# endif
#endif
/* Expands to a definition of elapsed(t1, t0) that returns t1 - t0 as a
 * double.  INL is the inline keyword spelling to use for the function; the
 * macro is for platforms whose `ticks` type converts directly to double. */
#define INLINE_ELAPSED(INL) static INL double elapsed(ticks t1, ticks t0) \
{ \
return (double)t1 - (double)t0; \
}
/*----------------------------------------------------------------*/
/* Solaris */
#if defined(HAVE_GETHRTIME) && defined(HAVE_HRTIME_T) && !defined(HAVE_TICK_COUNTER)
/* Solaris: ticks is hrtime_t and getticks is an alias for gethrtime. */
typedef hrtime_t ticks;
#define getticks gethrtime
INLINE_ELAPSED(inline)
#define HAVE_TICK_COUNTER
#endif
/*----------------------------------------------------------------*/
/* AIX v. 4+ routines to read the real-time clock or time-base register */
#if defined(HAVE_READ_REAL_TIME) && defined(HAVE_TIME_BASE_TO_TIME) && !defined(HAVE_TICK_COUNTER)
typedef timebasestruct_t ticks;

/* Sample the current time through read_real_time() and return the raw
 * time-base structure unchanged. */
static __inline ticks getticks(void)
{
     ticks now;

     read_real_time(&now, TIMEBASE_SZ);
     return now;
}
/* Difference t1 - t0.  Both samples are first passed through
 * time_base_to_time(); the tb_high difference is then scaled by 1e9 and
 * added to the tb_low difference. */
static __inline double elapsed(ticks t1, ticks t0) /* time in nanoseconds */
{
     double high_diff, low_diff;

     time_base_to_time(&t1, TIMEBASE_SZ);
     time_base_to_time(&t0, TIMEBASE_SZ);

     high_diff = (double)t1.tb_high - (double)t0.tb_high;
     low_diff = (double)t1.tb_low - (double)t0.tb_low;

     return high_diff * 1.0e9 + low_diff;
}
#define HAVE_TICK_COUNTER
#endif
/*----------------------------------------------------------------*/
/*
* PowerPC ``cycle'' counter using the time base register.
*/
#if ((((defined(__GNUC__) && (defined(__powerpc__) || defined(__ppc__))) || (defined(__MWERKS__) && defined(macintosh)))) || (defined(__IBM_GCC_ASM) && (defined(__powerpc__) || defined(__ppc__)))) && !defined(HAVE_TICK_COUNTER)
typedef unsigned long long ticks;

/* Assemble a 64-bit tick count from two 32-bit reads (mftbu for the upper
 * word, mftb for the lower word). */
static __inline__ ticks getticks(void)
{
     unsigned int tbl, tbu0, tbu1;

     /* The upper word is read before and after the lower word; the reads
      * are repeated until both upper reads agree, so the two halves belong
      * to the same 64-bit value. */
     do {
          __asm__ __volatile__ ("mftbu %0" : "=r"(tbu0));
          __asm__ __volatile__ ("mftb %0" : "=r"(tbl));
          __asm__ __volatile__ ("mftbu %0" : "=r"(tbu1));
     } while (tbu0 != tbu1);

     return (((unsigned long long)tbu0) << 32) | tbl;
}

INLINE_ELAPSED(__inline__)
#define HAVE_TICK_COUNTER
#endif
/* MacOS/Mach (Darwin) time-base register interface (unlike UpTime,
from Carbon, requires no additional libraries to be linked). */
#if defined(HAVE_MACH_ABSOLUTE_TIME) && defined(HAVE_MACH_MACH_TIME_H) && !defined(HAVE_TICK_COUNTER)
#include <mach/mach_time.h>
/* Mach/Darwin: ticks is a uint64_t and getticks is an alias for
 * mach_absolute_time. */
typedef uint64_t ticks;
#define getticks mach_absolute_time
INLINE_ELAPSED(__inline__)
#define HAVE_TICK_COUNTER
#endif
/*----------------------------------------------------------------*/
/*
* Pentium cycle counter
*/
#if (defined(__GNUC__) || defined(__ICC)) && defined(__i386__) && !defined(HAVE_TICK_COUNTER)
typedef unsigned long long ticks;

/* Read the time-stamp counter with RDTSC; the "=A" constraint binds the
 * 64-bit result to `ret`. */
static __inline__ ticks getticks(void)
{
     ticks ret;

     __asm__ __volatile__("rdtsc": "=A" (ret));
     /* no input, nothing else clobbered */
     return ret;
}

INLINE_ELAPSED(__inline__)
#define HAVE_TICK_COUNTER
#define TIME_MIN 5000.0
/* unreliable pentium IV cycle counter */
#endif
/* Visual C++ -- thanks to Morten Nissov for his help with this */
#if _MSC_VER >= 1200 && _M_IX86 >= 500 && !defined(HAVE_TICK_COUNTER)
#include <windows.h>
typedef LARGE_INTEGER ticks;
/* RDTSC is spelled as raw opcode bytes (0fh 31h) emitted into the
 * instruction stream. */
#define RDTSC __asm __emit 0fh __asm __emit 031h
/* hack for VC++ 5.0 */

/* Execute RDTSC and store EDX/EAX into the high/low parts of the result. */
static __inline ticks getticks(void)
{
     ticks retval;

     __asm {
          RDTSC
          mov retval.HighPart, edx
          mov retval.LowPart, eax
     }
     return retval;
}
/* Difference t1 - t0 of the 64-bit QuadPart fields, computed in double. */
static __inline double elapsed(ticks t1, ticks t0)
{
     const double later = (double)t1.QuadPart;
     const double earlier = (double)t0.QuadPart;

     return later - earlier;
}
#define HAVE_TICK_COUNTER
#define TIME_MIN 5000.0
/* unreliable pentium IV cycle counter */
#endif
/*----------------------------------------------------------------*/
/*
* X86-64 cycle counter
*/
#if (defined(__GNUC__) || defined(__ICC) || defined(__SUNPRO_C)) && defined(__x86_64__) && !defined(HAVE_TICK_COUNTER)
typedef unsigned long long ticks;

/*
 * Read the time-stamp counter with RDTSC and combine the EAX (low) and EDX
 * (high) outputs into one 64-bit tick count.
 *
 * Uses the __asm__ / __volatile__ spellings, as the other GCC sections of
 * this file do: the plain `asm` keyword is not available when compiling in
 * a strict ISO mode such as -std=c99 or -std=c11.
 */
static __inline__ ticks getticks(void)
{
     unsigned a, d;

     __asm__ __volatile__("rdtsc" : "=a" (a), "=d" (d));
     return ((ticks)a) | (((ticks)d) << 32);
}
INLINE_ELAPSED
(
__inline__
)
#define HAVE_TICK_COUNTER
#endif
/* PGI compiler, courtesy Cristiano Calonaci, Andrea Tarsi, & Roberto Gori.
NOTE: this code will fail to link unless you use the -Masmkeyword compiler
option (grrr). */
#if defined(__PGI) && defined(__x86_64__) && !defined(HAVE_TICK_COUNTER)
typedef unsigned long long ticks;

/* RDTSC, then merge EDX (shifted left by 32) into RAX.
 * There is deliberately no return statement: the asm sequence leaves the
 * combined value in %rax.
 * NOTE(review): this relies on the compiler taking the return value from
 * %rax untouched - confirm for the PGI versions in use. */
static ticks getticks(void)
{
    asm(" rdtsc; shl $0x20,%rdx; mov %eax,%eax; or %rdx,%rax; ");
}

INLINE_ELAPSED(__inline__)
#define HAVE_TICK_COUNTER
#endif
/* Visual C++, courtesy of Dirk Michaelis */
#if _MSC_VER >= 1400 && (defined(_M_AMD64) || defined(_M_X64)) && !defined(HAVE_TICK_COUNTER)
#include <intrin.h>
#pragma intrinsic(__rdtsc)
/* 64-bit Visual C++: ticks is an unsigned __int64 and getticks is an alias
 * for the __rdtsc intrinsic. */
typedef unsigned __int64 ticks;
#define getticks __rdtsc
INLINE_ELAPSED(__inline)
#define HAVE_TICK_COUNTER
#endif
/*----------------------------------------------------------------*/
/*
* IA64 cycle counter
*/
/* intel's icc/ecc compiler */
#if (defined(__EDG_VERSION) || defined(__ECC)) && defined(__ia64__) && !defined(HAVE_TICK_COUNTER)
typedef unsigned long ticks;
#include <ia64intrin.h>

/* Return the value of the register named _IA64_REG_AR_ITC, read through the
 * __getReg intrinsic. */
static __inline__ ticks getticks(void)
{
     return __getReg(_IA64_REG_AR_ITC);
}

INLINE_ELAPSED(__inline__)
#define HAVE_TICK_COUNTER
#endif
/* gcc */
#if defined(__GNUC__) && defined(__ia64__) && !defined(HAVE_TICK_COUNTER)
typedef unsigned long ticks;

/* Read the ar.itc application register with a single inline-asm move. */
static __inline__ ticks getticks(void)
{
     ticks ret;

     __asm__ __volatile__ ("mov %0=ar.itc" : "=r"(ret));
     return ret;
}

INLINE_ELAPSED(__inline__)
#define HAVE_TICK_COUNTER
#endif
/* HP/UX IA64 compiler, courtesy Teresa L. Johnson: */
#if defined(__hpux) && defined(__ia64) && !defined(HAVE_TICK_COUNTER)
#include <machine/sys/inline.h>
typedef
unsigned
long
ticks
;
static
inline
ticks
getticks
(
void
)
{
ticks
ret
;
ret
=
_Asm_mov_from_ar
(
_AREG_ITC
);
return
ret
;