Skip to content

Commit

Permalink
v3.7.9
Browse files Browse the repository at this point in the history
  • Loading branch information
JayDDee committed Jan 9, 2018
1 parent 2d2e54f commit bee78ea
Show file tree
Hide file tree
Showing 58 changed files with 2,817 additions and 499 deletions.
34 changes: 20 additions & 14 deletions Makefile.am
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,6 @@ cpuminer_SOURCES = \
algo/argon2/ar2/cores.c \
algo/argon2/ar2/ar2-scrypt-jane.c \
algo/argon2/ar2/blake2b.c \
algo/axiom.c \
algo/blake/sph_blake.c \
algo/blake/blake-hash-4way.c \
algo/blake/blake-gate.c \
Expand All @@ -56,17 +55,16 @@ cpuminer_SOURCES = \
algo/blake/pentablake-4way.c \
algo/blake/pentablake.c \
algo/bmw/sph_bmw.c \
algo/bmw/bmw-hash-4way.c \
algo/bmw/bmw256.c \
algo/cryptonight/cryptolight.c \
algo/cryptonight/cryptonight-common.c\
algo/cryptonight/cryptonight-aesni.c\
algo/cryptonight/cryptonight.c\
algo/cubehash/sph_cubehash.c \
algo/cubehash/sse2/cubehash_sse2.c\
algo/drop.c \
algo/echo/sph_echo.c \
algo/echo/aes_ni/hash.c\
algo/fresh.c \
algo/gost/sph_gost.c \
algo/groestl/sph_groestl.c \
algo/groestl/groestl.c \
Expand Down Expand Up @@ -113,9 +111,8 @@ cpuminer_SOURCES = \
algo/nist5/nist5-gate.c \
algo/nist5/nist5-4way.c \
algo/nist5/nist5.c \
algo/nist5/zr5.c \
algo/pluck.c \
algo/polytimos/polytimos-gate.c \
algo/polytimos/polytimos.c \
algo/quark/quark.c \
algo/qubit/qubit.c \
algo/qubit/deep.c \
Expand All @@ -127,6 +124,7 @@ cpuminer_SOURCES = \
algo/sha/sha2.c \
algo/sha/sha256t.c \
algo/shabal/sph_shabal.c \
algo/shabal/shabal-hash-4way.c \
algo/shavite/sph_shavite.c \
algo/shavite/sph-shavite-aesni.c \
algo/shavite/shavite.c \
Expand All @@ -141,15 +139,10 @@ cpuminer_SOURCES = \
algo/skein/skein2.c \
algo/skein/skein2-4way.c \
algo/skein/skein2-gate.c \
algo/skunk.c \
algo/sm3/sm3.c \
algo/tiger/sph_tiger.c \
algo/timetravel.c \
algo/timetravel10.c \
algo/tribus/tribus-gate.c \
algo/tribus/tribus.c \
algo/tribus/tribus-4way.c \
algo/veltor.c \
algo/whirlpool/sph_whirlpool.c \
algo/whirlpool/whirlpool-hash-4way.c \
algo/whirlpool/whirlpool-gate.c \
Expand All @@ -165,6 +158,10 @@ cpuminer_SOURCES = \
algo/x11/c11-gate.c \
algo/x11/c11.c \
algo/x11/c11-4way.c \
algo/x11/tribus-gate.c \
algo/x11/tribus.c \
algo/x11/tribus-4way.c \
algo/x11/fresh.c \
algo/x11/x11evo.c \
algo/x13/x13-gate.c \
algo/x13/x13.c \
Expand All @@ -175,9 +172,20 @@ cpuminer_SOURCES = \
algo/x13/phi1612-gate.c \
algo/x13/phi1612.c \
algo/x13/phi1612-4way.c \
algo/x13/skunk-gate.c \
algo/x13/skunk-4way.c \
algo/x13/skunk.c \
algo/x13/drop.c \
algo/x14/x14-gate.c \
algo/x14/x14.c \
algo/x14/x14-4way.c \
algo/x14/veltor-gate.c \
algo/x14/veltor.c \
algo/x14/veltor-4way.c \
algo/x14/polytimos-gate.c \
algo/x14/polytimos.c \
algo/x14/polytimos-4way.c \
algo/x14/axiom.c \
algo/x15/x15-gate.c \
algo/x15/x15.c \
algo/x15/x15-4way.c \
Expand All @@ -189,10 +197,8 @@ cpuminer_SOURCES = \
algo/x17/xevan-4way.c \
algo/x17/hmq1725.c \
algo/yescrypt/yescrypt.c \
algo/yescrypt/sha256_Y.c\
algo/yescrypt/yescrypt-simd.c\
algo/zr5.c

algo/yescrypt/sha256_Y.c \
algo/yescrypt/yescrypt-simd.c

disable_flags =

Expand Down
3 changes: 2 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -68,7 +68,7 @@ Supported Algorithms
timetravel10 Bitcore
tribus Denarius (DNR)
vanilla blake256r8vnl (VCash)
veltor
veltor (VLT)
whirlpool
whirlpoolx
x11 Dash
Expand All @@ -81,6 +81,7 @@ Supported Algorithms
x17
xevan Bitsend
yescrypt Globalboost-Y (BSTY)
yescryptr8 BitZeny (ZNY)\n\
yescryptr16 Yenten (YTN)
zr5 Ziftr

Expand Down
7 changes: 7 additions & 0 deletions RELEASE_NOTES
Original file line number Diff line number Diff line change
Expand Up @@ -165,6 +165,13 @@ Support for even older x86_64 without AES_NI or SSE2 is not availble.
Change Log
----------

v3.7.9

Partial 4way optimizations for veltor, skunk, polytimos, lyra2z.
Additional 4way optimizations for X algos.
New algo yescryptr8 for BitZeny, not to be confused with original
yescrypt Globalboost-Y.

v3.7.8

Partial 4way optimization for most X algos including c11, xevan, phi, hsr
Expand Down
5 changes: 3 additions & 2 deletions algo-gate-api.c
Original file line number Diff line number Diff line change
Expand Up @@ -219,6 +219,7 @@ bool register_algo_gate( int algo, algo_gate_t *gate )
case ALGO_X17: register_x17_algo ( gate ); break;
case ALGO_XEVAN: register_xevan_algo ( gate ); break;
case ALGO_YESCRYPT: register_yescrypt_algo ( gate ); break;
case ALGO_YESCRYPTR8: register_yescryptr8_algo ( gate ); break;
case ALGO_YESCRYPTR16: register_yescryptr16_algo ( gate ); break;
case ALGO_ZR5: register_zr5_algo ( gate ); break;
default:
Expand Down Expand Up @@ -278,6 +279,7 @@ const char* const algo_alias_map[][2] =
{
// alias proper
{ "bitcore", "timetravel10" },
{ "bitzeny", "yescryptr8" },
{ "blake256r8", "blakecoin" },
{ "blake256r8vnl", "vanilla" },
{ "blake256r14", "blake" },
Expand All @@ -300,10 +302,9 @@ const char* const algo_alias_map[][2] =
// { "sia", "blake2b" },
{ "sib", "x11gost" },
{ "timetravel8", "timetravel" },
{ "yes", "yescrypt" },
{ "ziftr", "zr5" },
{ "yenten", "yescryptr16" },
{ "yescryptr8", "yescrypt" },
{ "yescryptr8k", "yescrypt" },
{ "zcoin", "lyra2z" },
{ "zoin", "lyra2z330" },
{ NULL, NULL }
Expand Down
58 changes: 16 additions & 42 deletions algo/blake/blake-hash-4way.c
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,6 @@
#include <string.h>
#include <limits.h>

//#include "sph_blake.h"
#include "blake-hash-4way.h"

#ifdef __cplusplus
Expand Down Expand Up @@ -98,18 +97,6 @@ static const unsigned sigma[16][16] = {
{ 2, 12, 6, 10, 0, 11, 8, 3, 4, 13, 7, 5, 15, 14, 1, 9 }
};

/*
0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15
14 10 4 8 9 15 13 6 1 12 0 2 11 7 5 3
11 8 12 0 5 2 15 13 10 14 3 6 7 1 9 4
7 9 3 1 13 12 11 14 2 6 5 10 4 0 15 8
9 0 5 7 2 4 10 15 14 1 11 12 6 8 3 13
2 12 6 10 0 11 8 3 4 13 7 5 15 14 1 9
12 5 1 15 14 13 4 10 0 7 6 3 9 2 8 11
13 11 7 14 12 1 3 9 5 0 15 4 8 6 2 10
6 15 14 9 11 3 0 8 12 2 13 7 1 4 10 5
10 2 8 4 7 6 1 5 15 11 9 14 3 12 13 0
*/
#endif

#define Z00 0
Expand Down Expand Up @@ -914,34 +901,29 @@ blake32_4way_close( blake_4way_small_context *sc, unsigned ub, unsigned n,

ptr = sc->ptr;
bit_len = ((unsigned)ptr << 3);
// unsigned z = 0x80 >> n;
// unsigned zz = ((ub & -z) | z) & 0xFF;
// u.buf[ptr>>2] = _mm_set_epi32( zz, zz, zz, zz );
u.buf[ptr>>2] = _mm_set1_epi32( 0x80 );
tl = sc->T0 + bit_len;
th = sc->T1;

if ( ptr == 0 )
{
sc->T0 = SPH_C32(0xFFFFFE00);
sc->T1 = SPH_C32(0xFFFFFFFF);
sc->T0 = SPH_C32(0xFFFFFE00UL);
sc->T1 = SPH_C32(0xFFFFFFFFUL);
}
else if ( sc->T0 == 0 )
{
sc->T0 = SPH_C32(0xFFFFFE00) + bit_len;
sc->T0 = SPH_C32(0xFFFFFE00UL) + bit_len;
sc->T1 = SPH_T32(sc->T1 - 1);
}
else
sc->T0 -= 512 - bit_len;

// if ( ptr <= 48 )
if ( ptr <= 52 )
{
memset_zero_128( u.buf + (ptr>>2) + 1, (52 - ptr) >> 2 );
// memset_zero_128( u.buf + (ptr>>2) + 1, (48 - ptr) >> 2 );
if (out_size_w32 == 8)
u.buf[52>>2] = _mm_or_si128( u.buf[52>>2],
_mm_set1_epi32( 0x010000000 ) );
_mm_set1_epi32( 0x01000000UL ) );
*(u.buf+(56>>2)) = mm_byteswap_32( _mm_set1_epi32( th ) );
*(u.buf+(60>>2)) = mm_byteswap_32( _mm_set1_epi32( tl ) );
blake32_4way( sc, u.buf + (ptr>>2), 64 - ptr );
Expand All @@ -950,19 +932,18 @@ blake32_4way_close( blake_4way_small_context *sc, unsigned ub, unsigned n,
{
memset_zero_128( u.buf + (ptr>>2) + 1, (60-ptr) >> 2 );
blake32_4way( sc, u.buf + (ptr>>2), 64 - ptr );
sc->T0 = SPH_C32(0xFFFFFE00);
sc->T1 = SPH_C32(0xFFFFFFFF);
sc->T0 = SPH_C32(0xFFFFFE00UL);
sc->T1 = SPH_C32(0xFFFFFFFFUL);
memset_zero_128( u.buf, 56>>2 );
if (out_size_w32 == 8)
u.buf[52>>2] = _mm_set1_epi32( 0x010000000 );
u.buf[52>>2] = _mm_set1_epi32( 0x01000000UL );
*(u.buf+(56>>2)) = mm_byteswap_32( _mm_set1_epi32( th ) );
*(u.buf+(60>>2)) = mm_byteswap_32( _mm_set1_epi32( tl ) );
blake32_4way( sc, u.buf, 64 );
}
out = (__m128i*)dst;
for ( k = 0; k < out_size_w32; k++ )
out[k] = mm_byteswap_32( sc->H[k] );
// out[k] = sc->H[k];
}

#if defined (__AVX2__)
Expand All @@ -975,9 +956,9 @@ blake64_4way_init( blake_4way_big_context *sc, const sph_u64 *iv,
{
int i;
for ( i = 0; i < 8; i++ )
sc->H[i] = _mm256_set_epi64x( iv[i], iv[i], iv[i], iv[i] );
sc->H[i] = _mm256_set1_epi64x( iv[i] );
for ( i = 0; i < 4; i++ )
sc->S[i] = _mm256_set_epi64x( salt[i], salt[i], salt[i], salt[i] );
sc->S[i] = _mm256_set1_epi64x( salt[i] );
sc->T0 = sc->T1 = 0;
sc->ptr = 0;
}
Expand Down Expand Up @@ -1049,12 +1030,12 @@ blake64_4way_close( blake_4way_big_context *sc,
th = sc->T1;
if (ptr == 0 )
{
sc->T0 = SPH_C64(0xFFFFFFFFFFFFFC00);
sc->T1 = SPH_C64(0xFFFFFFFFFFFFFFFF);
sc->T0 = SPH_C64(0xFFFFFFFFFFFFFC00ULL);
sc->T1 = SPH_C64(0xFFFFFFFFFFFFFFFFULL);
}
else if ( sc->T0 == 0 )
{
sc->T0 = SPH_C64(0xFFFFFFFFFFFFFC00) + bit_len;
sc->T0 = SPH_C64(0xFFFFFFFFFFFFFC00ULL) + bit_len;
sc->T1 = SPH_T64(sc->T1 - 1);
}
else
Expand All @@ -1066,10 +1047,7 @@ blake64_4way_close( blake_4way_big_context *sc,
memset_zero_256( u.buf + (ptr>>3) + 1, (104-ptr) >> 3 );
if ( out_size_w64 == 8 )
u.buf[(104>>3)] = _mm256_or_si256( u.buf[(104>>3)],
_mm256_set_epi64x( 0x0100000000000000,
0x0100000000000000,
0x0100000000000000,
0x0100000000000000 ) );
_mm256_set1_epi64x( 0x0100000000000000ULL ) );
*(u.buf+(112>>3)) = mm256_byteswap_64(
_mm256_set_epi64x( th, th, th, th ) );
*(u.buf+(120>>3)) = mm256_byteswap_64(
Expand All @@ -1082,15 +1060,11 @@ blake64_4way_close( blake_4way_big_context *sc,
memset_zero_256( u.buf + (ptr>>3) + 1, (120 - ptr) >> 3 );

blake64_4way( sc, u.buf + (ptr>>3), 128 - ptr );
sc->T0 = SPH_C64(0xFFFFFFFFFFFFFC00);
sc->T1 = SPH_C64(0xFFFFFFFFFFFFFFFF);
sc->T0 = SPH_C64(0xFFFFFFFFFFFFFC00ULL);
sc->T1 = SPH_C64(0xFFFFFFFFFFFFFFFFULL);
memset_zero_256( u.buf, 112>>3 );
if ( out_size_w64 == 8 )
u.buf[104>>3] = _mm256_set_epi64x( 0x0100000000000000,
0x0100000000000000,
0x0100000000000000,
0x0100000000000000 );

u.buf[104>>3] = _mm256_set1_epi64x( 0x0100000000000000ULL );
*(u.buf+(112>>3)) = mm256_byteswap_64(
_mm256_set_epi64x( th, th, th, th ) );
*(u.buf+(120>>3)) = mm256_byteswap_64(
Expand Down
Loading

0 comments on commit bee78ea

Please sign in to comment.