Skip to content

Commit

Permalink
v3.7.4
Browse files Browse the repository at this point in the history
  • Loading branch information
JayDDee committed Nov 28, 2017
1 parent 6d1361c commit 4b57ac0
Show file tree
Hide file tree
Showing 70 changed files with 10,549 additions and 2,852 deletions.
56 changes: 35 additions & 21 deletions Makefile.am
Original file line number Diff line number Diff line change
Expand Up @@ -22,24 +22,6 @@ cpuminer_SOURCES = \
api.c \
sysinfos.c \
algo-gate-api.c\
algo/groestl/sph_groestl.c \
algo/bmw/sph_bmw.c \
algo/shavite/sph_shavite.c \
algo/shavite/shavite.c \
algo/echo/sph_echo.c \
algo/heavy/sph_hefty1.c \
algo/luffa/sph_luffa.c \
algo/cubehash/sph_cubehash.c \
algo/simd/sph_simd.c \
algo/hamsi/sph_hamsi.c \
algo/fugue/sph_fugue.c \
algo/gost/sph_gost.c \
algo/jh/sph_jh.c \
algo/sha/sph_sha2.c \
algo/sha/sph_sha2big.c \
algo/shabal/sph_shabal.c \
algo/sm3/sm3.c \
algo/whirlpool/sph_whirlpool.c\
crypto/blake2s.c \
crypto/oaes_lib.c \
crypto/c_keccak.c \
Expand Down Expand Up @@ -67,22 +49,34 @@ cpuminer_SOURCES = \
algo/blake/blake2s.c \
algo/blake/mod_blakecoin.c \
algo/blake/blakecoin.c \
algo/blake/decred-gate.c \
algo/blake/decred.c \
algo/blake/decred-4way.c \
algo/blake/pentablake-gate.c \
algo/blake/pentablake-4way.c \
algo/blake/pentablake.c \
algo/bmw/sph_bmw.c \
algo/bmw/bmw256.c \
algo/cubehash/sse2/cubehash_sse2.c\
algo/cryptonight/cryptolight.c \
algo/cryptonight/cryptonight-common.c\
algo/cryptonight/cryptonight-aesni.c\
algo/cryptonight/cryptonight.c\
algo/cubehash/sph_cubehash.c \
algo/cubehash/sse2/cubehash_sse2.c\
algo/drop.c \
algo/echo/sph_echo.c \
algo/echo/aes_ni/hash.c\
algo/fresh.c \
algo/gost/sph_gost.c \
algo/groestl/sph_groestl.c \
algo/groestl/groestl.c \
algo/groestl/myr-groestl.c \
algo/groestl/aes_ni/hash-groestl.c \
algo/groestl/aes_ni/hash-groestl256.c \
algo/fugue/sph_fugue.c \
algo/hamsi/sph_hamsi.c \
algo/haval/haval.c\
algo/heavy/sph_hefty1.c \
algo/heavy/heavy.c \
algo/heavy/bastion.c \
algo/hmq1725.c \
Expand All @@ -91,6 +85,10 @@ cpuminer_SOURCES = \
algo/hodl/hodl-wolf.c \
algo/hodl/sha512_avx.c \
algo/hodl/sha512_avx2.c \
algo/jh/sph_jh.c \
algo/jh/jh-hash-4way.c \
algo/jh/jha-gate.c \
algo/jh/jha-4way.c \
algo/jh/jha.c \
algo/keccak/sph_keccak.c \
algo/keccak/keccak.c\
Expand All @@ -99,6 +97,7 @@ cpuminer_SOURCES = \
algo/keccak/keccak-gate.c \
algo/keccak/sse2/keccak.c \
algo/lbry.c \
algo/luffa/sph_luffa.c \
algo/luffa/luffa.c \
algo/luffa/sse2/luffa_for_sse2.c \
algo/lyra2/lyra2.c \
Expand All @@ -109,7 +108,9 @@ cpuminer_SOURCES = \
algo/lyra2/lyra2z330.c \
algo/m7m.c \
algo/neoscrypt.c \
algo/nist5.c \
algo/nist5/nist5-gate.c \
algo/nist5/nist5-4way.c \
algo/nist5/nist5.c \
algo/pluck.c \
algo/polytimos/polytimos-gate.c \
algo/polytimos/polytimos.c \
Expand All @@ -119,8 +120,14 @@ cpuminer_SOURCES = \
algo/ripemd/sph_ripemd.c \
algo/scrypt.c \
algo/scryptjane/scrypt-jane.c \
algo/sha/sph_sha2.c \
algo/sha/sph_sha2big.c \
algo/sha/sha2.c \
algo/sha/sha256t.c \
algo/shabal/sph_shabal.c \
algo/shavite/sph_shavite.c \
algo/shavite/shavite.c \
algo/simd/sph_simd.c \
algo/simd/sse2/nist.c \
algo/simd/sse2/vector.c \
algo/skein/sph_skein.c \
Expand All @@ -132,11 +139,18 @@ cpuminer_SOURCES = \
algo/skein/skein2-4way.c \
algo/skein/skein2-gate.c \
algo/skunk.c \
algo/sm3/sm3.c \
algo/tiger/sph_tiger.c \
algo/timetravel.c \
algo/timetravel10.c \
algo/tribus.c \
algo/tribus/tribus-gate.c \
algo/tribus/tribus.c \
algo/tribus/tribus-4way.c \
algo/veltor.c \
algo/whirlpool/sph_whirlpool.c \
algo/whirlpool/whirlpool-hash-4way.c \
algo/whirlpool/whirlpool-gate.c \
algo/whirlpool/whirlpool-4way.c \
algo/whirlpool/whirlpool.c \
algo/whirlpool/whirlpoolx.c \
algo/x11/phi1612.c \
Expand Down
18 changes: 11 additions & 7 deletions README.txt
Original file line number Diff line number Diff line change
Expand Up @@ -17,13 +17,17 @@ supported by cpuminer-opt due to an incompatible implementation of SSE2 on
these CPUs. Some algos may crash the miner with an invalid instruction.
Users are recommended to use an unoptimized miner such as cpuminer-multi.

Exe name Compile opts Arch name

cpuminer-sse2.exe -march=core2, Core2
cpuminer-sse42.exe -march=corei7, Nehalem
cpuminer-aes-sse42.exe -maes -msse4.2 Westmere
cpuminer-aes-avx.exe -march=corei7-avx, Sandybridge, Ivybridge
cpuminer-aes-avx2.exe -march=core-avx2, Haswell, Broadwell, Skylake, Kabylake
Exe name Compile opts Arch name

cpuminer-sse2.exe -march=core2 Core2
cpuminer-sse42.exe -march=corei7 Nehalem
cpuminer-aes-sse42.exe "-maes -msse4.2" Westmere
cpuminer-aes-avx.exe "-march=corei7-avx" Sandybridge, Ivybridge
cpuminer-aes-avx2.exe "-march=core-avx2" Haswell, Broadwell, Skylake, Kabylake
cpuminer-4way.exe "-march=core-avx2 -DFOUR_WAY"

4way requires a CPU with AES and AVX2. It is still under development and
only a few algos are supported. See change log in RELEASE_NOTES in source
package for supported algos.

There is no binary support available for SHA on AMD Ryzen CPUs.
16 changes: 16 additions & 0 deletions RELEASE_NOTES
Original file line number Diff line number Diff line change
Expand Up @@ -67,6 +67,11 @@ have been due to AVX and AVX2 optimizations added to that version.
Additional improvements are expected on Ryzen with openssl 1.1.
"-march=znver1" or "-msha".

Additional instructions for static compilation can be found here:
https://lxadm.com/Static_compilation_of_cpuminer
Static builds should only be considered in a homogeneous HW and SW environment.
Local builds will always have the best performance and compatibility.

Extract cpuminer source.

tar xvzf cpuminer-opt-x.y.z.tar.gz
Expand Down Expand Up @@ -96,6 +101,11 @@ Start mining.

./cpuminer -a algo -o url -u username -p password

Windows

The following is how the Windows binary releases are built. It's old and
not very good but it works, for me anyway.

Building on Windows prerequisites:

msys
Expand Down Expand Up @@ -154,6 +164,12 @@ Support for even older x86_64 without AES_NI or SSE2 is not availble.
Change Log
----------

v3.7.4

Removed unnecessary build options.

Added 4way support for tribus and nist5.

v3.7.3

Added polytimos algo.
Expand Down
7 changes: 3 additions & 4 deletions algo/blake/blake-4way.c
Original file line number Diff line number Diff line change
@@ -1,11 +1,11 @@
#include "algo-gate-api.h"
#include "blake-gate.h"
#include "sph_blake.h"
#include "blake-hash-4way.h"
#include <string.h>
#include <stdint.h>
#include <memory.h>

#if defined (__AVX__)
#if defined (BLAKE_4WAY)

void blakehash_4way(void *state, const void *input)
{
Expand Down Expand Up @@ -41,7 +41,7 @@ int scanhash_blake_4way( int thr_id, struct work *work, uint32_t max_nonce,
uint32_t n = first_nonce;
uint32_t *nonces = work->nonces;
bool *found = work->nfound;
int num_found;
int num_found = 0;

// if (opt_benchmark)
// HTarget = 0x7f;
Expand All @@ -55,7 +55,6 @@ int scanhash_blake_4way( int thr_id, struct work *work, uint32_t max_nonce,
uint32_t *noncep = vdata + 76; // 19*4
do {
found[0] = found[1] = found[2] = found[3] = false;
num_found = 0;
be32enc( noncep, n );
be32enc( noncep +2, n+1 );
be32enc( noncep +4, n+2 );
Expand Down
4 changes: 2 additions & 2 deletions algo/blake/blake-gate.c
Original file line number Diff line number Diff line change
Expand Up @@ -8,11 +8,11 @@ int64_t blake_get_max64 ()
bool register_blake_algo( algo_gate_t* gate )
{
gate->get_max64 = (void*)&blake_get_max64;
#if defined (__AVX2__) && defined (FOUR_WAY)
//#if defined (__AVX2__) && defined (FOUR_WAY)
// gate->optimizations = SSE2_OPT | AVX_OPT | AVX2_OPT;
// gate->scanhash = (void*)&scanhash_blake_8way;
// gate->hash = (void*)&blakehash_8way;
#elif defined(__AVX__) && defined (FOUR_WAY)
#if defined(BLAKE_4WAY)
gate->optimizations = SSE2_OPT | AVX_OPT;
gate->scanhash = (void*)&scanhash_blake_4way;
gate->hash = (void*)&blakehash_4way;
Expand Down
8 changes: 3 additions & 5 deletions algo/blake/blake-gate.h
Original file line number Diff line number Diff line change
Expand Up @@ -4,13 +4,11 @@
#include "algo-gate-api.h"
#include <stdint.h>

#if defined (__AVX2__)
//void blakehash_84way(void *state, const void *input);
//int scanhash_blake_8way( int thr_id, struct work *work, uint32_t max_nonce,
// uint64_t *hashes_done );
#if defined(FOUR_WAY) && defined(__AVX__)
#define BLAKE_4WAY
#endif

#if defined (__AVX__)
#if defined (BLAKE_4WAY)
void blakehash_4way(void *state, const void *input);
int scanhash_blake_4way( int thr_id, struct work *work, uint32_t max_nonce,
uint64_t *hashes_done );
Expand Down
22 changes: 14 additions & 8 deletions algo/blake/blake-hash-4way.c
Original file line number Diff line number Diff line change
Expand Up @@ -346,7 +346,7 @@ static const sph_u32 CS[16] = {
#define CBF SPH_C64(0x636920D871574E69)

#if SPH_COMPACT_BLAKE_64

// not used
static const sph_u64 CB[16] = {
SPH_C64(0x243F6A8885A308D3), SPH_C64(0x13198A2E03707344),
SPH_C64(0xA4093822299F31D0), SPH_C64(0x082EFA98EC4E6C89),
Expand Down Expand Up @@ -428,7 +428,7 @@ do { \
} while (0)

#if SPH_COMPACT_BLAKE_64

// not used
#define ROUND_B_4WAY(r) do { \
GB_4WAY(M[sigma[r][0x0]], M[sigma[r][0x1]], \
CB[sigma[r][0x0]], CB[sigma[r][0x1]], V0, V4, V8, VC); \
Expand All @@ -449,7 +449,7 @@ do { \
} while (0)

#else

//current_impl
#define ROUND_B_4WAY(r) do { \
GB_4WAY(Mx(r, 0), Mx(r, 1), CBx(r, 0), CBx(r, 1), V0, V4, V8, VC); \
GB_4WAY(Mx(r, 2), Mx(r, 3), CBx(r, 2), CBx(r, 3), V1, V5, V9, VD); \
Expand Down Expand Up @@ -696,6 +696,7 @@ do { \

#if SPH_COMPACT_BLAKE_64

// not used
#define COMPRESS64_4WAY do { \
__m256i M[16]; \
__m256i V0, V1, V2, V3, V4, V5, V6, V7; \
Expand Down Expand Up @@ -759,6 +760,8 @@ do { \

#else

//current impl

#define COMPRESS64_4WAY do { \
__m256i M0, M1, M2, M3, M4, M5, M6, M7; \
__m256i M8, M9, MA, MB, MC, MD, ME, MF; \
Expand Down Expand Up @@ -986,7 +989,7 @@ blake64_4way( blake_4way_big_context *sc, const void *data, size_t len)
size_t ptr;
DECL_STATE64_4WAY

const int buf_size = 64; // sizeof/8
const int buf_size = 128; // sizeof/8

buf = sc->buf;
ptr = sc->ptr;
Expand Down Expand Up @@ -1037,7 +1040,7 @@ blake64_4way_close( blake_4way_big_context *sc,
__m256i *out;

ptr = sc->ptr;
bit_len = ((unsigned)ptr << 3) + n;
bit_len = ((unsigned)ptr << 3);
z = 0x80 >> n;
zz = ((ub & -z) | z) & 0xFF;
u.buf[ptr>>3] = _mm256_set_epi64x( zz, zz, zz, zz );
Expand All @@ -1057,9 +1060,9 @@ blake64_4way_close( blake_4way_big_context *sc,
{
sc->T0 -= 1024 - bit_len;
}
if ( ptr <= (96 >> 3) )
if ( ptr <= 104 )
{
memset_zero_m256i( u.buf + (ptr>>3) + 1, (96-ptr) >> 3 );
memset_zero_m256i( u.buf + (ptr>>3) + 1, (104-ptr) >> 3 );
if ( out_size_w64 == 8 )
u.buf[(104>>3)] = _mm256_or_si256( u.buf[(104>>3)],
_mm256_set_epi64x( 0x0100000000000000,
Expand All @@ -1070,11 +1073,13 @@ blake64_4way_close( blake_4way_big_context *sc,
_mm256_set_epi64x( th, th, th, th ) );
*(u.buf+(120>>3)) = mm256_byteswap_epi64(
_mm256_set_epi64x( tl, tl, tl, tl ) );

blake64_4way( sc, u.buf + (ptr>>3), 128 - ptr );
}
else
{
memset_zero_m256i( u.buf + (ptr>>3) + 1, (127 - ptr) >> 3 );
memset_zero_m256i( u.buf + (ptr>>3) + 1, (120 - ptr) >> 3 );

blake64_4way( sc, u.buf + (ptr>>3), 128 - ptr );
sc->T0 = SPH_C64(0xFFFFFFFFFFFFFC00);
sc->T1 = SPH_C64(0xFFFFFFFFFFFFFFFF);
Expand All @@ -1089,6 +1094,7 @@ blake64_4way_close( blake_4way_big_context *sc,
_mm256_set_epi64x( th, th, th, th ) );
*(u.buf+(120>>3)) = mm256_byteswap_epi64(
_mm256_set_epi64x( tl, tl, tl, tl ) );

blake64_4way( sc, u.buf, 128 );
}
out = (__m256i*)dst;
Expand Down
6 changes: 3 additions & 3 deletions algo/blake/blake-hash-4way.h
Original file line number Diff line number Diff line change
Expand Up @@ -62,9 +62,9 @@ extern "C"{
#ifdef __AVX__
typedef struct {
__m128i buf[16] __attribute__ ((aligned (64)));
size_t ptr;
__m128i H[8];
__m128i S[4];
size_t ptr;
sph_u32 T0, T1;
} blake_4way_small_context;

Expand All @@ -82,13 +82,13 @@ void blake256_4way_addbits_and_close(

typedef struct {
__m256i buf[16] __attribute__ ((aligned (64)));
size_t ptr;
__m256i H[8];
__m256i S[4];
size_t ptr;
sph_u64 T0, T1;
} blake_4way_big_context;

typedef blake_4way_big_context blake512_avx2_context;
typedef blake_4way_big_context blake512_4way_context;

void blake512_4way_init(void *cc);
void blake512_4way(void *cc, const void *data, size_t len);
Expand Down
Loading

0 comments on commit 4b57ac0

Please sign in to comment.