/* Written in 2021 by Sebastiano Vigna (vigna@acm.org) To the extent possible under law, the author has dedicated all copyright and related and neighboring rights to this software to the public domain worldwide. This software is distributed without any warranty. See . */ #include #include /* This is a Marsaglia multiply-with-carry generator with period approximately 2^255. It is close in speed to a scrambled linear generator, as its only 128-bit operations are a multiplication and sum; is an excellent generator based on congruential arithmetic. As all MWC generators, it simulates a multiplicative LCG with prime modulus m = 0xff377e26f82da749ffffffffffffffffffffffffffffffffffffffffffffffff and multiplier given by the inverse of 2^64 modulo m. The modulus has a particular form, which creates some theoretical issues, but at this size a generator of this kind passes all known statistical tests. For a generator of the same type with stronger theoretical guarantees consider a Goresky-Klapper generalized multiply-with-carry generator. */ #define MWC_A3 0xff377e26f82da74a /* The state must be initialized so that 0 < c < MWC_A3 - 1. */ uint64_t x, y, z, c; uint64_t inline next() { const __uint128_t t = MWC_A3 * (__uint128_t)x + c; x = y; y = z; c = t >> 64; return z = t; } /* The following jump functions use a minimal multiprecision library. */ #define MP_SIZE 5 #include "mp.c" static uint64_t mod[MP_SIZE] = { 0xffffffffffffffff, 0xffffffffffffffff, 0xffffffffffffffff, MWC_A3 - 1 }; /* This is the jump function for the generator. It is equivalent to 2^128 calls to next(); it can be used to generate 2^128 non-overlapping subsequences for parallel computations. Equivalent C++ Boost multiprecision code: cpp_int b = cpp_int(1) << 64; cpp_int b3 = b * b * b; cpp_int m = MWC_A3 * b3 - 1; cpp_int r = cpp_int("0x377fc42deaad8b463e78ff9958b436d98aeb90fc17d34f8c049ffebb8aed35da"); cpp_int s = ((x + y * b + z * b * b + c * b3) * r) % m; x = uint64_t(s); y = uint64_t(s >> 64); z = uint64_t(s >> 128); c = uint64_t(s >> 192); */ void jump(void) { static uint64_t jump[MP_SIZE] = { 0x49ffebb8aed35da, 0x8aeb90fc17d34f8c, 0x3e78ff9958b436d9, 0x377fc42deaad8b46 }; uint64_t state[MP_SIZE] = { x, y, z, c }; mul(state, jump, mod); x = state; y = state; z = state; c = state; } /* This is the long-jump function for the generator. It is equivalent to 2^192 calls to next(); it can be used to generate 2^64 starting points, from each of which jump() will generate 2^64 non-overlapping subsequences for parallel distributed computations. Equivalent C++ Boost multiprecision code: cpp_int b = cpp_int(1) << 64; cpp_int b3 = b * b * b; cpp_int m = MWC_A3 * b3 - 1; cpp_int r = cpp_int("0x630e9c671e238c8a0f4fc97e3b80db1b1eafd94d7d3ac65c7cbd7641a0db932f"); cpp_int s = ((x + y * b + z * b * b + c * b3) * r) % m; x = uint64_t(s); y = uint64_t(s >> 64); z = uint64_t(s >> 128); c = uint64_t(s >> 192); */ void long_jump(void) { static uint64_t long_jump[MP_SIZE] = { 0x7cbd7641a0db932f, 0x1eafd94d7d3ac65c, 0xf4fc97e3b80db1b, 0x630e9c671e238c8a }; uint64_t state[MP_SIZE] = { x, y, z, c }; mul(state, long_jump, mod); x = state; y = state; z = state; c = state; }