SHA-256 and bitcoin mining
There is a lottery in bitcoin network that takes place every ~10 minutes. The lottery winner submits a new block to blockchain (if the majority of the network agrees with it) and awards with bitcoins (25 initally, halve every 210000 blocks or ~4 years). The winner is someone who was first to find a nonce (a number) that beeing concatenated with some other data (including previous block hash and timestamp) results in a hash function output smaller than some value (difficulty, adjusts every ~2 weeks in order to keep the time needs to resolve the task in about 10 minutes).
What to hash:
- version:
0x20000000 - reversed previous block hash
- transactions (reversed Merkle tree root)
- timestamp
- nonce
block_hash = sha256(sha256(
version +
reversed(prev_block) +
reversed(mrkl_root) +
timestamp +
bits +
nonce
))
An example using python
Took a block from the blockchain:
Previous block: 00000000000000000146161cdb757ffc5a8b22dff06b27a76f6f7d0584f5df05
Hash: 00000000000000000244bf1d3600aada272d2d08aa1919a88ba9ecd14f42f3ae
Merkle root: 536e129807282bf22dcb0c169dc0e5cfeb47dac85c7afde3afb2e0fb02161076
Timestamp: 2016-09-27 13:38:38 (0x57ea765e)
Bits: 402951892 (0x18048ed4)
Nonce: 2612046070 (0x9bb0a8f6)
bits = 0x18048ed4
exp = bits >> 24 # 0x18
mant = bits & 0xffffff # 0x48ed4
target_hexstr = '%064x' % (mant * (1<<(8*(exp - 3))))
# '0000000000000000048ed4000000000000000000000000000000000000000000'
import hashlib
import struct
import binascii
sha256 = hashlib.sha256
block = (
struct.pack('<L', 0x20000000) +
bytes.fromhex('00000000000000000146161cdb757ffc5a8b22dff06b27a76f6f7d0584f5df05')[::-1] +
bytes.fromhex('536e129807282bf22dcb0c169dc0e5cfeb47dac85c7afde3afb2e0fb02161076')[::-1] +
struct.pack('<LLL', 0x57ea765e, 0x18048ed4, 0x9bb0a8f6)
)
first_hash = sha256(block).digest()
second_hash = sha256(first_hash).digest()
print(binascii.b2a_hex(block))
print(binascii.b2a_hex(first_hash[::-1]))
print(binascii.b2a_hex(second_hash[::-1]))
# b'0000002005dff584057d6f6fa7276bf0df228b5afc7f75db1c164601000000000000000076101602fbe0b2afe3fd7a5cc8da47ebcfe5c09d160ccb2df22b280798126e535e76ea57d48e0418f6a8b09b'
# b'57e9fff3d914c07dd4eadc189887cbcc0ead44bc0e753c6ee963e59e618b215d'
# b'00000000000000000244bf1d3600aada272d2d08aa1919a88ba9ecd14f42f3ae'
SHA-256
This code has been written just for fun, it is slow and may produce inaccurate results. The idea was to transition from mathematics to an algorithm.
sha256.py gist on GitHub.
import binascii
import hashlib
import itertools
def int_to_list(n):
return [int(i) for i in '{:32b}'.format(n).replace(' ', '0')]
def list_to_int(l):
s = 0
c = 1
for i in reversed(l):
s += i * c
c *= 2
return s
def list_to_digest(binary):
res = b''
for i in range(0, len(binary), 8):
res += list_to_int(binary[i: i + 8]).to_bytes(1, byteorder='big')
return res
def bin_maj(*parts):
res = []
for i in range(0, 32):
s = 0
for part in parts:
s += part[i]
res.append(1 if s > len(parts) // 2 else 0)
return res
def bin_rrot(a, shift):
return a[-shift:] + a[:-shift]
def bin_rshift(a, shift):
return (shift * [0]) + list(a[:-shift])
def bin_xor(*parts):
res = []
for i in range(0, len(parts[0])):
s = 0
for part in parts:
s += part[i]
res.append(1 if s % 2 == 1 else 0)
return res
def bin_ch(n, l1, l2):
res = []
for i in range(0, len(n)):
res.append(l1[i] if n[i] else l2[i])
return res
def bin_sum(*parts):
res = []
mov = 0
for i in range(0, len(parts[0])):
s = 0
for p in parts:
s += p[-i-1]
s += mov
res.append(s % 2)
mov = s // 2
res.reverse()
return res[-32:]
def to_chunks(iterable, n=512):
it = iter(iterable)
while True:
chunk = tuple(itertools.islice(it, n))
if not chunk:
return
yield chunk
# Provided by NSA
_k = (
0x428a2f98, 0x71374491, 0xb5c0fbcf, 0xe9b5dba5,
0x3956c25b, 0x59f111f1, 0x923f82a4, 0xab1c5ed5,
0xd807aa98, 0x12835b01, 0x243185be, 0x550c7dc3,
0x72be5d74, 0x80deb1fe, 0x9bdc06a7, 0xc19bf174,
0xe49b69c1, 0xefbe4786, 0x0fc19dc6, 0x240ca1cc,
0x2de92c6f, 0x4a7484aa, 0x5cb0a9dc, 0x76f988da,
0x983e5152, 0xa831c66d, 0xb00327c8, 0xbf597fc7,
0xc6e00bf3, 0xd5a79147, 0x06ca6351, 0x14292967,
0x27b70a85, 0x2e1b2138, 0x4d2c6dfc, 0x53380d13,
0x650a7354, 0x766a0abb, 0x81c2c92e, 0x92722c85,
0xa2bfe8a1, 0xa81a664b, 0xc24b8b70, 0xc76c51a3,
0xd192e819, 0xd6990624, 0xf40e3585, 0x106aa070,
0x19a4c116, 0x1e376c08, 0x2748774c, 0x34b0bcb5,
0x391c0cb3, 0x4ed8aa4a, 0x5b9cca4f, 0x682e6ff3,
0x748f82ee, 0x78a5636f, 0x84c87814, 0x8cc70208,
0x90befffa, 0xa4506ceb, 0xbef9a3f7, 0xc67178f2
)
_h = (
0x6a09e667, 0xbb67ae85, 0x3c6ef372, 0xa54ff53a,
0x510e527f, 0x9b05688c, 0x1f83d9ab, 0x5be0cd19
)
def preprocess(data_str):
data_bin = []
for c in data_str:
for i in '{:8b}'.format(c).replace(' ', '0'):
data_bin.append(int(i))
data_bin.append(1)
while len(data_bin) % 512 != 448:
data_bin.append(0)
for i in '{:64b}'.format(len(data_str) * 8).replace(' ', '0'):
data_bin.append(int(i))
return data_bin
def sha256(data):
a0, b0, c0, d0, e0, f0, g0, h0 = map(int_to_list, _h)
for chunk in to_chunks(preprocess(data), n=512):
w = [0] * 64
w[0:16] = to_chunks(chunk, n=32)
for i in range(16, 64):
s0 = bin_xor(bin_rrot(w[i-15], 7), bin_rrot(w[i-15], 18), bin_rshift(w[i-15], 3))
s1 = bin_xor(bin_rrot(w[i-2], 17), bin_rrot(w[i-2], 19), bin_rshift(w[i-2], 10))
w[i] = bin_sum(
w[i-16],
s0,
w[i-7],
s1
)
a, b, c, d, e, f, g, h = a0, b0, c0, d0, e0, f0, g0, h0
for i in range(0, 64):
sum1 = bin_sum(
w[i],
int_to_list(_k[i]),
h,
bin_ch(e, f, g),
bin_xor(bin_rrot(e, 6), bin_rrot(e, 11), bin_rrot(e, 25))
)
sum2 = bin_sum(
bin_xor(bin_rrot(a, 2), bin_rrot(a, 13), bin_rrot(a, 22)),
bin_maj(a, b, c),
sum1
)
a, b, c, d, e, f, g, h = sum2, a, b, c, bin_sum(d, sum1)[-32:], e, f, g
a0 = bin_sum(a0, a)
b0 = bin_sum(b0, b)
c0 = bin_sum(c0, c)
d0 = bin_sum(d0, d)
e0 = bin_sum(e0, e)
f0 = bin_sum(f0, f)
g0 = bin_sum(g0, g)
h0 = bin_sum(h0, h)
return a0 + b0 + c0 + d0 + e0 + f0 + g0 + h0
if __name__ == '__main__':
assert binascii.b2a_hex(list_to_digest(sha256(''))) == b'e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855'
block = b'0000002005dff584057d6f6fa7276bf0df228b5afc7f75db1c164601000000000000000076101602fbe0b2afe3fd7a5cc8da47ebcfe5c09d160ccb2df22b280798126e535e76ea57d48e0418f6a8b09b'
assert list_to_digest(sha256(block)) == hashlib.sha256(block).digest()
Optimizations

On the image:
- green: fixed data
- yellow: changes slowly (for each new block)
- red: changes frequently
One may notice that if we manipulate only nonce we may reuse results of the first computation.
Here we omit sha256 computation of first message parts for all nonce except the first one:
import struct
class Miner():
def __init__(self, previous_hash, transactions_hash, timestamp, bits, nonces):
self.previous_hash = previous_hash
self.transactions_hash = transactions_hash
self.timestamp = timestamp
self.bits = bits
self.nonces = nonces
_h = (
0x6a09e667, 0xbb67ae85, 0x3c6ef372, 0xa54ff53a,
0x510e527f, 0x9b05688c, 0x1f83d9ab, 0x5be0cd19
)
_k = (
0x428a2f98, 0x71374491, 0xb5c0fbcf, 0xe9b5dba5,
0x3956c25b, 0x59f111f1, 0x923f82a4, 0xab1c5ed5,
0xd807aa98, 0x12835b01, 0x243185be, 0x550c7dc3,
0x72be5d74, 0x80deb1fe, 0x9bdc06a7, 0xc19bf174,
0xe49b69c1, 0xefbe4786, 0x0fc19dc6, 0x240ca1cc,
0x2de92c6f, 0x4a7484aa, 0x5cb0a9dc, 0x76f988da,
0x983e5152, 0xa831c66d, 0xb00327c8, 0xbf597fc7,
0xc6e00bf3, 0xd5a79147, 0x06ca6351, 0x14292967,
0x27b70a85, 0x2e1b2138, 0x4d2c6dfc, 0x53380d13,
0x650a7354, 0x766a0abb, 0x81c2c92e, 0x92722c85,
0xa2bfe8a1, 0xa81a664b, 0xc24b8b70, 0xc76c51a3,
0xd192e819, 0xd6990624, 0xf40e3585, 0x106aa070,
0x19a4c116, 0x1e376c08, 0x2748774c, 0x34b0bcb5,
0x391c0cb3, 0x4ed8aa4a, 0x5b9cca4f, 0x682e6ff3,
0x748f82ee, 0x78a5636f, 0x84c87814, 0x8cc70208,
0x90befffa, 0xa4506ceb, 0xbef9a3f7, 0xc67178f2
)
@staticmethod
def rrot(v, shift):
return ((v >> shift) | (v << (32-shift))) & 0xFFFFFFFF
@staticmethod
def ch(a, b, c):
return (a & b) ^ ((~a) & c)
@staticmethod
def maj(a, b, c):
return (a & b) ^ (a & c) ^ (b & c)
@classmethod
def get_w(cls, block):
w = [0] * 64
w[0:16] = [(block >> (480 - i * 32)) & 0xffffffff for i in range(0, 16)]
for i in range(16, 64):
w[i] = (
w[i-16] +
(cls.rrot(w[i-15], 7) ^ cls.rrot(w[i-15], 18) ^ (w[i-15] >> 3)) +
w[i-7] +
(cls.rrot(w[i-2], 17) ^ cls.rrot(w[i-2], 19) ^ (w[i-2] >> 10))
) & 0xffffffff
return w
@classmethod
def iteration(cls, w, k, a, b, c, d, e, f, g, h):
s = (w + k + h + cls.ch(e, f, g) + (cls.rrot(e, 6) ^ cls.rrot(e, 11) ^ cls.rrot(e, 25))) & 0xffffffff
a, b, c, d, e, f, g, h = (
((cls.rrot(a, 2) ^ cls.rrot(a, 13) ^ cls.rrot(a, 22)) + cls.maj(a, b, c) + s) & 0xffffffff,
a, b, c,
(d + s) & 0xffffffff,
e, f, g
)
return a, b, c, d, e, f, g, h
def run(self):
pack = (
struct.pack('<L', 0x20000000) +
bytes.fromhex(self.previous_hash)[::-1] +
(bytes.fromhex(self.transactions_hash)[::-1])[:-4]
)
block = 0
for i in pack[:512]:
block = (block << 8) | i
w = self.get_w(block)
sha1 = list(self._h)
for i in range(0, 64):
sha1 = self.iteration(w[i], self._k[i], *sha1)
sha1 = [(sha1[i] + self._h[i]) & 0xffffffff for i in range(0, 8)]
results = []
for nonce in self.nonces:
pack = (
(bytes.fromhex(self.transactions_hash)[::-1])[-4:] +
struct.pack('<LLL', self.timestamp, self.bits, nonce) +
struct.pack('>LLLLLLLLLLLL', 0x80000000, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 640)
)
block = 0
for i in pack[:512]:
block = (block << 8) | i
w = self.get_w(block)
sha2 = list(sha1)
for i in range(0, 64):
sha2 = self.iteration(w[i], self._k[i], *sha2)
sha2 = [((sha2[i] + sha1[i]) & 0xffffffff) for i in range(0, 8)]
block = 0
for i in range(0, 8):
block = block << 32 | sha2[i]
block = (block << 256) | (1 << 255) | 256
w = self.get_w(block)
sha3 = list(self._h)
for i in range(0, 64):
sha3 = self.iteration(w[i], self._k[i], *sha3)
sha3 = [(sha3[i] + self._h[i]) & 0xffffffff for i in range(0, 8)]
result = 0
for i in struct.pack('>LLLLLLLL', *sha3)[::-1]:
result = (result << 8) | i
results.append(result)
return results
if __name__ == '__main__':
previous_hash = '00000000000000000146161cdb757ffc5a8b22dff06b27a76f6f7d0584f5df05'
transactions_hash = '536e129807282bf22dcb0c169dc0e5cfeb47dac85c7afde3afb2e0fb02161076'
timestamp = 1474983518
bits = 0x18048ed4
nonces = [
0x9bb0a8f6,
0xabb0a8f6
]
assert Miner(previous_hash=previous_hash,
transactions_hash=transactions_hash,
timestamp=timestamp,
bits=bits,
nonces=nonces
).run() == [
55624467632295270487489375918890129241800824638908068782,
75916835553102774891823884113900634385218467396093025039177034268615350443547
]
It gives us about 30% efficiency increase. We may still improve the algorithm for a few more percents (see The Unreasonable Fundamental Incertitudes Behind Bitcoin Mining).
Hardware
Mining with CPU: ~4000W / GH. Mining with GPU: ~210W / GH (Radeon 7790). Mining with FPGA (45nm): ~50W / GH. Mining with ASIC (16nm): ~0.1W / GH, 14TH/s (ANTMINER S9, 12 June 2016 model).
Vocabulary
Big and small endian
Endianness is the order of the bytes that compose a digital word in computer memory. When storing a word in big-endian format the most significant byte, which is the byte containing the most significant bit, is stored first.
See python struct Byte Order, Size, and Alignment.
import struct
import binascii
binascii.b2a_hex(struct.pack('<L', 10)) # little-endian
# b'0a000000'
binascii.b2a_hex(struct.pack('>L', 10)) # big-endian
# b'0000000a'
L - unsigned long, 4 bites.
Hash digest
Accrding to hashlib - Secure hashes and message digests:
Return the digest of the data passed to the update() method so far. This is a bytes object of size digest_size which may contain bytes in the whole range from 0 to 255.
Links
Bitcoin mining the hard way: the algorithms, protocols, and bytes, Ken Shirriff's blog pysha2 by thomdixon on GitHub Mining Bitcoin with pencil and paper: 0.67 hashes per day on Ken Shirriff's blog Endianness on Wikipedia SHA-2 on Wikipedia The Unreasonable Fundamental Incertitudes Behind Bitcoin Mining paper by Nicolas T. Courtois, Marek Grajek and Rahul Naik
Licensed under CC BY-SA 3.0