/
friendlyhash.go
226 lines (191 loc) · 6.21 KB
/
friendlyhash.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
// Package friendlyhash implements human-readable and reversible representation
// of known-length byte slices.
package friendlyhash
import (
"bytes"
"encoding/binary"
"errors"
"fmt"
"math"
)
// New creates a struct responsible for encoding and decoding the data using
// the provided dictionary and hash length. The dictionary should be a list of
// at least two unique strings, usually words. To decode once encoded data an
// identical dictionary has to be used. Hash length is required to avoid
// including the length of the data in the resulting representation or using
// padding characters.
func New(dictionary []string, hashLength int) (*FriendlyHash, error) {
if err := validateDictionary(dictionary); err != nil {
return nil, err
}
rv := &FriendlyHash{
dictionary: dictionary,
hashLength: hashLength,
}
return rv, nil
}
// FriendlyHash creates a human-friendly representation of byte slices.
type FriendlyHash struct {
dictionary []string
hashLength int
}
// Humanize encodes the provided byte slice and returns its representation as a
// list of words from the dictionary.
func (h *FriendlyHash) Humanize(hash []byte) ([]string, error) {
if len(hash) != h.hashLength {
return nil, errors.New("invalid hash length")
}
bitsPerWord := howManyBitsPerWord(len(h.dictionary))
indexes, err := h.splitIntoIndexes(hash, bitsPerWord)
if err != nil {
return nil, err
}
var words []string
for _, index := range indexes {
words = append(words, h.dictionary[index])
}
return words, nil
}
// Dehumanize converts the provided list of words previously created using the
// humanize function back to its byte slice equivalent.
func (h *FriendlyHash) Dehumanize(words []string) ([]byte, error) {
bitsPerWord := howManyBitsPerWord(len(h.dictionary))
if len(words) != howManyWords(bitsPerWord, h.hashLength) {
return nil, errors.New("invalid words length")
}
to := make([]byte, h.hashLength)
for i, word := range words {
// Get the byte representation of this word
wordIndex, err := findIndex(word, h.dictionary)
if err != nil {
return nil, err
}
buf := &bytes.Buffer{}
if err := binary.Write(buf, binary.BigEndian, uint32(wordIndex)); err != nil {
return nil, err
}
// Copy the relevant bits to the output slice
from := buf.Bytes()
fromI := len(from)*8 - bitsPerWord
toI := i * bitsPerWord
n := bitsPerWord
if toI+n > len(to)*8 { // In case we run out of space in the to slice
n = len(to)*8 - toI // Use only as many bits as can fit in that slice
}
if err := copyBits(from, fromI, to, toI, n); err != nil {
return nil, err
}
}
return to, nil
}
// NumberOfWords returns the number of words returned by the humanize function.
func (h *FriendlyHash) NumberOfWords() int {
bitsPerWord := howManyBitsPerWord(len(h.dictionary))
return howManyWords(bitsPerWord, h.hashLength)
}
// NumberOfBytes returns the hash length returned by the dehumanize function.
func (h *FriendlyHash) NumberOfBytes() int {
return h.hashLength
}
func findIndex(element string, slice []string) (int, error) {
for i, sliceElement := range slice {
if sliceElement == element {
return i, nil
}
}
return -1, fmt.Errorf("word is not in the dictionary: %s", element)
}
func (h *FriendlyHash) splitIntoIndexes(data []byte, bitsPerWord int) ([]int, error) {
var indexes []int
for bitI := 0; bitI < len(data)*8; bitI += bitsPerWord {
index, err := getBits(data, bitI, bitsPerWord)
if err != nil {
return nil, err
}
indexes = append(indexes, int(index))
}
return indexes, nil
}
// getBits attempts to extract "n" bits starting from the bit position "i" in
// the data slice and return them as a number.
func getBits(data []byte, i int, n int) (uint32, error) {
var tmpData = make([]byte, 4)
length := len(data) * 8
nBits := n
if i+nBits > length { // If there are no n bits available
nBits = length - i // Copy all remaining bits
}
if err := copyBits(data, i, tmpData, len(tmpData)*8-n, nBits); err != nil {
return 0, err
}
var rv uint32
if err := binary.Read(bytes.NewBuffer(tmpData), binary.BigEndian, &rv); err != nil {
return 0, err
}
return rv, nil
}
// copyBits copies n bits between the specified byte slices, starting with the
// specified bit indexes.
func copyBits(from []byte, fromStartBit int, to []byte, toStartBit int, n int) error {
if fromStartBit+n > len(from)*8 {
return errors.New("not enough bits in the from slice")
}
if toStartBit+n > len(to)*8 {
return errors.New("not enough bits in the to slice")
}
for i := 0; i < n; i++ {
// read bit
fromBitI := fromStartBit + i
currentFromByte := fromBitI / 8
currentFromBit := fromBitI % 8
bitState := checkBit(from[currentFromByte], currentFromBit)
// write bit
toBitI := toStartBit + i
currentToByte := toBitI / 8
currentToBit := toBitI % 8
if bitState {
to[currentToByte] = setBit(to[currentToByte], currentToBit)
} else {
to[currentToByte] = clearBit(to[currentToByte], currentToBit)
}
}
return nil
}
// checkBit returns true if the specified bit is set to 1 and false otherwise.
func checkBit(b byte, i int) bool {
var mask byte = 1 << uint(7-i)
return b&mask != 0
}
// setBit returns the provided byte with the specified bit set to 1.
func setBit(b byte, i int) byte {
return b | 1<<uint(7-i)
}
// clearBit returns the provided byte with the specified bit set to 0.
func clearBit(b byte, i int) byte {
return b & ^(1 << uint(7-i))
}
// howManyWords returns the number of words needed to encode a hash of a
// specified length if each word represents a specified amount of bits.
func howManyWords(bitsPerWord int, hashLength int) int {
return int(math.Ceil(float64(hashLength) * 8.0 / float64(bitsPerWord)))
}
// howManyBitsPerWord returns the number of bits that can be represented using
// a single word from a dictionary of the specified size.
func howManyBitsPerWord(numberOfWords int) int {
return int(math.Floor(math.Log2(float64(numberOfWords))))
}
func validateDictionary(dictionary []string) error {
// Check length
if len(dictionary) < 2 {
return errors.New("dictionary must have at least 2 entries")
}
// Check duplicates
m := make(map[string]bool)
for _, word := range dictionary {
m[word] = true
}
if len(m) != len(dictionary) {
return errors.New("dictionary entries must be unique")
}
return nil
}