// r2.go, forked from benhoyt/go-1brc.
// r2: use stats pointer as map value to avoid double hashing
//
// ~921ms for 10M rows (1.09x as fast as r1)
package main
import (
"bufio"
"fmt"
"io"
"os"
"sort"
"strconv"
"strings"
)
// r2 reads "station;temperature" lines from the file at inputPath, tracks the
// minimum, maximum, sum and count of temperatures per station, and writes one
// line to output of the form
//
//	{name=min/mean/max, name=min/mean/max, ...}
//
// with stations sorted by name and each value formatted to one decimal place.
//
// Lines that contain no ';' are skipped. An error is returned if the file
// cannot be opened, if reading it fails (including a line too long for the
// scanner's buffer), if a temperature does not parse as a float, or if writing
// to output fails.
func r2(inputPath string, output io.Writer) error {
	type stats struct {
		min, max, sum float64
		count         int64
	}

	f, err := os.Open(inputPath)
	if err != nil {
		return err
	}
	defer f.Close()

	// The map holds pointers so that an already-seen station is updated in
	// place through the pointer; only the first sighting writes to the map.
	stationStats := make(map[string]*stats)

	scanner := bufio.NewScanner(f)
	for scanner.Scan() {
		line := scanner.Text()
		station, tempStr, hasSemi := strings.Cut(line, ";")
		if !hasSemi {
			continue
		}
		temp, err := strconv.ParseFloat(tempStr, 64)
		if err != nil {
			return err
		}
		s := stationStats[station]
		if s == nil {
			stationStats[station] = &stats{
				min:   temp,
				max:   temp,
				sum:   temp,
				count: 1,
			}
			continue
		}
		s.min = min(s.min, temp)
		s.max = max(s.max, temp)
		s.sum += temp
		s.count++
	}
	// Scan returns false both at EOF and on failure; without this check a
	// read error or over-long line would yield truncated results and a nil
	// error.
	if err := scanner.Err(); err != nil {
		return err
	}

	stations := make([]string, 0, len(stationStats))
	for station := range stationStats {
		stations = append(stations, station)
	}
	sort.Strings(stations)

	// bufio.Writer remembers the first write error and reports it from
	// Flush, so a single check at the end covers every write below.
	w := bufio.NewWriter(output)
	fmt.Fprint(w, "{")
	for i, station := range stations {
		if i > 0 {
			fmt.Fprint(w, ", ")
		}
		s := stationStats[station]
		mean := s.sum / float64(s.count)
		fmt.Fprintf(w, "%s=%.1f/%.1f/%.1f", station, s.min, mean, s.max)
	}
	fmt.Fprint(w, "}\n")
	return w.Flush()
}