1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
|
package compression
import (
"bytes"
"compress/gzip"
"fmt"
"io"
"github.com/golang/snappy"
"github.com/klauspost/compress/zstd"
"github.com/pierrec/lz4/v4"
)
// nopCloser wraps an io.Reader to provide a no-op Close method
type nopCloser struct {
io.Reader
}
func (nopCloser) Close() error { return nil }
// CompressionCodec identifies the compression algorithm recorded in the
// low three bits of a Kafka record batch's attributes field.
type CompressionCodec int8

// Kafka wire-format codec identifiers.
const (
	None   CompressionCodec = 0
	Gzip   CompressionCodec = 1
	Snappy CompressionCodec = 2
	Lz4    CompressionCodec = 3
	Zstd   CompressionCodec = 4
)

// codecNames holds the canonical name of each valid codec, indexed by its
// wire-format value.
var codecNames = [...]string{"none", "gzip", "snappy", "lz4", "zstd"}

// compressionCodecMask selects the codec bits of a record batch's
// attributes field (the lower 3 bits).
const compressionCodecMask = 0x07

// String returns the canonical lowercase name of the codec, or
// "unknown(<n>)" for values outside the defined range.
func (c CompressionCodec) String() string {
	if c.IsValid() {
		return codecNames[c]
	}
	return fmt.Sprintf("unknown(%d)", c)
}

// IsValid reports whether c is one of the codecs defined above.
func (c CompressionCodec) IsValid() bool {
	return c >= None && c <= Zstd
}

// ExtractCompressionCodec extracts the compression codec from record batch
// attributes by masking off everything but the codec bits.
func ExtractCompressionCodec(attributes int16) CompressionCodec {
	return CompressionCodec(attributes & compressionCodecMask)
}

// SetCompressionCodec returns attributes with its codec bits replaced by
// codec, leaving all other attribute bits untouched.
func SetCompressionCodec(attributes int16, codec CompressionCodec) int16 {
	return (attributes &^ compressionCodecMask) | int16(codec)
}
// Compress compresses data using the specified codec and returns the
// compressed bytes. For None the input slice is returned unchanged (no
// copy). Snappy uses its one-shot block encoding; the remaining codecs
// stream through an io.WriteCloser into an in-memory buffer.
func Compress(codec CompressionCodec, data []byte) ([]byte, error) {
	if codec == None {
		return data, nil
	}
	// Snappy's block format has no streaming writer, so encode in one shot.
	if codec == Snappy {
		out := snappy.Encode(nil, data)
		if out == nil {
			out = []byte{}
		}
		return out, nil
	}
	var buf bytes.Buffer
	var w io.WriteCloser
	switch codec {
	case Gzip:
		w = gzip.NewWriter(&buf)
	case Lz4:
		w = lz4.NewWriter(&buf)
	case Zstd:
		zw, err := zstd.NewWriter(&buf)
		if err != nil {
			return nil, fmt.Errorf("failed to create zstd writer: %w", err)
		}
		w = zw
	default:
		return nil, fmt.Errorf("unsupported compression codec: %s", codec)
	}
	if _, err := w.Write(data); err != nil {
		// Best-effort close; the write error is the one worth reporting.
		w.Close()
		return nil, fmt.Errorf("failed to write compressed data: %w", err)
	}
	// Close flushes any buffered output and finalizes the stream.
	if err := w.Close(); err != nil {
		return nil, fmt.Errorf("failed to close compressor: %w", err)
	}
	return buf.Bytes(), nil
}
// drainReader reads all of r into memory and normalizes a nil result to an
// empty (non-nil) slice so callers always receive a usable byte slice.
// Shared by the zstd branch and the generic tail path of Decompress, which
// previously duplicated this logic.
func drainReader(r io.Reader) ([]byte, error) {
	var result bytes.Buffer
	if _, err := io.Copy(&result, r); err != nil {
		return nil, err
	}
	decompressed := result.Bytes()
	if decompressed == nil {
		decompressed = []byte{}
	}
	return decompressed, nil
}

// Decompress decompresses data using the specified codec and returns the
// original bytes. For None the input slice is returned unchanged (no copy).
// Snappy is decoded in one shot; gzip and lz4 stream through an
// io.ReadCloser; zstd is handled in its own branch because zstd.Decoder's
// Close method returns no error and therefore does not satisfy io.Closer.
func Decompress(codec CompressionCodec, data []byte) ([]byte, error) {
	if codec == None {
		return data, nil
	}
	buf := bytes.NewReader(data)
	var reader io.ReadCloser
	switch codec {
	case Gzip:
		gz, err := gzip.NewReader(buf)
		if err != nil {
			return nil, fmt.Errorf("failed to create gzip reader: %w", err)
		}
		reader = gz
	case Snappy:
		// Snappy's block format has no streaming reader; decode in one shot.
		decompressed, err := snappy.Decode(nil, data)
		if err != nil {
			return nil, fmt.Errorf("failed to decompress snappy data: %w", err)
		}
		if decompressed == nil {
			decompressed = []byte{}
		}
		return decompressed, nil
	case Lz4:
		// lz4.Reader doesn't implement Close, so we wrap it.
		reader = &nopCloser{Reader: lz4.NewReader(buf)}
	case Zstd:
		zstdReader, err := zstd.NewReader(buf)
		if err != nil {
			return nil, fmt.Errorf("failed to create zstd reader: %w", err)
		}
		defer zstdReader.Close()
		decompressed, err := drainReader(zstdReader)
		if err != nil {
			return nil, fmt.Errorf("failed to decompress zstd data: %w", err)
		}
		return decompressed, nil
	default:
		return nil, fmt.Errorf("unsupported compression codec: %s", codec)
	}
	defer reader.Close()
	decompressed, err := drainReader(reader)
	if err != nil {
		return nil, fmt.Errorf("failed to decompress data: %w", err)
	}
	return decompressed, nil
}
// CompressRecordBatch compresses the records portion of a Kafka record batch
// and returns the compressed bytes together with the attributes value that
// encodes the chosen codec. Only the records payload is compressed — the
// batch header is never touched. For None the input is returned as-is with
// zero attributes.
func CompressRecordBatch(codec CompressionCodec, recordsData []byte) ([]byte, int16, error) {
	if codec == None {
		return recordsData, 0, nil
	}
	compressed, err := Compress(codec, recordsData)
	if err != nil {
		return nil, 0, fmt.Errorf("failed to compress record batch: %w", err)
	}
	return compressed, int16(codec), nil
}
// DecompressRecordBatch decompresses the records portion of a Kafka record
// batch. The codec is read from the low bits of attributes; when it is None
// the payload is returned unchanged (no copy).
func DecompressRecordBatch(attributes int16, compressedData []byte) ([]byte, error) {
	codec := ExtractCompressionCodec(attributes)
	if codec == None {
		return compressedData, nil
	}
	records, err := Decompress(codec, compressedData)
	if err != nil {
		return nil, fmt.Errorf("failed to decompress record batch: %w", err)
	}
	return records, nil
}
|