1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
|
package schema
import (
"fmt"
"sync"
"google.golang.org/protobuf/proto"
"google.golang.org/protobuf/reflect/protodesc"
"google.golang.org/protobuf/reflect/protoreflect"
"google.golang.org/protobuf/reflect/protoregistry"
"google.golang.org/protobuf/types/descriptorpb"
"google.golang.org/protobuf/types/dynamicpb"
)
// ProtobufSchema represents a parsed Protobuf schema with message type information
type ProtobufSchema struct {
FileDescriptorSet *descriptorpb.FileDescriptorSet
MessageDescriptor protoreflect.MessageDescriptor
MessageName string
PackageName string
Dependencies []string
}
// ProtobufDescriptorParser handles parsing of Confluent Schema Registry Protobuf descriptors
type ProtobufDescriptorParser struct {
mu sync.RWMutex
// Cache for parsed descriptors to avoid re-parsing
descriptorCache map[string]*ProtobufSchema
}
// NewProtobufDescriptorParser creates a new parser instance
func NewProtobufDescriptorParser() *ProtobufDescriptorParser {
return &ProtobufDescriptorParser{
descriptorCache: make(map[string]*ProtobufSchema),
}
}
// ParseBinaryDescriptor parses a Confluent Schema Registry Protobuf binary descriptor
// The input is typically a serialized FileDescriptorSet from the schema registry
func (p *ProtobufDescriptorParser) ParseBinaryDescriptor(binaryData []byte, messageName string) (*ProtobufSchema, error) {
// Check cache first
cacheKey := fmt.Sprintf("%x:%s", binaryData[:min(32, len(binaryData))], messageName)
p.mu.RLock()
if cached, exists := p.descriptorCache[cacheKey]; exists {
p.mu.RUnlock()
// If we have a cached schema but no message descriptor, return the same error
if cached.MessageDescriptor == nil {
return cached, fmt.Errorf("failed to find message descriptor for %s: message descriptor resolution not fully implemented in Phase E1 - found message %s in package %s", messageName, messageName, cached.PackageName)
}
return cached, nil
}
p.mu.RUnlock()
// Parse the FileDescriptorSet from binary data
var fileDescriptorSet descriptorpb.FileDescriptorSet
if err := proto.Unmarshal(binaryData, &fileDescriptorSet); err != nil {
return nil, fmt.Errorf("failed to unmarshal FileDescriptorSet: %w", err)
}
// Validate the descriptor set
if err := p.validateDescriptorSet(&fileDescriptorSet); err != nil {
return nil, fmt.Errorf("invalid descriptor set: %w", err)
}
// If no message name provided, try to find the first available message
if messageName == "" {
messageName = p.findFirstMessageName(&fileDescriptorSet)
if messageName == "" {
return nil, fmt.Errorf("no messages found in FileDescriptorSet")
}
}
// Find the target message descriptor
messageDesc, packageName, err := p.findMessageDescriptor(&fileDescriptorSet, messageName)
if err != nil {
// For Phase E1, we still cache the FileDescriptorSet even if message resolution fails
// This allows us to test caching behavior and avoid re-parsing the same binary data
schema := &ProtobufSchema{
FileDescriptorSet: &fileDescriptorSet,
MessageDescriptor: nil, // Not resolved in Phase E1
MessageName: messageName,
PackageName: packageName,
Dependencies: p.extractDependencies(&fileDescriptorSet),
}
p.mu.Lock()
p.descriptorCache[cacheKey] = schema
p.mu.Unlock()
return schema, fmt.Errorf("failed to find message descriptor for %s: %w", messageName, err)
}
// Extract dependencies
dependencies := p.extractDependencies(&fileDescriptorSet)
// Create the schema object
schema := &ProtobufSchema{
FileDescriptorSet: &fileDescriptorSet,
MessageDescriptor: messageDesc,
MessageName: messageName,
PackageName: packageName,
Dependencies: dependencies,
}
// Cache the result
p.mu.Lock()
p.descriptorCache[cacheKey] = schema
p.mu.Unlock()
return schema, nil
}
// validateDescriptorSet performs basic validation on the FileDescriptorSet
func (p *ProtobufDescriptorParser) validateDescriptorSet(fds *descriptorpb.FileDescriptorSet) error {
if len(fds.File) == 0 {
return fmt.Errorf("FileDescriptorSet contains no files")
}
for i, file := range fds.File {
if file.Name == nil {
return fmt.Errorf("file descriptor %d has no name", i)
}
if file.Package == nil {
return fmt.Errorf("file descriptor %s has no package", *file.Name)
}
}
return nil
}
// findFirstMessageName finds the first message name in the FileDescriptorSet
func (p *ProtobufDescriptorParser) findFirstMessageName(fds *descriptorpb.FileDescriptorSet) string {
for _, file := range fds.File {
if len(file.MessageType) > 0 {
return file.MessageType[0].GetName()
}
}
return ""
}
// findMessageDescriptor locates a specific message descriptor within the FileDescriptorSet
func (p *ProtobufDescriptorParser) findMessageDescriptor(fds *descriptorpb.FileDescriptorSet, messageName string) (protoreflect.MessageDescriptor, string, error) {
// This is a simplified implementation for Phase E1
// In a complete implementation, we would:
// 1. Build a complete descriptor registry from the FileDescriptorSet
// 2. Resolve all imports and dependencies
// 3. Handle nested message types and packages correctly
// 4. Support fully qualified message names
for _, file := range fds.File {
packageName := ""
if file.Package != nil {
packageName = *file.Package
}
// Search for the message in this file
for _, messageType := range file.MessageType {
if messageType.Name != nil && *messageType.Name == messageName {
// Try to build a proper descriptor from the FileDescriptorProto
fileDesc, err := p.buildFileDescriptor(file)
if err != nil {
return nil, packageName, fmt.Errorf("failed to build file descriptor: %w", err)
}
// Find the message descriptor in the built file
msgDesc := p.findMessageInFileDescriptor(fileDesc, messageName)
if msgDesc != nil {
return msgDesc, packageName, nil
}
return nil, packageName, fmt.Errorf("message descriptor built but not found: %s", messageName)
}
// Search nested messages (simplified)
if nestedDesc := p.searchNestedMessages(messageType, messageName); nestedDesc != nil {
// Try to build descriptor for nested message
fileDesc, err := p.buildFileDescriptor(file)
if err != nil {
return nil, packageName, fmt.Errorf("failed to build file descriptor for nested message: %w", err)
}
msgDesc := p.findMessageInFileDescriptor(fileDesc, messageName)
if msgDesc != nil {
return msgDesc, packageName, nil
}
return nil, packageName, fmt.Errorf("nested message descriptor built but not found: %s", messageName)
}
}
}
return nil, "", fmt.Errorf("message %s not found in descriptor set", messageName)
}
// buildFileDescriptor builds a protoreflect.FileDescriptor from a FileDescriptorProto
func (p *ProtobufDescriptorParser) buildFileDescriptor(fileProto *descriptorpb.FileDescriptorProto) (protoreflect.FileDescriptor, error) {
// Create a local registry to avoid conflicts
localFiles := &protoregistry.Files{}
// Build the file descriptor using protodesc
fileDesc, err := protodesc.NewFile(fileProto, localFiles)
if err != nil {
return nil, fmt.Errorf("failed to create file descriptor: %w", err)
}
return fileDesc, nil
}
// findMessageInFileDescriptor searches for a message descriptor within a file descriptor
func (p *ProtobufDescriptorParser) findMessageInFileDescriptor(fileDesc protoreflect.FileDescriptor, messageName string) protoreflect.MessageDescriptor {
// Search top-level messages
messages := fileDesc.Messages()
for i := 0; i < messages.Len(); i++ {
msgDesc := messages.Get(i)
if string(msgDesc.Name()) == messageName {
return msgDesc
}
// Search nested messages
if nestedDesc := p.findNestedMessageDescriptor(msgDesc, messageName); nestedDesc != nil {
return nestedDesc
}
}
return nil
}
// findNestedMessageDescriptor recursively searches for nested messages
func (p *ProtobufDescriptorParser) findNestedMessageDescriptor(msgDesc protoreflect.MessageDescriptor, messageName string) protoreflect.MessageDescriptor {
nestedMessages := msgDesc.Messages()
for i := 0; i < nestedMessages.Len(); i++ {
nestedDesc := nestedMessages.Get(i)
if string(nestedDesc.Name()) == messageName {
return nestedDesc
}
// Recursively search deeper nested messages
if deeperNested := p.findNestedMessageDescriptor(nestedDesc, messageName); deeperNested != nil {
return deeperNested
}
}
return nil
}
// searchNestedMessages recursively searches for nested message types
func (p *ProtobufDescriptorParser) searchNestedMessages(messageType *descriptorpb.DescriptorProto, targetName string) *descriptorpb.DescriptorProto {
for _, nested := range messageType.NestedType {
if nested.Name != nil && *nested.Name == targetName {
return nested
}
// Recursively search deeper nesting
if found := p.searchNestedMessages(nested, targetName); found != nil {
return found
}
}
return nil
}
// extractDependencies extracts the list of dependencies from the FileDescriptorSet
func (p *ProtobufDescriptorParser) extractDependencies(fds *descriptorpb.FileDescriptorSet) []string {
dependencySet := make(map[string]bool)
for _, file := range fds.File {
for _, dep := range file.Dependency {
dependencySet[dep] = true
}
}
dependencies := make([]string, 0, len(dependencySet))
for dep := range dependencySet {
dependencies = append(dependencies, dep)
}
return dependencies
}
// GetMessageFields returns information about the fields in the message
func (s *ProtobufSchema) GetMessageFields() ([]FieldInfo, error) {
if s.FileDescriptorSet == nil {
return nil, fmt.Errorf("no FileDescriptorSet available")
}
// Find the message descriptor for this schema
messageDesc := s.findMessageDescriptor(s.MessageName)
if messageDesc == nil {
return nil, fmt.Errorf("message %s not found in descriptor set", s.MessageName)
}
// Extract field information
fields := make([]FieldInfo, 0, len(messageDesc.Field))
for _, field := range messageDesc.Field {
fieldInfo := FieldInfo{
Name: field.GetName(),
Number: field.GetNumber(),
Type: s.fieldTypeToString(field.GetType()),
Label: s.fieldLabelToString(field.GetLabel()),
}
// Set TypeName for message/enum types
if field.GetTypeName() != "" {
fieldInfo.TypeName = field.GetTypeName()
}
fields = append(fields, fieldInfo)
}
return fields, nil
}
// FieldInfo represents information about a Protobuf field
type FieldInfo struct {
Name string
Number int32
Type string
Label string // optional, required, repeated
TypeName string // for message/enum types
}
// GetFieldByName returns information about a specific field
func (s *ProtobufSchema) GetFieldByName(fieldName string) (*FieldInfo, error) {
fields, err := s.GetMessageFields()
if err != nil {
return nil, err
}
for _, field := range fields {
if field.Name == fieldName {
return &field, nil
}
}
return nil, fmt.Errorf("field %s not found", fieldName)
}
// GetFieldByNumber returns information about a field by its number
func (s *ProtobufSchema) GetFieldByNumber(fieldNumber int32) (*FieldInfo, error) {
fields, err := s.GetMessageFields()
if err != nil {
return nil, err
}
for _, field := range fields {
if field.Number == fieldNumber {
return &field, nil
}
}
return nil, fmt.Errorf("field number %d not found", fieldNumber)
}
// findMessageDescriptor finds a message descriptor by name in the FileDescriptorSet
func (s *ProtobufSchema) findMessageDescriptor(messageName string) *descriptorpb.DescriptorProto {
if s.FileDescriptorSet == nil {
return nil
}
for _, file := range s.FileDescriptorSet.File {
// Check top-level messages
for _, message := range file.MessageType {
if message.GetName() == messageName {
return message
}
// Check nested messages
if nested := searchNestedMessages(message, messageName); nested != nil {
return nested
}
}
}
return nil
}
// searchNestedMessages recursively searches for nested message types
func searchNestedMessages(messageType *descriptorpb.DescriptorProto, targetName string) *descriptorpb.DescriptorProto {
for _, nested := range messageType.NestedType {
if nested.Name != nil && *nested.Name == targetName {
return nested
}
// Recursively search deeper nesting
if found := searchNestedMessages(nested, targetName); found != nil {
return found
}
}
return nil
}
// fieldTypeToString converts a FieldDescriptorProto_Type to string
func (s *ProtobufSchema) fieldTypeToString(fieldType descriptorpb.FieldDescriptorProto_Type) string {
switch fieldType {
case descriptorpb.FieldDescriptorProto_TYPE_DOUBLE:
return "double"
case descriptorpb.FieldDescriptorProto_TYPE_FLOAT:
return "float"
case descriptorpb.FieldDescriptorProto_TYPE_INT64:
return "int64"
case descriptorpb.FieldDescriptorProto_TYPE_UINT64:
return "uint64"
case descriptorpb.FieldDescriptorProto_TYPE_INT32:
return "int32"
case descriptorpb.FieldDescriptorProto_TYPE_FIXED64:
return "fixed64"
case descriptorpb.FieldDescriptorProto_TYPE_FIXED32:
return "fixed32"
case descriptorpb.FieldDescriptorProto_TYPE_BOOL:
return "bool"
case descriptorpb.FieldDescriptorProto_TYPE_STRING:
return "string"
case descriptorpb.FieldDescriptorProto_TYPE_GROUP:
return "group"
case descriptorpb.FieldDescriptorProto_TYPE_MESSAGE:
return "message"
case descriptorpb.FieldDescriptorProto_TYPE_BYTES:
return "bytes"
case descriptorpb.FieldDescriptorProto_TYPE_UINT32:
return "uint32"
case descriptorpb.FieldDescriptorProto_TYPE_ENUM:
return "enum"
case descriptorpb.FieldDescriptorProto_TYPE_SFIXED32:
return "sfixed32"
case descriptorpb.FieldDescriptorProto_TYPE_SFIXED64:
return "sfixed64"
case descriptorpb.FieldDescriptorProto_TYPE_SINT32:
return "sint32"
case descriptorpb.FieldDescriptorProto_TYPE_SINT64:
return "sint64"
default:
return "unknown"
}
}
// fieldLabelToString converts a FieldDescriptorProto_Label to string
func (s *ProtobufSchema) fieldLabelToString(label descriptorpb.FieldDescriptorProto_Label) string {
switch label {
case descriptorpb.FieldDescriptorProto_LABEL_OPTIONAL:
return "optional"
case descriptorpb.FieldDescriptorProto_LABEL_REQUIRED:
return "required"
case descriptorpb.FieldDescriptorProto_LABEL_REPEATED:
return "repeated"
default:
return "unknown"
}
}
// ValidateMessage validates that a message conforms to the schema
func (s *ProtobufSchema) ValidateMessage(messageData []byte) error {
if s.MessageDescriptor == nil {
return fmt.Errorf("no message descriptor available for validation")
}
// Create a dynamic message from the descriptor
msgType := dynamicpb.NewMessageType(s.MessageDescriptor)
msg := msgType.New()
// Try to unmarshal the message data
if err := proto.Unmarshal(messageData, msg.Interface()); err != nil {
return fmt.Errorf("message validation failed: %w", err)
}
// Basic validation passed - the message can be unmarshaled with the schema
return nil
}
// ClearCache clears the descriptor cache
func (p *ProtobufDescriptorParser) ClearCache() {
p.mu.Lock()
defer p.mu.Unlock()
p.descriptorCache = make(map[string]*ProtobufSchema)
}
// GetCacheStats returns statistics about the descriptor cache
func (p *ProtobufDescriptorParser) GetCacheStats() map[string]interface{} {
p.mu.RLock()
defer p.mu.RUnlock()
return map[string]interface{}{
"cached_descriptors": len(p.descriptorCache),
}
}
// Helper function for min
func min(a, b int) int {
if a < b {
return a
}
return b
}
|