aboutsummaryrefslogtreecommitdiff
path: root/weed/mq/kafka/schema/registry_client.go
blob: 8be7fbb790cb7e369a4b33c94e0880d081b74ce3 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
package schema

import (
	"bytes"
	"encoding/json"
	"fmt"
	"io"
	"net/http"
	"sync"
	"time"
)

// RegistryClient provides access to a Confluent Schema Registry
type RegistryClient struct {
	baseURL    string
	httpClient *http.Client

	// Caching
	schemaCache      map[uint32]*CachedSchema  // schema ID -> schema
	subjectCache     map[string]*CachedSubject // subject -> latest version info
	negativeCache    map[string]time.Time      // subject -> time when 404 was cached
	cacheMu          sync.RWMutex
	cacheTTL         time.Duration
	negativeCacheTTL time.Duration // TTL for negative (404) cache entries
}

// CachedSchema represents a cached schema with metadata
type CachedSchema struct {
	ID       uint32    `json:"id"`
	Schema   string    `json:"schema"`
	Subject  string    `json:"subject"`
	Version  int       `json:"version"`
	Format   Format    `json:"-"` // Derived from schema content
	CachedAt time.Time `json:"-"`
}

// CachedSubject represents cached subject information
type CachedSubject struct {
	Subject  string    `json:"subject"`
	LatestID uint32    `json:"id"`
	Version  int       `json:"version"`
	Schema   string    `json:"schema"`
	CachedAt time.Time `json:"-"`
}

// RegistryConfig holds configuration for the Schema Registry client
type RegistryConfig struct {
	URL        string
	Username   string // Optional basic auth
	Password   string // Optional basic auth
	Timeout    time.Duration
	CacheTTL   time.Duration
	MaxRetries int
}

// NewRegistryClient creates a new Schema Registry client
func NewRegistryClient(config RegistryConfig) *RegistryClient {
	if config.Timeout == 0 {
		config.Timeout = 30 * time.Second
	}
	if config.CacheTTL == 0 {
		config.CacheTTL = 5 * time.Minute
	}

	httpClient := &http.Client{
		Timeout: config.Timeout,
	}

	return &RegistryClient{
		baseURL:          config.URL,
		httpClient:       httpClient,
		schemaCache:      make(map[uint32]*CachedSchema),
		subjectCache:     make(map[string]*CachedSubject),
		negativeCache:    make(map[string]time.Time),
		cacheTTL:         config.CacheTTL,
		negativeCacheTTL: 2 * time.Minute, // Cache 404s for 2 minutes
	}
}

// GetSchemaByID retrieves a schema by its ID
func (rc *RegistryClient) GetSchemaByID(schemaID uint32) (*CachedSchema, error) {
	// Check cache first
	rc.cacheMu.RLock()
	if cached, exists := rc.schemaCache[schemaID]; exists {
		if time.Since(cached.CachedAt) < rc.cacheTTL {
			rc.cacheMu.RUnlock()
			return cached, nil
		}
	}
	rc.cacheMu.RUnlock()

	// Fetch from registry
	url := fmt.Sprintf("%s/schemas/ids/%d", rc.baseURL, schemaID)
	resp, err := rc.httpClient.Get(url)
	if err != nil {
		return nil, fmt.Errorf("failed to fetch schema %d: %w", schemaID, err)
	}
	defer resp.Body.Close()

	if resp.StatusCode != http.StatusOK {
		body, _ := io.ReadAll(resp.Body)
		return nil, fmt.Errorf("schema registry error %d: %s", resp.StatusCode, string(body))
	}

	var schemaResp struct {
		Schema  string `json:"schema"`
		Subject string `json:"subject"`
		Version int    `json:"version"`
	}

	if err := json.NewDecoder(resp.Body).Decode(&schemaResp); err != nil {
		return nil, fmt.Errorf("failed to decode schema response: %w", err)
	}

	// Determine format from schema content
	format := rc.detectSchemaFormat(schemaResp.Schema)

	cached := &CachedSchema{
		ID:       schemaID,
		Schema:   schemaResp.Schema,
		Subject:  schemaResp.Subject,
		Version:  schemaResp.Version,
		Format:   format,
		CachedAt: time.Now(),
	}

	// Update cache
	rc.cacheMu.Lock()
	rc.schemaCache[schemaID] = cached
	rc.cacheMu.Unlock()

	return cached, nil
}

// GetLatestSchema retrieves the latest schema for a subject
func (rc *RegistryClient) GetLatestSchema(subject string) (*CachedSubject, error) {
	// Check positive cache first
	rc.cacheMu.RLock()
	if cached, exists := rc.subjectCache[subject]; exists {
		if time.Since(cached.CachedAt) < rc.cacheTTL {
			rc.cacheMu.RUnlock()
			return cached, nil
		}
	}

	// Check negative cache (404 cache)
	if cachedAt, exists := rc.negativeCache[subject]; exists {
		if time.Since(cachedAt) < rc.negativeCacheTTL {
			rc.cacheMu.RUnlock()
			return nil, fmt.Errorf("schema registry error 404: subject not found (cached)")
		}
	}
	rc.cacheMu.RUnlock()

	// Fetch from registry
	url := fmt.Sprintf("%s/subjects/%s/versions/latest", rc.baseURL, subject)
	resp, err := rc.httpClient.Get(url)
	if err != nil {
		return nil, fmt.Errorf("failed to fetch latest schema for %s: %w", subject, err)
	}
	defer resp.Body.Close()

	if resp.StatusCode != http.StatusOK {
		body, _ := io.ReadAll(resp.Body)

		// Cache 404 responses to avoid repeated lookups
		if resp.StatusCode == http.StatusNotFound {
			rc.cacheMu.Lock()
			rc.negativeCache[subject] = time.Now()
			rc.cacheMu.Unlock()
		}

		return nil, fmt.Errorf("schema registry error %d: %s", resp.StatusCode, string(body))
	}

	var schemaResp struct {
		ID      uint32 `json:"id"`
		Schema  string `json:"schema"`
		Subject string `json:"subject"`
		Version int    `json:"version"`
	}

	if err := json.NewDecoder(resp.Body).Decode(&schemaResp); err != nil {
		return nil, fmt.Errorf("failed to decode schema response: %w", err)
	}

	cached := &CachedSubject{
		Subject:  subject,
		LatestID: schemaResp.ID,
		Version:  schemaResp.Version,
		Schema:   schemaResp.Schema,
		CachedAt: time.Now(),
	}

	// Update cache and clear negative cache entry
	rc.cacheMu.Lock()
	rc.subjectCache[subject] = cached
	delete(rc.negativeCache, subject) // Clear any cached 404
	rc.cacheMu.Unlock()

	return cached, nil
}

// RegisterSchema registers a new schema for a subject
func (rc *RegistryClient) RegisterSchema(subject, schema string) (uint32, error) {
	url := fmt.Sprintf("%s/subjects/%s/versions", rc.baseURL, subject)

	reqBody := map[string]string{
		"schema": schema,
	}

	jsonData, err := json.Marshal(reqBody)
	if err != nil {
		return 0, fmt.Errorf("failed to marshal schema request: %w", err)
	}

	resp, err := rc.httpClient.Post(url, "application/json", bytes.NewBuffer(jsonData))
	if err != nil {
		return 0, fmt.Errorf("failed to register schema: %w", err)
	}
	defer resp.Body.Close()

	if resp.StatusCode != http.StatusOK {
		body, _ := io.ReadAll(resp.Body)
		return 0, fmt.Errorf("schema registry error %d: %s", resp.StatusCode, string(body))
	}

	var regResp struct {
		ID uint32 `json:"id"`
	}

	if err := json.NewDecoder(resp.Body).Decode(&regResp); err != nil {
		return 0, fmt.Errorf("failed to decode registration response: %w", err)
	}

	// Invalidate caches for this subject
	rc.cacheMu.Lock()
	delete(rc.subjectCache, subject)
	delete(rc.negativeCache, subject) // Clear any cached 404
	// Note: we don't cache the new schema here since we don't have full metadata
	rc.cacheMu.Unlock()

	return regResp.ID, nil
}

// CheckCompatibility checks if a schema is compatible with the subject
func (rc *RegistryClient) CheckCompatibility(subject, schema string) (bool, error) {
	url := fmt.Sprintf("%s/compatibility/subjects/%s/versions/latest", rc.baseURL, subject)

	reqBody := map[string]string{
		"schema": schema,
	}

	jsonData, err := json.Marshal(reqBody)
	if err != nil {
		return false, fmt.Errorf("failed to marshal compatibility request: %w", err)
	}

	resp, err := rc.httpClient.Post(url, "application/json", bytes.NewBuffer(jsonData))
	if err != nil {
		return false, fmt.Errorf("failed to check compatibility: %w", err)
	}
	defer resp.Body.Close()

	if resp.StatusCode != http.StatusOK {
		body, _ := io.ReadAll(resp.Body)
		return false, fmt.Errorf("schema registry error %d: %s", resp.StatusCode, string(body))
	}

	var compatResp struct {
		IsCompatible bool `json:"is_compatible"`
	}

	if err := json.NewDecoder(resp.Body).Decode(&compatResp); err != nil {
		return false, fmt.Errorf("failed to decode compatibility response: %w", err)
	}

	return compatResp.IsCompatible, nil
}

// ListSubjects returns all subjects in the registry
func (rc *RegistryClient) ListSubjects() ([]string, error) {
	url := fmt.Sprintf("%s/subjects", rc.baseURL)
	resp, err := rc.httpClient.Get(url)
	if err != nil {
		return nil, fmt.Errorf("failed to list subjects: %w", err)
	}
	defer resp.Body.Close()

	if resp.StatusCode != http.StatusOK {
		body, _ := io.ReadAll(resp.Body)
		return nil, fmt.Errorf("schema registry error %d: %s", resp.StatusCode, string(body))
	}

	var subjects []string
	if err := json.NewDecoder(resp.Body).Decode(&subjects); err != nil {
		return nil, fmt.Errorf("failed to decode subjects response: %w", err)
	}

	return subjects, nil
}

// ClearCache clears all cached schemas and subjects
func (rc *RegistryClient) ClearCache() {
	rc.cacheMu.Lock()
	defer rc.cacheMu.Unlock()

	rc.schemaCache = make(map[uint32]*CachedSchema)
	rc.subjectCache = make(map[string]*CachedSubject)
	rc.negativeCache = make(map[string]time.Time)
}

// GetCacheStats returns cache statistics
func (rc *RegistryClient) GetCacheStats() (schemaCount, subjectCount, negativeCacheCount int) {
	rc.cacheMu.RLock()
	defer rc.cacheMu.RUnlock()

	return len(rc.schemaCache), len(rc.subjectCache), len(rc.negativeCache)
}

// detectSchemaFormat attempts to determine the schema format from content
func (rc *RegistryClient) detectSchemaFormat(schema string) Format {
	// Try to parse as JSON first (Avro schemas are JSON)
	var jsonObj interface{}
	if err := json.Unmarshal([]byte(schema), &jsonObj); err == nil {
		// Check for Avro-specific fields
		if schemaMap, ok := jsonObj.(map[string]interface{}); ok {
			if schemaType, exists := schemaMap["type"]; exists {
				if typeStr, ok := schemaType.(string); ok {
					// Common Avro types
					avroTypes := []string{"record", "enum", "array", "map", "union", "fixed"}
					for _, avroType := range avroTypes {
						if typeStr == avroType {
							return FormatAvro
						}
					}
					// Common JSON Schema types (that are not Avro types)
					// Note: "string" is ambiguous - it could be Avro primitive or JSON Schema
					// We need to check other indicators first
					jsonSchemaTypes := []string{"object", "number", "integer", "boolean", "null"}
					for _, jsonSchemaType := range jsonSchemaTypes {
						if typeStr == jsonSchemaType {
							return FormatJSONSchema
						}
					}
				}
			}
			// Check for JSON Schema indicators
			if _, exists := schemaMap["$schema"]; exists {
				return FormatJSONSchema
			}
			// Check for JSON Schema properties field
			if _, exists := schemaMap["properties"]; exists {
				return FormatJSONSchema
			}
		}
		// Default JSON-based schema to Avro only if it doesn't look like JSON Schema
		return FormatAvro
	}

	// Check for Protobuf (typically not JSON)
	// Protobuf schemas in Schema Registry are usually stored as descriptors
	// For now, assume non-JSON schemas are Protobuf
	return FormatProtobuf
}

// HealthCheck verifies the registry is accessible
func (rc *RegistryClient) HealthCheck() error {
	url := fmt.Sprintf("%s/subjects", rc.baseURL)
	resp, err := rc.httpClient.Get(url)
	if err != nil {
		return fmt.Errorf("schema registry health check failed: %w", err)
	}
	defer resp.Body.Close()

	if resp.StatusCode != http.StatusOK {
		return fmt.Errorf("schema registry health check failed with status %d", resp.StatusCode)
	}

	return nil
}