aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--.github/workflows/deploy_telemetry.yml157
-rw-r--r--telemetry/DEPLOYMENT.md271
-rw-r--r--telemetry/README.md351
-rw-r--r--telemetry/docker-compose.yml55
-rw-r--r--telemetry/grafana-dashboard.json734
-rw-r--r--telemetry/grafana-provisioning/dashboards/dashboards.yml12
-rw-r--r--telemetry/grafana-provisioning/datasources/prometheus.yml9
-rw-r--r--telemetry/prometheus.yml15
-rw-r--r--telemetry/proto/telemetry.pb.go398
-rw-r--r--telemetry/proto/telemetry.proto52
-rw-r--r--telemetry/server/Dockerfile18
-rw-r--r--telemetry/server/Makefile97
-rw-r--r--telemetry/server/api/handlers.go152
-rw-r--r--telemetry/server/dashboard/dashboard.go278
-rw-r--r--telemetry/server/go.sum31
-rw-r--r--telemetry/server/main.go111
-rw-r--r--telemetry/server/storage/prometheus.go245
-rw-r--r--telemetry/test/integration.go315
-rw-r--r--weed/command/master.go6
-rw-r--r--weed/command/server.go2
-rw-r--r--weed/server/master_server.go30
-rw-r--r--weed/telemetry/client.go100
-rw-r--r--weed/telemetry/collector.go218
23 files changed, 3657 insertions, 0 deletions
diff --git a/.github/workflows/deploy_telemetry.yml b/.github/workflows/deploy_telemetry.yml
new file mode 100644
index 000000000..8f10af0ce
--- /dev/null
+++ b/.github/workflows/deploy_telemetry.yml
@@ -0,0 +1,157 @@
+# This workflow will build and deploy the SeaweedFS telemetry server
+# For more information see: https://docs.github.com/en/actions/automating-builds-and-tests/building-and-testing-go
+
+name: Deploy Telemetry Server
+
+on:
+ push:
+ branches: [ "master" ]
+ paths:
+ - 'telemetry/**'
+ workflow_dispatch:
+ inputs:
+ setup:
+ description: 'Run first-time server setup'
+ required: true
+ type: boolean
+ default: false
+ deploy:
+ description: 'Deploy telemetry server to remote server'
+ required: true
+ type: boolean
+ default: false
+
+jobs:
+ deploy:
+ runs-on: ubuntu-latest
+ steps:
+ - uses: actions/checkout@v4
+
+ - name: Set up Go
+ uses: actions/setup-go@v4
+ with:
+ go-version: '1.24'
+
+ - name: Build Telemetry Server
+ run: |
+ go mod tidy
+ cd telemetry/server
+ GOOS=linux GOARCH=amd64 go build -o telemetry-server main.go
+
+ - name: First-time Server Setup
+ if: github.event_name == 'workflow_dispatch' && inputs.setup
+ env:
+ SSH_PRIVATE_KEY: ${{ secrets.TELEMETRY_SSH_PRIVATE_KEY }}
+ REMOTE_HOST: ${{ secrets.TELEMETRY_HOST }}
+ REMOTE_USER: ${{ secrets.TELEMETRY_USER }}
+ run: |
+ mkdir -p ~/.ssh
+ echo "$SSH_PRIVATE_KEY" > ~/.ssh/deploy_key
+ chmod 600 ~/.ssh/deploy_key
+ echo "Host *" > ~/.ssh/config
+ echo " StrictHostKeyChecking no" >> ~/.ssh/config
+
+ # Create all required directories with proper permissions
+ ssh -i ~/.ssh/deploy_key $REMOTE_USER@$REMOTE_HOST "
+ mkdir -p ~/seaweedfs-telemetry/bin ~/seaweedfs-telemetry/logs ~/seaweedfs-telemetry/data ~/seaweedfs-telemetry/tmp && \
+ chmod 755 ~/seaweedfs-telemetry/logs && \
+ chmod 755 ~/seaweedfs-telemetry/data && \
+ touch ~/seaweedfs-telemetry/logs/telemetry.log ~/seaweedfs-telemetry/logs/telemetry.error.log && \
+ chmod 644 ~/seaweedfs-telemetry/logs/*.log"
+
+ # Create systemd service file
+ echo "
+ [Unit]
+ Description=SeaweedFS Telemetry Server
+ After=network.target
+
+ [Service]
+ Type=simple
+ User=$REMOTE_USER
+ WorkingDirectory=/home/$REMOTE_USER/seaweedfs-telemetry
+ ExecStart=/home/$REMOTE_USER/seaweedfs-telemetry/bin/telemetry-server -port=8353
+ Restart=always
+ RestartSec=5
+ StandardOutput=append:/home/$REMOTE_USER/seaweedfs-telemetry/logs/telemetry.log
+ StandardError=append:/home/$REMOTE_USER/seaweedfs-telemetry/logs/telemetry.error.log
+
+ [Install]
+ WantedBy=multi-user.target" > telemetry.service
+
+ # Setup logrotate configuration
+ echo "# SeaweedFS Telemetry service log rotation
+ /home/$REMOTE_USER/seaweedfs-telemetry/logs/*.log {
+ daily
+ rotate 30
+ compress
+ delaycompress
+ missingok
+ notifempty
+ create 644 $REMOTE_USER $REMOTE_USER
+ postrotate
+ systemctl restart telemetry.service
+ endscript
+ }" > telemetry_logrotate
+
+ # Copy Grafana dashboard and Prometheus config
+ scp -i ~/.ssh/deploy_key telemetry/grafana-dashboard.json $REMOTE_USER@$REMOTE_HOST:~/seaweedfs-telemetry/
+ scp -i ~/.ssh/deploy_key telemetry/prometheus.yml $REMOTE_USER@$REMOTE_HOST:~/seaweedfs-telemetry/
+
+ # Copy and install service and logrotate files
+ scp -i ~/.ssh/deploy_key telemetry.service telemetry_logrotate $REMOTE_USER@$REMOTE_HOST:~/seaweedfs-telemetry/
+ ssh -i ~/.ssh/deploy_key $REMOTE_USER@$REMOTE_HOST "
+ sudo mv ~/seaweedfs-telemetry/telemetry.service /etc/systemd/system/ && \
+ sudo mv ~/seaweedfs-telemetry/telemetry_logrotate /etc/logrotate.d/seaweedfs-telemetry && \
+ sudo systemctl daemon-reload && \
+ sudo systemctl enable telemetry.service"
+
+ rm -f ~/.ssh/deploy_key
+
+ - name: Deploy Telemetry Server to Remote Server
+ if: (github.event_name == 'push' && contains(github.ref, 'refs/heads/master')) || (github.event_name == 'workflow_dispatch' && inputs.deploy)
+ env:
+ SSH_PRIVATE_KEY: ${{ secrets.TELEMETRY_SSH_PRIVATE_KEY }}
+ REMOTE_HOST: ${{ secrets.TELEMETRY_HOST }}
+ REMOTE_USER: ${{ secrets.TELEMETRY_USER }}
+ run: |
+ mkdir -p ~/.ssh
+ echo "$SSH_PRIVATE_KEY" > ~/.ssh/deploy_key
+ chmod 600 ~/.ssh/deploy_key
+ echo "Host *" > ~/.ssh/config
+ echo " StrictHostKeyChecking no" >> ~/.ssh/config
+
+ # Create temp directory and copy binary
+ ssh -i ~/.ssh/deploy_key $REMOTE_USER@$REMOTE_HOST "mkdir -p ~/seaweedfs-telemetry/tmp"
+ scp -i ~/.ssh/deploy_key telemetry/server/telemetry-server $REMOTE_USER@$REMOTE_HOST:~/seaweedfs-telemetry/tmp/
+
+ # Copy updated configuration files
+ scp -i ~/.ssh/deploy_key telemetry/grafana-dashboard.json $REMOTE_USER@$REMOTE_HOST:~/seaweedfs-telemetry/
+ scp -i ~/.ssh/deploy_key telemetry/prometheus.yml $REMOTE_USER@$REMOTE_HOST:~/seaweedfs-telemetry/
+
+ # Stop service, move binary, and restart
+ ssh -i ~/.ssh/deploy_key $REMOTE_USER@$REMOTE_HOST "
+ sudo systemctl stop telemetry.service || true && \
+ mkdir -p ~/seaweedfs-telemetry/bin && \
+ mv ~/seaweedfs-telemetry/tmp/telemetry-server ~/seaweedfs-telemetry/bin/ && \
+ chmod +x ~/seaweedfs-telemetry/bin/telemetry-server && \
+ sudo systemctl start telemetry.service && \
+ sudo systemctl status telemetry.service"
+
+ # Verify deployment
+ ssh -i ~/.ssh/deploy_key $REMOTE_USER@$REMOTE_HOST "
+ echo 'Waiting for service to start...'
+ sleep 5
+ curl -f http://localhost:8353/health || echo 'Health check failed'"
+
+ rm -f ~/.ssh/deploy_key
+
+ - name: Notify Deployment Status
+ if: always()
+ run: |
+ if [ "${{ job.status }}" == "success" ]; then
+ echo "✅ Telemetry server deployment successful"
+ echo "Dashboard: http://${{ secrets.TELEMETRY_HOST }}:8353"
+ echo "Metrics: http://${{ secrets.TELEMETRY_HOST }}:8353/metrics"
+ else
+ echo "❌ Telemetry server deployment failed"
+ fi \ No newline at end of file
diff --git a/telemetry/DEPLOYMENT.md b/telemetry/DEPLOYMENT.md
new file mode 100644
index 000000000..d5cd69154
--- /dev/null
+++ b/telemetry/DEPLOYMENT.md
@@ -0,0 +1,271 @@
+# SeaweedFS Telemetry Server Deployment
+
+This document describes how to deploy the SeaweedFS telemetry server to a remote server using GitHub Actions.
+
+## Prerequisites
+
+1. A remote Linux server with:
+ - SSH access
+ - systemd (for service management)
+ - Optional: Prometheus and Grafana (for monitoring)
+
+2. GitHub repository secrets configured (see [Setup GitHub Secrets](#setup-github-secrets) below):
+ - `TELEMETRY_SSH_PRIVATE_KEY`: SSH private key for accessing the remote server
+ - `TELEMETRY_HOST`: Remote server hostname or IP address
+ - `TELEMETRY_USER`: Username for SSH access
+
+## Setup GitHub Secrets
+
+Before using the deployment workflow, you need to configure the required secrets in your GitHub repository.
+
+### Step 1: Generate SSH Key Pair
+
+On your local machine, generate a new SSH key pair specifically for deployment:
+
+```bash
+# Generate a new SSH key pair
+ssh-keygen -t ed25519 -C "seaweedfs-telemetry-deploy" -f ~/.ssh/seaweedfs_telemetry_deploy
+
+# This creates two files:
+# ~/.ssh/seaweedfs_telemetry_deploy (private key)
+# ~/.ssh/seaweedfs_telemetry_deploy.pub (public key)
+```
+
+### Step 2: Configure Remote Server
+
+Copy the public key to your remote server:
+
+```bash
+# Copy public key to remote server
+ssh-copy-id -i ~/.ssh/seaweedfs_telemetry_deploy.pub user@your-server.com
+
+# Or manually append to authorized_keys
+cat ~/.ssh/seaweedfs_telemetry_deploy.pub | ssh user@your-server.com "mkdir -p ~/.ssh && cat >> ~/.ssh/authorized_keys"
+```
+
+Test the SSH connection:
+
+```bash
+# Test SSH connection with the new key
+ssh -i ~/.ssh/seaweedfs_telemetry_deploy user@your-server.com "echo 'SSH connection successful'"
+```
+
+### Step 3: Add Secrets to GitHub Repository
+
+1. Go to your GitHub repository
+2. Click on **Settings** tab
+3. In the sidebar, click **Secrets and variables** → **Actions**
+4. Click **New repository secret** for each of the following:
+
+#### TELEMETRY_SSH_PRIVATE_KEY
+
+```bash
+# Display the private key content
+cat ~/.ssh/seaweedfs_telemetry_deploy
+```
+
+- **Name**: `TELEMETRY_SSH_PRIVATE_KEY`
+- **Value**: Copy the entire private key content, including the `-----BEGIN OPENSSH PRIVATE KEY-----` and `-----END OPENSSH PRIVATE KEY-----` lines
+
+#### TELEMETRY_HOST
+
+- **Name**: `TELEMETRY_HOST`
+- **Value**: Your server's hostname or IP address (e.g., `telemetry.example.com` or `192.168.1.100`)
+
+#### TELEMETRY_USER
+
+- **Name**: `TELEMETRY_USER`
+- **Value**: The username on the remote server (e.g., `ubuntu`, `deploy`, or your username)
+
+### Step 4: Verify Configuration
+
+Create a simple test workflow or manually trigger the deployment to verify the secrets are working correctly.
+
+### Security Best Practices
+
+1. **Dedicated SSH Key**: Use a separate SSH key only for deployment
+2. **Limited Permissions**: Create a dedicated user on the remote server with minimal required permissions
+3. **Key Rotation**: Regularly rotate SSH keys
+4. **Server Access**: Restrict SSH access to specific IP ranges if possible
+
+### Example Server Setup
+
+If you're setting up a new server, here's a basic configuration:
+
+```bash
+# On the remote server, create a dedicated user for deployment
+sudo useradd -m -s /bin/bash seaweedfs-deploy
+sudo usermod -aG sudo seaweedfs-deploy # Only if sudo access is needed
+
+# Switch to the deployment user
+sudo su - seaweedfs-deploy
+
+# Create SSH directory
+mkdir -p ~/.ssh
+chmod 700 ~/.ssh
+
+# Add your public key (paste the content of seaweedfs_telemetry_deploy.pub)
+nano ~/.ssh/authorized_keys
+chmod 600 ~/.ssh/authorized_keys
+```
+
+### Troubleshooting
+
+#### SSH Connection Issues
+
+```bash
+# Test SSH connection manually
+ssh -i ~/.ssh/seaweedfs_telemetry_deploy -v user@your-server.com
+
+# Check SSH key permissions
+ls -la ~/.ssh/seaweedfs_telemetry_deploy*
+# Should show: -rw------- for private key, -rw-r--r-- for public key
+```
+
+#### GitHub Actions Fails
+
+1. **Check secrets**: Ensure all three secrets are properly set in GitHub
+2. **Verify SSH key**: Make sure the entire private key (including headers/footers) is copied
+3. **Test connectivity**: Manually SSH to the server from your local machine
+4. **Check user permissions**: Ensure the remote user has necessary permissions
+
+## GitHub Actions Workflow
+
+The deployment workflow (`.github/workflows/deploy_telemetry.yml`) provides two main operations:
+
+### 1. First-time Setup
+
+Run this once to set up the remote server:
+
+1. Go to GitHub Actions in your repository
+2. Select "Deploy Telemetry Server" workflow
+3. Click "Run workflow"
+4. Check "Run first-time server setup"
+5. Click "Run workflow"
+
+This will:
+- Create necessary directories on the remote server
+- Set up systemd service configuration
+- Configure log rotation
+- Upload Grafana dashboard and Prometheus configuration
+
+
+### 2. Deploy Updates
+
+Deployments happen automatically when:
+- Code is pushed to the `master` branch with changes in the `telemetry/` directory
+
+Or manually trigger deployment:
+1. Go to GitHub Actions in your repository
+2. Select "Deploy Telemetry Server" workflow
+3. Click "Run workflow"
+4. Check "Deploy telemetry server to remote server"
+5. Click "Run workflow"
+
+## Server Directory Structure
+
+After setup, the remote server will have:
+
+```
+~/seaweedfs-telemetry/
+├── bin/
+│ └── telemetry-server # Binary executable
+├── logs/
+│ ├── telemetry.log # Application logs
+│ └── telemetry.error.log # Error logs
+├── data/ # Data directory (if needed)
+├── grafana-dashboard.json # Grafana dashboard configuration
+└── prometheus.yml # Prometheus configuration
+```
+
+## Service Management
+
+The telemetry server runs as a systemd service:
+
+```bash
+# Check service status
+sudo systemctl status telemetry.service
+
+# View logs
+sudo journalctl -u telemetry.service -f
+
+# Restart service
+sudo systemctl restart telemetry.service
+
+# Stop/start service
+sudo systemctl stop telemetry.service
+sudo systemctl start telemetry.service
+```
+
+## Accessing the Service
+
+After deployment, the telemetry server will be available at:
+
+- **Dashboard**: `http://your-server:8353`
+- **API**: `http://your-server:8353/api/*`
+- **Metrics**: `http://your-server:8353/metrics`
+- **Health Check**: `http://your-server:8353/health`
+
+## Optional: Prometheus and Grafana Integration
+
+### Prometheus Setup
+
+1. Install Prometheus on your server
+2. Update `/etc/prometheus/prometheus.yml` to include:
+ ```yaml
+ scrape_configs:
+ - job_name: 'seaweedfs-telemetry'
+ static_configs:
+ - targets: ['localhost:8353']
+ metrics_path: '/metrics'
+ ```
+
+### Grafana Setup
+
+1. Install Grafana on your server
+2. Import the dashboard from `~/seaweedfs-telemetry/grafana-dashboard.json`
+3. Configure Prometheus as a data source pointing to your Prometheus instance
+
+## Troubleshooting
+
+### Deployment Fails
+
+1. Check GitHub Actions logs for detailed error messages
+2. Verify SSH connectivity: `ssh user@host`
+3. Ensure all required secrets are configured in GitHub
+
+### Service Won't Start
+
+1. Check service logs: `sudo journalctl -u telemetry.service`
+2. Verify binary permissions: `ls -la ~/seaweedfs-telemetry/bin/`
+3. Test binary manually: `~/seaweedfs-telemetry/bin/telemetry-server -help`
+
+### Port Conflicts
+
+If port 8353 is already in use:
+
+1. Edit the systemd service: `sudo systemctl edit telemetry.service`
+2. Add override configuration:
+ ```ini
+ [Service]
+ ExecStart=
+ ExecStart=/home/user/seaweedfs-telemetry/bin/telemetry-server -port=8354
+ ```
+3. Reload and restart: `sudo systemctl daemon-reload && sudo systemctl restart telemetry.service`
+
+## Security Considerations
+
+1. **Firewall**: Consider restricting access to telemetry ports
+2. **SSH Keys**: Use dedicated SSH keys with minimal permissions
+3. **User Permissions**: Run the service as a non-privileged user
+4. **Network**: Consider running on internal networks only
+
+## Monitoring
+
+Monitor the deployment and service health:
+
+- **GitHub Actions**: Check workflow runs for deployment status
+- **System Logs**: `sudo journalctl -u telemetry.service`
+- **Application Logs**: `tail -f ~/seaweedfs-telemetry/logs/telemetry.log`
+- **Health Endpoint**: `curl http://localhost:8353/health`
+- **Metrics**: `curl http://localhost:8353/metrics` \ No newline at end of file
diff --git a/telemetry/README.md b/telemetry/README.md
new file mode 100644
index 000000000..aee050943
--- /dev/null
+++ b/telemetry/README.md
@@ -0,0 +1,351 @@
+# SeaweedFS Telemetry System
+
+A privacy-respecting telemetry system for SeaweedFS that collects cluster-level usage statistics and provides visualization through Prometheus and Grafana.
+
+## Features
+
+- **Privacy-First Design**: Uses in-memory cluster IDs (regenerated on restart), no personal data collection
+- **Prometheus Integration**: Native Prometheus metrics for monitoring and alerting
+- **Grafana Dashboards**: Pre-built dashboards for data visualization
+- **Protocol Buffers**: Efficient binary data transmission for optimal performance
+- **Opt-in Only**: Disabled by default, requires explicit configuration
+- **Docker Compose**: Complete monitoring stack deployment
+- **Automatic Cleanup**: Configurable data retention policies
+
+## Architecture
+
+```
+SeaweedFS Cluster → Telemetry Client → Telemetry Server → Prometheus → Grafana
+ (protobuf) (metrics) (queries)
+```
+
+## Data Transmission
+
+The telemetry system uses **Protocol Buffers exclusively** for efficient binary data transmission:
+
+- **Compact Format**: 30-50% smaller than JSON
+- **Fast Serialization**: Better performance than text-based formats
+- **Type Safety**: Strong typing with generated Go structs
+- **Schema Evolution**: Built-in versioning support
+
+### Protobuf Schema
+
+```protobuf
+message TelemetryData {
+ string cluster_id = 1; // In-memory generated UUID
+ string version = 2; // SeaweedFS version
+ string os = 3; // Operating system
+ repeated string features = 4; // Enabled features
+ string deployment = 5; // Deployment type
+ int32 volume_server_count = 6; // Number of volume servers
+ uint64 total_disk_bytes = 7; // Total disk usage
+ int32 total_volume_count = 8; // Total volume count
+ int64 timestamp = 9; // Collection timestamp
+}
+```
+
+## Privacy Approach
+
+- **No Personal Data**: No hostnames, IP addresses, or user information
+- **In-Memory IDs**: Cluster IDs are generated in-memory and change on restart
+- **Aggregated Data**: Only cluster-level statistics, no individual file/user data
+- **Opt-in Only**: Telemetry is disabled by default
+- **Transparent**: Open source implementation, clear data collection policy
+
+## Collected Data
+
+| Field | Description | Example |
+|-------|-------------|---------|
+| `cluster_id` | In-memory UUID (changes on restart) | `a1b2c3d4-...` |
+| `version` | SeaweedFS version | `3.45` |
+| `os` | Operating system and architecture | `linux/amd64` |
+| `features` | Enabled components | `["filer", "s3api"]` |
+| `deployment` | Deployment type | `cluster` |
+| `volume_server_count` | Number of volume servers | `5` |
+| `total_disk_bytes` | Total disk usage across cluster | `1073741824` |
+| `total_volume_count` | Total number of volumes | `120` |
+| `timestamp` | When data was collected | `1640995200` |
+
+## Quick Start
+
+### 1. Deploy Telemetry Server
+
+```bash
+# Clone and start the complete monitoring stack
+git clone https://github.com/seaweedfs/seaweedfs.git
+cd seaweedfs/telemetry
+docker-compose up -d
+
+# Or run the server directly
+cd server
+go run . -port=8080 -dashboard=true
+```
+
+### 2. Configure SeaweedFS
+
+```bash
+# Enable telemetry in SeaweedFS master (uses default telemetry.seaweedfs.com:3091)
+weed master -telemetry=true
+
+# Or in server mode
+weed server -telemetry=true
+
+# Or specify custom telemetry server
+weed master -telemetry=true -telemetry.url=http://localhost:8080/api/collect
+```
+
+### 3. Access Dashboards
+
+- **Telemetry Server**: http://localhost:8080
+- **Prometheus**: http://localhost:9090
+- **Grafana**: http://localhost:3000 (admin/admin)
+
+## Configuration
+
+### SeaweedFS Master/Server
+
+```bash
+# Enable telemetry
+-telemetry=true
+
+# Set custom telemetry server URL (optional, defaults to telemetry.seaweedfs.com:3091)
+-telemetry.url=http://your-telemetry-server:8080/api/collect
+```
+
+### Telemetry Server
+
+```bash
+# Server configuration
+-port=8080 # Server port
+-dashboard=true # Enable built-in dashboard
+-cleanup=24h # Cleanup interval
+-max-age=720h # Maximum data retention (30 days)
+
+# Example
+./telemetry-server -port=8080 -dashboard=true -cleanup=24h -max-age=720h
+```
+
+## Prometheus Metrics
+
+The telemetry server exposes these Prometheus metrics:
+
+### Cluster Metrics
+- `seaweedfs_telemetry_total_clusters`: Total unique clusters (30 days)
+- `seaweedfs_telemetry_active_clusters`: Active clusters (7 days)
+
+### Per-Cluster Metrics
+- `seaweedfs_telemetry_volume_servers{cluster_id, version, os, deployment}`: Volume servers per cluster
+- `seaweedfs_telemetry_disk_bytes{cluster_id, version, os, deployment}`: Disk usage per cluster
+- `seaweedfs_telemetry_volume_count{cluster_id, version, os, deployment}`: Volume count per cluster
+- `seaweedfs_telemetry_filer_count{cluster_id, version, os, deployment}`: Filer servers per cluster
+- `seaweedfs_telemetry_broker_count{cluster_id, version, os, deployment}`: Broker servers per cluster
+- `seaweedfs_telemetry_cluster_info{cluster_id, version, os, deployment, features}`: Cluster metadata
+
+### Server Metrics
+- `seaweedfs_telemetry_reports_received_total`: Total telemetry reports received
+
+## API Endpoints
+
+### Data Collection
+```bash
+# Submit telemetry data (protobuf only)
+POST /api/collect
+Content-Type: application/x-protobuf
+[TelemetryRequest protobuf data]
+```
+
+### Statistics (JSON for dashboard/debugging)
+```bash
+# Get aggregated statistics
+GET /api/stats
+
+# Get recent cluster instances
+GET /api/instances?limit=100
+
+# Get metrics over time
+GET /api/metrics?days=30
+```
+
+### Monitoring
+```bash
+# Prometheus metrics
+GET /metrics
+```
+
+## Docker Deployment
+
+### Complete Stack (Recommended)
+
+```yaml
+# docker-compose.yml
+version: '3.8'
+services:
+ telemetry-server:
+ build: ./server
+ ports:
+ - "8080:8080"
+ command: ["-port=8080", "-dashboard=true", "-cleanup=24h"]
+
+ prometheus:
+ image: prom/prometheus:latest
+ ports:
+ - "9090:9090"
+ volumes:
+ - ./prometheus.yml:/etc/prometheus/prometheus.yml
+
+ grafana:
+ image: grafana/grafana:latest
+ ports:
+ - "3000:3000"
+ environment:
+ - GF_SECURITY_ADMIN_PASSWORD=admin
+ volumes:
+ - ./grafana-provisioning:/etc/grafana/provisioning
+ - ./grafana-dashboard.json:/var/lib/grafana/dashboards/seaweedfs.json
+```
+
+```bash
+# Deploy the stack
+docker-compose up -d
+
+# Scale telemetry server if needed (note: remove the fixed "8080:8080" host port mapping first, or replicas will conflict on the host port)
+docker-compose up -d --scale telemetry-server=3
+```
+
+### Server Only
+
+```bash
+# Build and run telemetry server
+cd server
+docker build -t seaweedfs-telemetry .
+docker run -p 8080:8080 seaweedfs-telemetry -port=8080 -dashboard=true
+```
+
+## Development
+
+### Protocol Buffer Development
+
+```bash
+# Generate protobuf code
+cd telemetry
+protoc --go_out=. --go_opt=paths=source_relative proto/telemetry.proto
+
+# The generated code is already included in the repository
+```
+
+### Build from Source
+
+```bash
+# Build telemetry server
+cd telemetry/server
+go build -o telemetry-server .
+
+# Build SeaweedFS with telemetry support
+cd ../..
+go build -o weed ./weed
+```
+
+### Testing
+
+```bash
+# Test telemetry server
+cd telemetry/server
+go test ./...
+
+# Test protobuf communication (requires protobuf tools)
+# See telemetry client code for examples
+```
+
+## Grafana Dashboard
+
+The included Grafana dashboard provides:
+
+- **Overview**: Total and active clusters, version distribution
+- **Resource Usage**: Volume servers and disk usage over time
+- **Deployments**: Deployment type and OS distribution
+- **Growth Trends**: Historical growth patterns
+
+### Custom Queries
+
+```promql
+# Total active clusters
+seaweedfs_telemetry_active_clusters
+
+# Disk usage by version
+sum by (version) (seaweedfs_telemetry_disk_bytes)
+
+# Volume servers by deployment type
+sum by (deployment) (seaweedfs_telemetry_volume_servers)
+
+# Filer servers by version
+sum by (version) (seaweedfs_telemetry_filer_count)
+
+# Broker servers across all clusters
+sum(seaweedfs_telemetry_broker_count)
+
+# Growth rate (weekly)
+increase(seaweedfs_telemetry_total_clusters[7d])
+```
+
+## Security Considerations
+
+- **Network Security**: Use HTTPS in production environments
+- **Access Control**: Implement authentication for Grafana and Prometheus
+- **Data Retention**: Configure appropriate retention policies
+- **Monitoring**: Monitor the telemetry infrastructure itself
+
+## Troubleshooting
+
+### Common Issues
+
+**SeaweedFS not sending data:**
+```bash
+# Check telemetry configuration
+weed master -h | grep telemetry
+
+# Verify connectivity
+curl -v http://your-telemetry-server:8080/api/collect
+```
+
+**Server not receiving data:**
+```bash
+# Check server logs
+docker-compose logs telemetry-server
+
+# Verify metrics endpoint
+curl http://localhost:8080/metrics
+```
+
+**Prometheus not scraping:**
+```bash
+# Check Prometheus targets
+curl http://localhost:9090/api/v1/targets
+
+# Verify configuration
+docker-compose logs prometheus
+```
+
+### Debugging
+
+```bash
+# Enable verbose logging in SeaweedFS
+weed master -v=2 -telemetry=true
+
+# Check telemetry server metrics
+curl http://localhost:8080/metrics | grep seaweedfs_telemetry
+
+# Test data flow
+curl http://localhost:8080/api/stats
+```
+
+## Contributing
+
+1. Fork the repository
+2. Create a feature branch
+3. Make your changes
+4. Add tests if applicable
+5. Submit a pull request
+
+## License
+
+This telemetry system is part of SeaweedFS and follows the same Apache 2.0 license. \ No newline at end of file
diff --git a/telemetry/docker-compose.yml b/telemetry/docker-compose.yml
new file mode 100644
index 000000000..73f0e8f70
--- /dev/null
+++ b/telemetry/docker-compose.yml
@@ -0,0 +1,55 @@
+version: '3.8'
+
+services:
+ telemetry-server:
+ build: ./server
+ ports:
+ - "8080:8080"
+ command: [
+ "./telemetry-server",
+ "-port=8080",
+ "-dashboard=false", # Disable built-in dashboard, use Grafana
+ "-log=true",
+ "-cors=true"
+ ]
+ networks:
+ - telemetry
+
+ prometheus:
+ image: prom/prometheus:latest
+ ports:
+ - "9090:9090"
+ volumes:
+ - ./prometheus.yml:/etc/prometheus/prometheus.yml
+ - prometheus_data:/prometheus
+ command:
+ - '--config.file=/etc/prometheus/prometheus.yml'
+ - '--storage.tsdb.path=/prometheus'
+ - '--web.console.libraries=/etc/prometheus/console_libraries'
+ - '--web.console.templates=/etc/prometheus/consoles'
+ - '--storage.tsdb.retention.time=200h'
+ - '--web.enable-lifecycle'
+ networks:
+ - telemetry
+
+ grafana:
+ image: grafana/grafana:latest
+ ports:
+ - "3000:3000"
+ environment:
+ - GF_SECURITY_ADMIN_PASSWORD=admin
+ - GF_USERS_ALLOW_SIGN_UP=false
+ volumes:
+ - grafana_data:/var/lib/grafana
+ - ./grafana-dashboard.json:/var/lib/grafana/dashboards/seaweedfs-telemetry.json
+ - ./grafana-provisioning:/etc/grafana/provisioning
+ networks:
+ - telemetry
+
+volumes:
+ prometheus_data:
+ grafana_data:
+
+networks:
+ telemetry:
+ driver: bridge \ No newline at end of file
diff --git a/telemetry/grafana-dashboard.json b/telemetry/grafana-dashboard.json
new file mode 100644
index 000000000..c33896dab
--- /dev/null
+++ b/telemetry/grafana-dashboard.json
@@ -0,0 +1,734 @@
+{
+ "annotations": {
+ "list": [
+ {
+ "builtIn": 1,
+ "datasource": {
+ "type": "grafana",
+ "uid": "-- Grafana --"
+ },
+ "enable": true,
+ "hide": true,
+ "iconColor": "rgba(0, 211, 255, 1)",
+ "name": "Annotations & Alerts",
+ "type": "dashboard"
+ }
+ ]
+ },
+ "editable": true,
+ "fiscalYearStartMonth": 0,
+ "graphTooltip": 0,
+ "id": null,
+ "links": [],
+ "liveNow": false,
+ "panels": [
+ {
+ "datasource": {
+ "type": "prometheus",
+ "uid": "${DS_PROMETHEUS}"
+ },
+ "fieldConfig": {
+ "defaults": {
+ "color": {
+ "mode": "thresholds"
+ },
+ "custom": {
+ "align": "auto",
+ "cellOptions": {
+ "type": "auto"
+ },
+ "inspect": false
+ },
+ "mappings": [],
+ "thresholds": {
+ "mode": "absolute",
+ "steps": [
+ {
+ "color": "green",
+ "value": null
+ },
+ {
+ "color": "red",
+ "value": 80
+ }
+ ]
+ }
+ },
+ "overrides": []
+ },
+ "gridPos": {
+ "h": 8,
+ "w": 12,
+ "x": 0,
+ "y": 0
+ },
+ "id": 1,
+ "options": {
+ "showHeader": true
+ },
+ "pluginVersion": "10.0.0",
+ "targets": [
+ {
+ "datasource": {
+ "type": "prometheus",
+ "uid": "${DS_PROMETHEUS}"
+ },
+ "expr": "seaweedfs_telemetry_total_clusters",
+ "format": "time_series",
+ "refId": "A"
+ }
+ ],
+ "title": "Total SeaweedFS Clusters",
+ "type": "stat"
+ },
+ {
+ "datasource": {
+ "type": "prometheus",
+ "uid": "${DS_PROMETHEUS}"
+ },
+ "fieldConfig": {
+ "defaults": {
+ "color": {
+ "mode": "thresholds"
+ },
+ "custom": {
+ "align": "auto",
+ "cellOptions": {
+ "type": "auto"
+ },
+ "inspect": false
+ },
+ "mappings": [],
+ "thresholds": {
+ "mode": "absolute",
+ "steps": [
+ {
+ "color": "green",
+ "value": null
+ },
+ {
+ "color": "red",
+ "value": 80
+ }
+ ]
+ }
+ },
+ "overrides": []
+ },
+ "gridPos": {
+ "h": 8,
+ "w": 12,
+ "x": 12,
+ "y": 0
+ },
+ "id": 2,
+ "options": {
+ "showHeader": true
+ },
+ "pluginVersion": "10.0.0",
+ "targets": [
+ {
+ "datasource": {
+ "type": "prometheus",
+ "uid": "${DS_PROMETHEUS}"
+ },
+ "expr": "seaweedfs_telemetry_active_clusters",
+ "format": "time_series",
+ "refId": "A"
+ }
+ ],
+ "title": "Active Clusters (7 days)",
+ "type": "stat"
+ },
+ {
+ "datasource": {
+ "type": "prometheus",
+ "uid": "${DS_PROMETHEUS}"
+ },
+ "fieldConfig": {
+ "defaults": {
+ "color": {
+ "mode": "palette-classic"
+ },
+ "custom": {
+ "hideFrom": {
+ "legend": false,
+ "tooltip": false,
+ "vis": false
+ }
+ },
+ "mappings": []
+ },
+ "overrides": []
+ },
+ "gridPos": {
+ "h": 8,
+ "w": 12,
+ "x": 0,
+ "y": 8
+ },
+ "id": 3,
+ "options": {
+ "legend": {
+ "displayMode": "visible",
+ "placement": "bottom",
+ "showLegend": true
+ },
+ "pieType": "pie",
+ "reduceOptions": {
+ "values": false,
+ "calcs": [
+ "lastNotNull"
+ ],
+ "fields": ""
+ },
+ "tooltip": {
+ "mode": "single",
+ "sort": "none"
+ }
+ },
+ "targets": [
+ {
+ "datasource": {
+ "type": "prometheus",
+ "uid": "${DS_PROMETHEUS}"
+ },
+ "expr": "count by (version) (seaweedfs_telemetry_cluster_info)",
+ "format": "time_series",
+ "legendFormat": "{{version}}",
+ "refId": "A"
+ }
+ ],
+ "title": "SeaweedFS Version Distribution",
+ "type": "piechart"
+ },
+ {
+ "datasource": {
+ "type": "prometheus",
+ "uid": "${DS_PROMETHEUS}"
+ },
+ "fieldConfig": {
+ "defaults": {
+ "color": {
+ "mode": "palette-classic"
+ },
+ "custom": {
+ "hideFrom": {
+ "legend": false,
+ "tooltip": false,
+ "vis": false
+ }
+ },
+ "mappings": []
+ },
+ "overrides": []
+ },
+ "gridPos": {
+ "h": 8,
+ "w": 12,
+ "x": 12,
+ "y": 8
+ },
+ "id": 4,
+ "options": {
+ "legend": {
+ "displayMode": "visible",
+ "placement": "bottom",
+ "showLegend": true
+ },
+ "pieType": "pie",
+ "reduceOptions": {
+ "values": false,
+ "calcs": [
+ "lastNotNull"
+ ],
+ "fields": ""
+ },
+ "tooltip": {
+ "mode": "single",
+ "sort": "none"
+ }
+ },
+ "targets": [
+ {
+ "datasource": {
+ "type": "prometheus",
+ "uid": "${DS_PROMETHEUS}"
+ },
+ "expr": "count by (os) (seaweedfs_telemetry_cluster_info)",
+ "format": "time_series",
+ "legendFormat": "{{os}}",
+ "refId": "A"
+ }
+ ],
+ "title": "Operating System Distribution",
+ "type": "piechart"
+ },
+ {
+ "datasource": {
+ "type": "prometheus",
+ "uid": "${DS_PROMETHEUS}"
+ },
+ "fieldConfig": {
+ "defaults": {
+ "color": {
+ "mode": "palette-classic"
+ },
+ "custom": {
+ "axisLabel": "",
+ "axisPlacement": "auto",
+ "barAlignment": 0,
+ "drawStyle": "line",
+ "fillOpacity": 0,
+ "gradientMode": "none",
+ "hideFrom": {
+ "legend": false,
+ "tooltip": false,
+ "vis": false
+ },
+ "lineInterpolation": "linear",
+ "lineWidth": 1,
+ "pointSize": 5,
+ "scaleDistribution": {
+ "type": "linear"
+ },
+ "showPoints": "auto",
+ "spanNulls": false,
+ "stacking": {
+ "group": "A",
+ "mode": "none"
+ },
+ "thresholdsStyle": {
+ "mode": "off"
+ }
+ },
+ "mappings": [],
+ "thresholds": {
+ "mode": "absolute",
+ "steps": [
+ {
+ "color": "green",
+ "value": null
+ },
+ {
+ "color": "red",
+ "value": 80
+ }
+ ]
+ }
+ },
+ "overrides": []
+ },
+ "gridPos": {
+ "h": 8,
+ "w": 24,
+ "x": 0,
+ "y": 16
+ },
+ "id": 5,
+ "options": {
+ "legend": {
+ "calcs": [],
+ "displayMode": "list",
+ "placement": "bottom",
+ "showLegend": true
+ },
+ "tooltip": {
+ "mode": "single",
+ "sort": "none"
+ }
+ },
+ "targets": [
+ {
+ "datasource": {
+ "type": "prometheus",
+ "uid": "${DS_PROMETHEUS}"
+ },
+ "expr": "sum(seaweedfs_telemetry_volume_servers)",
+ "format": "time_series",
+ "legendFormat": "Total Volume Servers",
+ "refId": "A"
+ }
+ ],
+ "title": "Total Volume Servers Over Time",
+ "type": "timeseries"
+ },
+ {
+ "datasource": {
+ "type": "prometheus",
+ "uid": "${DS_PROMETHEUS}"
+ },
+ "fieldConfig": {
+ "defaults": {
+ "color": {
+ "mode": "palette-classic"
+ },
+ "custom": {
+ "axisLabel": "",
+ "axisPlacement": "auto",
+ "barAlignment": 0,
+ "drawStyle": "line",
+ "fillOpacity": 0,
+ "gradientMode": "none",
+ "hideFrom": {
+ "legend": false,
+ "tooltip": false,
+ "vis": false
+ },
+ "lineInterpolation": "linear",
+ "lineWidth": 1,
+ "pointSize": 5,
+ "scaleDistribution": {
+ "type": "linear"
+ },
+ "showPoints": "auto",
+ "spanNulls": false,
+ "stacking": {
+ "group": "A",
+ "mode": "none"
+ },
+ "thresholdsStyle": {
+ "mode": "off"
+ }
+ },
+ "mappings": [],
+ "thresholds": {
+ "mode": "absolute",
+ "steps": [
+ {
+ "color": "green",
+ "value": null
+ },
+ {
+ "color": "red",
+ "value": 80
+ }
+ ]
+ },
+ "unit": "bytes"
+ },
+ "overrides": []
+ },
+ "gridPos": {
+ "h": 8,
+ "w": 12,
+ "x": 0,
+ "y": 24
+ },
+ "id": 6,
+ "options": {
+ "legend": {
+ "calcs": [],
+ "displayMode": "list",
+ "placement": "bottom",
+ "showLegend": true
+ },
+ "tooltip": {
+ "mode": "single",
+ "sort": "none"
+ }
+ },
+ "targets": [
+ {
+ "datasource": {
+ "type": "prometheus",
+ "uid": "${DS_PROMETHEUS}"
+ },
+ "expr": "sum(seaweedfs_telemetry_disk_bytes)",
+ "format": "time_series",
+ "legendFormat": "Total Disk Usage",
+ "refId": "A"
+ }
+ ],
+ "title": "Total Disk Usage Over Time",
+ "type": "timeseries"
+ },
+ {
+ "datasource": {
+ "type": "prometheus",
+ "uid": "${DS_PROMETHEUS}"
+ },
+ "fieldConfig": {
+ "defaults": {
+ "color": {
+ "mode": "palette-classic"
+ },
+ "custom": {
+ "axisLabel": "",
+ "axisPlacement": "auto",
+ "barAlignment": 0,
+ "drawStyle": "line",
+ "fillOpacity": 0,
+ "gradientMode": "none",
+ "hideFrom": {
+ "legend": false,
+ "tooltip": false,
+ "vis": false
+ },
+ "lineInterpolation": "linear",
+ "lineWidth": 1,
+ "pointSize": 5,
+ "scaleDistribution": {
+ "type": "linear"
+ },
+ "showPoints": "auto",
+ "spanNulls": false,
+ "stacking": {
+ "group": "A",
+ "mode": "none"
+ },
+ "thresholdsStyle": {
+ "mode": "off"
+ }
+ },
+ "mappings": [],
+ "thresholds": {
+ "mode": "absolute",
+ "steps": [
+ {
+ "color": "green",
+ "value": null
+ },
+ {
+ "color": "red",
+ "value": 80
+ }
+ ]
+ }
+ },
+ "overrides": []
+ },
+ "gridPos": {
+ "h": 8,
+ "w": 12,
+ "x": 12,
+ "y": 24
+ },
+ "id": 7,
+ "options": {
+ "legend": {
+ "calcs": [],
+ "displayMode": "list",
+ "placement": "bottom",
+ "showLegend": true
+ },
+ "tooltip": {
+ "mode": "single",
+ "sort": "none"
+ }
+ },
+ "targets": [
+ {
+ "datasource": {
+ "type": "prometheus",
+ "uid": "${DS_PROMETHEUS}"
+ },
+ "expr": "sum(seaweedfs_telemetry_volume_count)",
+ "format": "time_series",
+ "legendFormat": "Total Volume Count",
+ "refId": "A"
+ }
+ ],
+ "title": "Total Volume Count Over Time",
+ "type": "timeseries"
+ },
+ {
+ "datasource": {
+ "type": "prometheus",
+ "uid": "${DS_PROMETHEUS}"
+ },
+ "fieldConfig": {
+ "defaults": {
+ "color": {
+ "mode": "palette-classic"
+ },
+ "custom": {
+ "axisLabel": "",
+ "axisPlacement": "auto",
+ "barAlignment": 0,
+ "drawStyle": "line",
+ "fillOpacity": 0,
+ "gradientMode": "none",
+ "hideFrom": {
+ "legend": false,
+ "tooltip": false,
+ "vis": false
+ },
+ "lineInterpolation": "linear",
+ "lineWidth": 1,
+ "pointSize": 5,
+ "scaleDistribution": {
+ "type": "linear"
+ },
+ "showPoints": "auto",
+ "spanNulls": false,
+ "stacking": {
+ "group": "A",
+ "mode": "none"
+ },
+ "thresholdsStyle": {
+ "mode": "off"
+ }
+ },
+ "mappings": [],
+ "thresholds": {
+ "mode": "absolute",
+ "steps": [
+ {
+ "color": "green",
+ "value": null
+ },
+ {
+ "color": "red",
+ "value": 80
+ }
+ ]
+ }
+ },
+ "overrides": []
+ },
+ "gridPos": {
+ "h": 8,
+ "w": 12,
+ "x": 0,
+ "y": 32
+ },
+ "id": 8,
+ "options": {
+ "legend": {
+ "calcs": [],
+ "displayMode": "list",
+ "placement": "bottom",
+ "showLegend": true
+ },
+ "tooltip": {
+ "mode": "single",
+ "sort": "none"
+ }
+ },
+ "targets": [
+ {
+ "datasource": {
+ "type": "prometheus",
+ "uid": "${DS_PROMETHEUS}"
+ },
+ "expr": "sum(seaweedfs_telemetry_filer_count)",
+ "format": "time_series",
+ "legendFormat": "Total Filer Servers",
+ "refId": "A"
+ }
+ ],
+ "title": "Total Filer Servers Over Time",
+ "type": "timeseries"
+ },
+ {
+ "datasource": {
+ "type": "prometheus",
+ "uid": "${DS_PROMETHEUS}"
+ },
+ "fieldConfig": {
+ "defaults": {
+ "color": {
+ "mode": "palette-classic"
+ },
+ "custom": {
+ "axisLabel": "",
+ "axisPlacement": "auto",
+ "barAlignment": 0,
+ "drawStyle": "line",
+ "fillOpacity": 0,
+ "gradientMode": "none",
+ "hideFrom": {
+ "legend": false,
+ "tooltip": false,
+ "vis": false
+ },
+ "lineInterpolation": "linear",
+ "lineWidth": 1,
+ "pointSize": 5,
+ "scaleDistribution": {
+ "type": "linear"
+ },
+ "showPoints": "auto",
+ "spanNulls": false,
+ "stacking": {
+ "group": "A",
+ "mode": "none"
+ },
+ "thresholdsStyle": {
+ "mode": "off"
+ }
+ },
+ "mappings": [],
+ "thresholds": {
+ "mode": "absolute",
+ "steps": [
+ {
+ "color": "green",
+ "value": null
+ },
+ {
+ "color": "red",
+ "value": 80
+ }
+ ]
+ }
+ },
+ "overrides": []
+ },
+ "gridPos": {
+ "h": 8,
+ "w": 12,
+ "x": 12,
+ "y": 32
+ },
+ "id": 9,
+ "options": {
+ "legend": {
+ "calcs": [],
+ "displayMode": "list",
+ "placement": "bottom",
+ "showLegend": true
+ },
+ "tooltip": {
+ "mode": "single",
+ "sort": "none"
+ }
+ },
+ "targets": [
+ {
+ "datasource": {
+ "type": "prometheus",
+ "uid": "${DS_PROMETHEUS}"
+ },
+ "expr": "sum(seaweedfs_telemetry_broker_count)",
+ "format": "time_series",
+ "legendFormat": "Total Broker Servers",
+ "refId": "A"
+ }
+ ],
+ "title": "Total Broker Servers Over Time",
+ "type": "timeseries"
+ }
+ ],
+ "refresh": "5m",
+ "schemaVersion": 38,
+ "style": "dark",
+ "tags": [
+ "seaweedfs",
+ "telemetry"
+ ],
+ "templating": {
+ "list": []
+ },
+ "time": {
+ "from": "now-24h",
+ "to": "now"
+ },
+ "timepicker": {},
+ "timezone": "",
+ "title": "SeaweedFS Telemetry Dashboard",
+ "uid": "seaweedfs-telemetry",
+ "version": 1,
+ "weekStart": ""
+} \ No newline at end of file
diff --git a/telemetry/grafana-provisioning/dashboards/dashboards.yml b/telemetry/grafana-provisioning/dashboards/dashboards.yml
new file mode 100644
index 000000000..82fd18a7a
--- /dev/null
+++ b/telemetry/grafana-provisioning/dashboards/dashboards.yml
@@ -0,0 +1,12 @@
+apiVersion: 1
+
+providers:
+ - name: 'seaweedfs'
+ orgId: 1
+ folder: ''
+ type: file
+ disableDeletion: false
+ updateIntervalSeconds: 10
+ allowUiUpdates: true
+ options:
+ path: /var/lib/grafana/dashboards \ No newline at end of file
diff --git a/telemetry/grafana-provisioning/datasources/prometheus.yml b/telemetry/grafana-provisioning/datasources/prometheus.yml
new file mode 100644
index 000000000..38fb02c68
--- /dev/null
+++ b/telemetry/grafana-provisioning/datasources/prometheus.yml
@@ -0,0 +1,9 @@
+apiVersion: 1
+
+datasources:
+ - name: Prometheus
+ type: prometheus
+ access: proxy
+ url: http://prometheus:9090
+ isDefault: true
+ editable: true \ No newline at end of file
diff --git a/telemetry/prometheus.yml b/telemetry/prometheus.yml
new file mode 100644
index 000000000..e33d518e7
--- /dev/null
+++ b/telemetry/prometheus.yml
@@ -0,0 +1,15 @@
+global:
+ scrape_interval: 15s
+ evaluation_interval: 15s
+
+rule_files:
+ # - "first_rules.yml"
+ # - "second_rules.yml"
+
+scrape_configs:
+ - job_name: 'seaweedfs-telemetry'
+ static_configs:
+ - targets: ['telemetry-server:8080']
+ scrape_interval: 30s
+ metrics_path: '/metrics'
+ scrape_timeout: 10s \ No newline at end of file
diff --git a/telemetry/proto/telemetry.pb.go b/telemetry/proto/telemetry.pb.go
new file mode 100644
index 000000000..b1bf44db7
--- /dev/null
+++ b/telemetry/proto/telemetry.pb.go
@@ -0,0 +1,398 @@
+// Code generated by protoc-gen-go. DO NOT EDIT.
+// versions:
+// protoc-gen-go v1.34.2
+// protoc v5.29.3
+// source: proto/telemetry.proto
+
+package proto
+
+import (
+ protoreflect "google.golang.org/protobuf/reflect/protoreflect"
+ protoimpl "google.golang.org/protobuf/runtime/protoimpl"
+ reflect "reflect"
+ sync "sync"
+)
+
+const (
+ // Verify that this generated code is sufficiently up-to-date.
+ _ = protoimpl.EnforceVersion(20 - protoimpl.MinVersion)
+ // Verify that runtime/protoimpl is sufficiently up-to-date.
+ _ = protoimpl.EnforceVersion(protoimpl.MaxVersion - 20)
+)
+
+// TelemetryData represents cluster-level telemetry information
+type TelemetryData struct {
+ state protoimpl.MessageState
+ sizeCache protoimpl.SizeCache
+ unknownFields protoimpl.UnknownFields
+
+ // Unique cluster identifier (generated in-memory)
+ ClusterId string `protobuf:"bytes,1,opt,name=cluster_id,json=clusterId,proto3" json:"cluster_id,omitempty"`
+ // SeaweedFS version
+ Version string `protobuf:"bytes,2,opt,name=version,proto3" json:"version,omitempty"`
+ // Operating system (e.g., "linux/amd64")
+ Os string `protobuf:"bytes,3,opt,name=os,proto3" json:"os,omitempty"`
+ // Enabled features (e.g., ["filer", "s3api", "mq"])
+ Features []string `protobuf:"bytes,4,rep,name=features,proto3" json:"features,omitempty"`
+ // Deployment type ("standalone", "cluster", "master-only", "volume-only")
+ Deployment string `protobuf:"bytes,5,opt,name=deployment,proto3" json:"deployment,omitempty"`
+ // Number of volume servers in the cluster
+ VolumeServerCount int32 `protobuf:"varint,6,opt,name=volume_server_count,json=volumeServerCount,proto3" json:"volume_server_count,omitempty"`
+ // Total disk usage across all volume servers (in bytes)
+ TotalDiskBytes uint64 `protobuf:"varint,7,opt,name=total_disk_bytes,json=totalDiskBytes,proto3" json:"total_disk_bytes,omitempty"`
+ // Total number of volumes in the cluster
+ TotalVolumeCount int32 `protobuf:"varint,8,opt,name=total_volume_count,json=totalVolumeCount,proto3" json:"total_volume_count,omitempty"`
+ // Number of filer servers in the cluster
+ FilerCount int32 `protobuf:"varint,9,opt,name=filer_count,json=filerCount,proto3" json:"filer_count,omitempty"`
+ // Number of broker servers in the cluster
+ BrokerCount int32 `protobuf:"varint,10,opt,name=broker_count,json=brokerCount,proto3" json:"broker_count,omitempty"`
+ // Unix timestamp when the data was collected
+ Timestamp int64 `protobuf:"varint,11,opt,name=timestamp,proto3" json:"timestamp,omitempty"`
+}
+
+func (x *TelemetryData) Reset() {
+ *x = TelemetryData{}
+ if protoimpl.UnsafeEnabled {
+ mi := &file_proto_telemetry_proto_msgTypes[0]
+ ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
+ ms.StoreMessageInfo(mi)
+ }
+}
+
+func (x *TelemetryData) String() string {
+ return protoimpl.X.MessageStringOf(x)
+}
+
+func (*TelemetryData) ProtoMessage() {}
+
+func (x *TelemetryData) ProtoReflect() protoreflect.Message {
+ mi := &file_proto_telemetry_proto_msgTypes[0]
+ if protoimpl.UnsafeEnabled && x != nil {
+ ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
+ if ms.LoadMessageInfo() == nil {
+ ms.StoreMessageInfo(mi)
+ }
+ return ms
+ }
+ return mi.MessageOf(x)
+}
+
+// Deprecated: Use TelemetryData.ProtoReflect.Descriptor instead.
+func (*TelemetryData) Descriptor() ([]byte, []int) {
+ return file_proto_telemetry_proto_rawDescGZIP(), []int{0}
+}
+
+func (x *TelemetryData) GetClusterId() string {
+ if x != nil {
+ return x.ClusterId
+ }
+ return ""
+}
+
+func (x *TelemetryData) GetVersion() string {
+ if x != nil {
+ return x.Version
+ }
+ return ""
+}
+
+func (x *TelemetryData) GetOs() string {
+ if x != nil {
+ return x.Os
+ }
+ return ""
+}
+
+func (x *TelemetryData) GetFeatures() []string {
+ if x != nil {
+ return x.Features
+ }
+ return nil
+}
+
+func (x *TelemetryData) GetDeployment() string {
+ if x != nil {
+ return x.Deployment
+ }
+ return ""
+}
+
+func (x *TelemetryData) GetVolumeServerCount() int32 {
+ if x != nil {
+ return x.VolumeServerCount
+ }
+ return 0
+}
+
+func (x *TelemetryData) GetTotalDiskBytes() uint64 {
+ if x != nil {
+ return x.TotalDiskBytes
+ }
+ return 0
+}
+
+func (x *TelemetryData) GetTotalVolumeCount() int32 {
+ if x != nil {
+ return x.TotalVolumeCount
+ }
+ return 0
+}
+
+func (x *TelemetryData) GetFilerCount() int32 {
+ if x != nil {
+ return x.FilerCount
+ }
+ return 0
+}
+
+func (x *TelemetryData) GetBrokerCount() int32 {
+ if x != nil {
+ return x.BrokerCount
+ }
+ return 0
+}
+
+func (x *TelemetryData) GetTimestamp() int64 {
+ if x != nil {
+ return x.Timestamp
+ }
+ return 0
+}
+
+// TelemetryRequest is sent from SeaweedFS clusters to the telemetry server
+type TelemetryRequest struct {
+ state protoimpl.MessageState
+ sizeCache protoimpl.SizeCache
+ unknownFields protoimpl.UnknownFields
+
+ Data *TelemetryData `protobuf:"bytes,1,opt,name=data,proto3" json:"data,omitempty"`
+}
+
+func (x *TelemetryRequest) Reset() {
+ *x = TelemetryRequest{}
+ if protoimpl.UnsafeEnabled {
+ mi := &file_proto_telemetry_proto_msgTypes[1]
+ ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
+ ms.StoreMessageInfo(mi)
+ }
+}
+
+func (x *TelemetryRequest) String() string {
+ return protoimpl.X.MessageStringOf(x)
+}
+
+func (*TelemetryRequest) ProtoMessage() {}
+
+func (x *TelemetryRequest) ProtoReflect() protoreflect.Message {
+ mi := &file_proto_telemetry_proto_msgTypes[1]
+ if protoimpl.UnsafeEnabled && x != nil {
+ ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
+ if ms.LoadMessageInfo() == nil {
+ ms.StoreMessageInfo(mi)
+ }
+ return ms
+ }
+ return mi.MessageOf(x)
+}
+
+// Deprecated: Use TelemetryRequest.ProtoReflect.Descriptor instead.
+func (*TelemetryRequest) Descriptor() ([]byte, []int) {
+ return file_proto_telemetry_proto_rawDescGZIP(), []int{1}
+}
+
+func (x *TelemetryRequest) GetData() *TelemetryData {
+ if x != nil {
+ return x.Data
+ }
+ return nil
+}
+
+// TelemetryResponse is returned by the telemetry server
+type TelemetryResponse struct {
+ state protoimpl.MessageState
+ sizeCache protoimpl.SizeCache
+ unknownFields protoimpl.UnknownFields
+
+ Success bool `protobuf:"varint,1,opt,name=success,proto3" json:"success,omitempty"`
+ Message string `protobuf:"bytes,2,opt,name=message,proto3" json:"message,omitempty"`
+}
+
+func (x *TelemetryResponse) Reset() {
+ *x = TelemetryResponse{}
+ if protoimpl.UnsafeEnabled {
+ mi := &file_proto_telemetry_proto_msgTypes[2]
+ ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
+ ms.StoreMessageInfo(mi)
+ }
+}
+
+func (x *TelemetryResponse) String() string {
+ return protoimpl.X.MessageStringOf(x)
+}
+
+func (*TelemetryResponse) ProtoMessage() {}
+
+func (x *TelemetryResponse) ProtoReflect() protoreflect.Message {
+ mi := &file_proto_telemetry_proto_msgTypes[2]
+ if protoimpl.UnsafeEnabled && x != nil {
+ ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
+ if ms.LoadMessageInfo() == nil {
+ ms.StoreMessageInfo(mi)
+ }
+ return ms
+ }
+ return mi.MessageOf(x)
+}
+
+// Deprecated: Use TelemetryResponse.ProtoReflect.Descriptor instead.
+func (*TelemetryResponse) Descriptor() ([]byte, []int) {
+ return file_proto_telemetry_proto_rawDescGZIP(), []int{2}
+}
+
+func (x *TelemetryResponse) GetSuccess() bool {
+ if x != nil {
+ return x.Success
+ }
+ return false
+}
+
+func (x *TelemetryResponse) GetMessage() string {
+ if x != nil {
+ return x.Message
+ }
+ return ""
+}
+
+var File_proto_telemetry_proto protoreflect.FileDescriptor
+
+var file_proto_telemetry_proto_rawDesc = []byte{
+ 0x0a, 0x15, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x2f, 0x74, 0x65, 0x6c, 0x65, 0x6d, 0x65, 0x74, 0x72,
+ 0x79, 0x2e, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x12, 0x09, 0x74, 0x65, 0x6c, 0x65, 0x6d, 0x65, 0x74,
+ 0x72, 0x79, 0x22, 0xfe, 0x02, 0x0a, 0x0d, 0x54, 0x65, 0x6c, 0x65, 0x6d, 0x65, 0x74, 0x72, 0x79,
+ 0x44, 0x61, 0x74, 0x61, 0x12, 0x1d, 0x0a, 0x0a, 0x63, 0x6c, 0x75, 0x73, 0x74, 0x65, 0x72, 0x5f,
+ 0x69, 0x64, 0x18, 0x01, 0x20, 0x01, 0x28, 0x09, 0x52, 0x09, 0x63, 0x6c, 0x75, 0x73, 0x74, 0x65,
+ 0x72, 0x49, 0x64, 0x12, 0x18, 0x0a, 0x07, 0x76, 0x65, 0x72, 0x73, 0x69, 0x6f, 0x6e, 0x18, 0x02,
+ 0x20, 0x01, 0x28, 0x09, 0x52, 0x07, 0x76, 0x65, 0x72, 0x73, 0x69, 0x6f, 0x6e, 0x12, 0x0e, 0x0a,
+ 0x02, 0x6f, 0x73, 0x18, 0x03, 0x20, 0x01, 0x28, 0x09, 0x52, 0x02, 0x6f, 0x73, 0x12, 0x1a, 0x0a,
+ 0x08, 0x66, 0x65, 0x61, 0x74, 0x75, 0x72, 0x65, 0x73, 0x18, 0x04, 0x20, 0x03, 0x28, 0x09, 0x52,
+ 0x08, 0x66, 0x65, 0x61, 0x74, 0x75, 0x72, 0x65, 0x73, 0x12, 0x1e, 0x0a, 0x0a, 0x64, 0x65, 0x70,
+ 0x6c, 0x6f, 0x79, 0x6d, 0x65, 0x6e, 0x74, 0x18, 0x05, 0x20, 0x01, 0x28, 0x09, 0x52, 0x0a, 0x64,
+ 0x65, 0x70, 0x6c, 0x6f, 0x79, 0x6d, 0x65, 0x6e, 0x74, 0x12, 0x2e, 0x0a, 0x13, 0x76, 0x6f, 0x6c,
+ 0x75, 0x6d, 0x65, 0x5f, 0x73, 0x65, 0x72, 0x76, 0x65, 0x72, 0x5f, 0x63, 0x6f, 0x75, 0x6e, 0x74,
+ 0x18, 0x06, 0x20, 0x01, 0x28, 0x05, 0x52, 0x11, 0x76, 0x6f, 0x6c, 0x75, 0x6d, 0x65, 0x53, 0x65,
+ 0x72, 0x76, 0x65, 0x72, 0x43, 0x6f, 0x75, 0x6e, 0x74, 0x12, 0x28, 0x0a, 0x10, 0x74, 0x6f, 0x74,
+ 0x61, 0x6c, 0x5f, 0x64, 0x69, 0x73, 0x6b, 0x5f, 0x62, 0x79, 0x74, 0x65, 0x73, 0x18, 0x07, 0x20,
+ 0x01, 0x28, 0x04, 0x52, 0x0e, 0x74, 0x6f, 0x74, 0x61, 0x6c, 0x44, 0x69, 0x73, 0x6b, 0x42, 0x79,
+ 0x74, 0x65, 0x73, 0x12, 0x2c, 0x0a, 0x12, 0x74, 0x6f, 0x74, 0x61, 0x6c, 0x5f, 0x76, 0x6f, 0x6c,
+ 0x75, 0x6d, 0x65, 0x5f, 0x63, 0x6f, 0x75, 0x6e, 0x74, 0x18, 0x08, 0x20, 0x01, 0x28, 0x05, 0x52,
+ 0x10, 0x74, 0x6f, 0x74, 0x61, 0x6c, 0x56, 0x6f, 0x6c, 0x75, 0x6d, 0x65, 0x43, 0x6f, 0x75, 0x6e,
+ 0x74, 0x12, 0x1f, 0x0a, 0x0b, 0x66, 0x69, 0x6c, 0x65, 0x72, 0x5f, 0x63, 0x6f, 0x75, 0x6e, 0x74,
+ 0x18, 0x09, 0x20, 0x01, 0x28, 0x05, 0x52, 0x0a, 0x66, 0x69, 0x6c, 0x65, 0x72, 0x43, 0x6f, 0x75,
+ 0x6e, 0x74, 0x12, 0x21, 0x0a, 0x0c, 0x62, 0x72, 0x6f, 0x6b, 0x65, 0x72, 0x5f, 0x63, 0x6f, 0x75,
+ 0x6e, 0x74, 0x18, 0x0a, 0x20, 0x01, 0x28, 0x05, 0x52, 0x0b, 0x62, 0x72, 0x6f, 0x6b, 0x65, 0x72,
+ 0x43, 0x6f, 0x75, 0x6e, 0x74, 0x12, 0x1c, 0x0a, 0x09, 0x74, 0x69, 0x6d, 0x65, 0x73, 0x74, 0x61,
+ 0x6d, 0x70, 0x18, 0x0b, 0x20, 0x01, 0x28, 0x03, 0x52, 0x09, 0x74, 0x69, 0x6d, 0x65, 0x73, 0x74,
+ 0x61, 0x6d, 0x70, 0x22, 0x40, 0x0a, 0x10, 0x54, 0x65, 0x6c, 0x65, 0x6d, 0x65, 0x74, 0x72, 0x79,
+ 0x52, 0x65, 0x71, 0x75, 0x65, 0x73, 0x74, 0x12, 0x2c, 0x0a, 0x04, 0x64, 0x61, 0x74, 0x61, 0x18,
+ 0x01, 0x20, 0x01, 0x28, 0x0b, 0x32, 0x18, 0x2e, 0x74, 0x65, 0x6c, 0x65, 0x6d, 0x65, 0x74, 0x72,
+ 0x79, 0x2e, 0x54, 0x65, 0x6c, 0x65, 0x6d, 0x65, 0x74, 0x72, 0x79, 0x44, 0x61, 0x74, 0x61, 0x52,
+ 0x04, 0x64, 0x61, 0x74, 0x61, 0x22, 0x47, 0x0a, 0x11, 0x54, 0x65, 0x6c, 0x65, 0x6d, 0x65, 0x74,
+ 0x72, 0x79, 0x52, 0x65, 0x73, 0x70, 0x6f, 0x6e, 0x73, 0x65, 0x12, 0x18, 0x0a, 0x07, 0x73, 0x75,
+ 0x63, 0x63, 0x65, 0x73, 0x73, 0x18, 0x01, 0x20, 0x01, 0x28, 0x08, 0x52, 0x07, 0x73, 0x75, 0x63,
+ 0x63, 0x65, 0x73, 0x73, 0x12, 0x18, 0x0a, 0x07, 0x6d, 0x65, 0x73, 0x73, 0x61, 0x67, 0x65, 0x18,
+ 0x02, 0x20, 0x01, 0x28, 0x09, 0x52, 0x07, 0x6d, 0x65, 0x73, 0x73, 0x61, 0x67, 0x65, 0x42, 0x30,
+ 0x5a, 0x2e, 0x67, 0x69, 0x74, 0x68, 0x75, 0x62, 0x2e, 0x63, 0x6f, 0x6d, 0x2f, 0x73, 0x65, 0x61,
+ 0x77, 0x65, 0x65, 0x64, 0x66, 0x73, 0x2f, 0x73, 0x65, 0x61, 0x77, 0x65, 0x65, 0x64, 0x66, 0x73,
+ 0x2f, 0x74, 0x65, 0x6c, 0x65, 0x6d, 0x65, 0x74, 0x72, 0x79, 0x2f, 0x70, 0x72, 0x6f, 0x74, 0x6f,
+ 0x62, 0x06, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x33,
+}
+
+var (
+ file_proto_telemetry_proto_rawDescOnce sync.Once
+ file_proto_telemetry_proto_rawDescData = file_proto_telemetry_proto_rawDesc
+)
+
+func file_proto_telemetry_proto_rawDescGZIP() []byte {
+ file_proto_telemetry_proto_rawDescOnce.Do(func() {
+ file_proto_telemetry_proto_rawDescData = protoimpl.X.CompressGZIP(file_proto_telemetry_proto_rawDescData)
+ })
+ return file_proto_telemetry_proto_rawDescData
+}
+
+var file_proto_telemetry_proto_msgTypes = make([]protoimpl.MessageInfo, 3)
+var file_proto_telemetry_proto_goTypes = []any{
+ (*TelemetryData)(nil), // 0: telemetry.TelemetryData
+ (*TelemetryRequest)(nil), // 1: telemetry.TelemetryRequest
+ (*TelemetryResponse)(nil), // 2: telemetry.TelemetryResponse
+}
+var file_proto_telemetry_proto_depIdxs = []int32{
+ 0, // 0: telemetry.TelemetryRequest.data:type_name -> telemetry.TelemetryData
+ 1, // [1:1] is the sub-list for method output_type
+ 1, // [1:1] is the sub-list for method input_type
+ 1, // [1:1] is the sub-list for extension type_name
+ 1, // [1:1] is the sub-list for extension extendee
+ 0, // [0:1] is the sub-list for field type_name
+}
+
+func init() { file_proto_telemetry_proto_init() }
+func file_proto_telemetry_proto_init() {
+ if File_proto_telemetry_proto != nil {
+ return
+ }
+ if !protoimpl.UnsafeEnabled {
+ file_proto_telemetry_proto_msgTypes[0].Exporter = func(v any, i int) any {
+ switch v := v.(*TelemetryData); i {
+ case 0:
+ return &v.state
+ case 1:
+ return &v.sizeCache
+ case 2:
+ return &v.unknownFields
+ default:
+ return nil
+ }
+ }
+ file_proto_telemetry_proto_msgTypes[1].Exporter = func(v any, i int) any {
+ switch v := v.(*TelemetryRequest); i {
+ case 0:
+ return &v.state
+ case 1:
+ return &v.sizeCache
+ case 2:
+ return &v.unknownFields
+ default:
+ return nil
+ }
+ }
+ file_proto_telemetry_proto_msgTypes[2].Exporter = func(v any, i int) any {
+ switch v := v.(*TelemetryResponse); i {
+ case 0:
+ return &v.state
+ case 1:
+ return &v.sizeCache
+ case 2:
+ return &v.unknownFields
+ default:
+ return nil
+ }
+ }
+ }
+ type x struct{}
+ out := protoimpl.TypeBuilder{
+ File: protoimpl.DescBuilder{
+ GoPackagePath: reflect.TypeOf(x{}).PkgPath(),
+ RawDescriptor: file_proto_telemetry_proto_rawDesc,
+ NumEnums: 0,
+ NumMessages: 3,
+ NumExtensions: 0,
+ NumServices: 0,
+ },
+ GoTypes: file_proto_telemetry_proto_goTypes,
+ DependencyIndexes: file_proto_telemetry_proto_depIdxs,
+ MessageInfos: file_proto_telemetry_proto_msgTypes,
+ }.Build()
+ File_proto_telemetry_proto = out.File
+ file_proto_telemetry_proto_rawDesc = nil
+ file_proto_telemetry_proto_goTypes = nil
+ file_proto_telemetry_proto_depIdxs = nil
+}
diff --git a/telemetry/proto/telemetry.proto b/telemetry/proto/telemetry.proto
new file mode 100644
index 000000000..07bc79446
--- /dev/null
+++ b/telemetry/proto/telemetry.proto
@@ -0,0 +1,52 @@
+syntax = "proto3";
+
+package telemetry;
+
+option go_package = "github.com/seaweedfs/seaweedfs/telemetry/proto";
+
+// TelemetryData represents cluster-level telemetry information
+message TelemetryData {
+ // Unique cluster identifier (generated in-memory)
+ string cluster_id = 1;
+
+ // SeaweedFS version
+ string version = 2;
+
+ // Operating system (e.g., "linux/amd64")
+ string os = 3;
+
+ // Enabled features (e.g., ["filer", "s3api", "mq"])
+ repeated string features = 4;
+
+ // Deployment type ("standalone", "cluster", "master-only", "volume-only")
+ string deployment = 5;
+
+ // Number of volume servers in the cluster
+ int32 volume_server_count = 6;
+
+ // Total disk usage across all volume servers (in bytes)
+ uint64 total_disk_bytes = 7;
+
+ // Total number of volumes in the cluster
+ int32 total_volume_count = 8;
+
+ // Number of filer servers in the cluster
+ int32 filer_count = 9;
+
+ // Number of broker servers in the cluster
+ int32 broker_count = 10;
+
+ // Unix timestamp when the data was collected
+ int64 timestamp = 11;
+}
+
+// TelemetryRequest is sent from SeaweedFS clusters to the telemetry server
+message TelemetryRequest {
+ TelemetryData data = 1;
+}
+
+// TelemetryResponse is returned by the telemetry server
+message TelemetryResponse {
+ bool success = 1;
+ string message = 2;
+} \ No newline at end of file
diff --git a/telemetry/server/Dockerfile b/telemetry/server/Dockerfile
new file mode 100644
index 000000000..8f3782fcf
--- /dev/null
+++ b/telemetry/server/Dockerfile
@@ -0,0 +1,18 @@
+FROM golang:1.21-alpine AS builder
+
+WORKDIR /app
+COPY go.mod go.sum ./
+RUN go mod download
+
+COPY . .
+RUN CGO_ENABLED=0 GOOS=linux go build -a -installsuffix cgo -ldflags '-extldflags "-static"' -o telemetry-server .
+
+FROM alpine:latest
+RUN apk --no-cache add ca-certificates
+WORKDIR /root/
+
+COPY --from=builder /app/telemetry-server .
+
+EXPOSE 8080
+
+CMD ["./telemetry-server"] \ No newline at end of file
diff --git a/telemetry/server/Makefile b/telemetry/server/Makefile
new file mode 100644
index 000000000..cf57f1777
--- /dev/null
+++ b/telemetry/server/Makefile
@@ -0,0 +1,97 @@
+.PHONY: build run clean test deps proto integration-test test-all
+
+# Build the telemetry server
+build:
+ go build -o telemetry-server .
+
+# Run the server in development mode
+run:
+ go run . -port=8080 -dashboard=true -cleanup=1h -max-age=24h
+
+# Run the server in production mode
+run-prod:
+ ./telemetry-server -port=8080 -dashboard=true -cleanup=24h -max-age=720h
+
+# Clean build artifacts
+clean:
+ rm -f telemetry-server
+ rm -f ../test/telemetry-server-test.log
+ go clean
+
+# Run unit tests
+test:
+ go test ./...
+
+# Run integration tests
+integration-test:
+ @echo "🧪 Running telemetry integration tests..."
+ cd ../../ && go run telemetry/test/integration.go
+
+# Run all tests (unit + integration)
+test-all: test integration-test
+
+# Install dependencies
+deps:
+ go mod download
+ go mod tidy
+
+# Generate protobuf code (requires protoc)
+proto:
+ cd .. && protoc --go_out=. --go_opt=paths=source_relative proto/telemetry.proto
+
+# Build Docker image
+docker-build:
+ docker build -t seaweedfs-telemetry .
+
+# Run with Docker
+docker-run:
+ docker run -p 8080:8080 seaweedfs-telemetry -port=8080 -dashboard=true
+
+# Development with auto-reload (requires air: go install github.com/cosmtrek/air@latest)
+dev:
+ air
+
+# Check if protoc is available
+check-protoc:
+ @which protoc > /dev/null || (echo "protoc is required for proto generation. Install from https://grpc.io/docs/protoc-installation/" && exit 1)
+
+# Full development setup
+setup: check-protoc deps proto build
+
+# Run a quick smoke test
+smoke-test: build
+ @echo "🔥 Running smoke test..."
+ @timeout 10s ./telemetry-server -port=18081 > /dev/null 2>&1 & \
+ SERVER_PID=$$!; \
+ sleep 2; \
+ if curl -s http://localhost:18081/health > /dev/null; then \
+ echo "✅ Smoke test passed - server responds to health check"; \
+ else \
+ echo "❌ Smoke test failed - server not responding"; \
+ exit 1; \
+ fi; \
+ kill $$SERVER_PID 2>/dev/null || true
+
+# Continuous integration target
+ci: deps proto build test integration-test
+ @echo "🎉 All CI tests passed!"
+
+# Help
+help:
+ @echo "Available targets:"
+ @echo " build - Build the telemetry server binary"
+ @echo " run - Run server in development mode"
+ @echo " run-prod - Run server in production mode"
+ @echo " clean - Clean build artifacts"
+ @echo " test - Run unit tests"
+ @echo " integration-test - Run integration tests"
+ @echo " test-all - Run all tests (unit + integration)"
+ @echo " deps - Install Go dependencies"
+ @echo " proto - Generate protobuf code"
+ @echo " docker-build - Build Docker image"
+ @echo " docker-run - Run with Docker"
+ @echo " dev - Run with auto-reload (requires air)"
+ @echo " smoke-test - Quick server health check"
+ @echo " setup - Full development setup"
+ @echo " ci - Continuous integration (all tests)"
+ @echo " help - Show this help" \ No newline at end of file
diff --git a/telemetry/server/api/handlers.go b/telemetry/server/api/handlers.go
new file mode 100644
index 000000000..0ff00330b
--- /dev/null
+++ b/telemetry/server/api/handlers.go
@@ -0,0 +1,152 @@
+package api
+
+import (
+ "encoding/json"
+ "io"
+ "net/http"
+ "strconv"
+ "time"
+
+ "github.com/seaweedfs/seaweedfs/telemetry/proto"
+ "github.com/seaweedfs/seaweedfs/telemetry/server/storage"
+ protobuf "google.golang.org/protobuf/proto"
+)
+
+type Handler struct {
+ storage *storage.PrometheusStorage
+}
+
+func NewHandler(storage *storage.PrometheusStorage) *Handler {
+ return &Handler{storage: storage}
+}
+
+func (h *Handler) CollectTelemetry(w http.ResponseWriter, r *http.Request) {
+ if r.Method != http.MethodPost {
+ http.Error(w, "Method not allowed", http.StatusMethodNotAllowed)
+ return
+ }
+
+ contentType := r.Header.Get("Content-Type")
+
+ // Only accept protobuf content type
+ if contentType != "application/x-protobuf" && contentType != "application/protobuf" {
+ http.Error(w, "Content-Type must be application/x-protobuf", http.StatusUnsupportedMediaType)
+ return
+ }
+
+ // Read protobuf request
+ body, err := io.ReadAll(r.Body)
+ if err != nil {
+ http.Error(w, "Failed to read request body", http.StatusBadRequest)
+ return
+ }
+
+ req := &proto.TelemetryRequest{}
+ if err := protobuf.Unmarshal(body, req); err != nil {
+ http.Error(w, "Invalid protobuf data", http.StatusBadRequest)
+ return
+ }
+
+ data := req.Data
+ if data == nil {
+ http.Error(w, "Missing telemetry data", http.StatusBadRequest)
+ return
+ }
+
+ // Validate required fields
+ if data.ClusterId == "" || data.Version == "" || data.Os == "" {
+ http.Error(w, "Missing required fields", http.StatusBadRequest)
+ return
+ }
+
+ // Set timestamp if not provided
+ if data.Timestamp == 0 {
+ data.Timestamp = time.Now().Unix()
+ }
+
+ // Store the telemetry data
+ if err := h.storage.StoreTelemetry(data); err != nil {
+ http.Error(w, "Failed to store data", http.StatusInternalServerError)
+ return
+ }
+
+ // Return protobuf response
+ resp := &proto.TelemetryResponse{
+ Success: true,
+ Message: "Telemetry data received",
+ }
+
+ respData, err := protobuf.Marshal(resp)
+ if err != nil {
+ http.Error(w, "Failed to marshal response", http.StatusInternalServerError)
+ return
+ }
+
+ w.Header().Set("Content-Type", "application/x-protobuf")
+ w.WriteHeader(http.StatusOK)
+ w.Write(respData)
+}
+
+func (h *Handler) GetStats(w http.ResponseWriter, r *http.Request) {
+ if r.Method != http.MethodGet {
+ http.Error(w, "Method not allowed", http.StatusMethodNotAllowed)
+ return
+ }
+
+ stats, err := h.storage.GetStats()
+ if err != nil {
+ http.Error(w, "Failed to get stats", http.StatusInternalServerError)
+ return
+ }
+
+ w.Header().Set("Content-Type", "application/json")
+ json.NewEncoder(w).Encode(stats)
+}
+
+func (h *Handler) GetInstances(w http.ResponseWriter, r *http.Request) {
+ if r.Method != http.MethodGet {
+ http.Error(w, "Method not allowed", http.StatusMethodNotAllowed)
+ return
+ }
+
+ limitStr := r.URL.Query().Get("limit")
+ limit := 100 // default
+ if limitStr != "" {
+ if l, err := strconv.Atoi(limitStr); err == nil && l > 0 && l <= 1000 {
+ limit = l
+ }
+ }
+
+ instances, err := h.storage.GetInstances(limit)
+ if err != nil {
+ http.Error(w, "Failed to get instances", http.StatusInternalServerError)
+ return
+ }
+
+ w.Header().Set("Content-Type", "application/json")
+ json.NewEncoder(w).Encode(instances)
+}
+
+func (h *Handler) GetMetrics(w http.ResponseWriter, r *http.Request) {
+ if r.Method != http.MethodGet {
+ http.Error(w, "Method not allowed", http.StatusMethodNotAllowed)
+ return
+ }
+
+ daysStr := r.URL.Query().Get("days")
+ days := 30 // default
+ if daysStr != "" {
+ if d, err := strconv.Atoi(daysStr); err == nil && d > 0 && d <= 365 {
+ days = d
+ }
+ }
+
+ metrics, err := h.storage.GetMetrics(days)
+ if err != nil {
+ http.Error(w, "Failed to get metrics", http.StatusInternalServerError)
+ return
+ }
+
+ w.Header().Set("Content-Type", "application/json")
+ json.NewEncoder(w).Encode(metrics)
+}
diff --git a/telemetry/server/dashboard/dashboard.go b/telemetry/server/dashboard/dashboard.go
new file mode 100644
index 000000000..9a56c7f1b
--- /dev/null
+++ b/telemetry/server/dashboard/dashboard.go
@@ -0,0 +1,278 @@
+package dashboard
+
+import (
+ "net/http"
+)
+
// Handler renders the built-in single-page telemetry dashboard.
type Handler struct{}

// NewHandler constructs a ready-to-use dashboard Handler.
func NewHandler() *Handler {
	return new(Handler)
}
+
// ServeIndex writes the complete dashboard as a single self-contained HTML
// page. The page pulls Chart.js from a CDN, fetches /api/stats and
// /api/metrics on load, renders summary cards plus pie/line charts, and
// refreshes itself every 5 minutes. No template engine is used; the markup
// is one Go string literal.
func (h *Handler) ServeIndex(w http.ResponseWriter, r *http.Request) {
	html := `<!DOCTYPE html>
<html lang="en">
<head>
    <meta charset="UTF-8">
    <meta name="viewport" content="width=device-width, initial-scale=1.0">
    <title>SeaweedFS Telemetry Dashboard</title>
    <script src="https://cdn.jsdelivr.net/npm/chart.js"></script>
    <style>
        body {
            font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', Roboto, sans-serif;
            margin: 0;
            padding: 20px;
            background-color: #f5f5f5;
        }
        .container {
            max-width: 1200px;
            margin: 0 auto;
        }
        .header {
            background: white;
            padding: 20px;
            border-radius: 8px;
            margin-bottom: 20px;
            box-shadow: 0 2px 4px rgba(0,0,0,0.1);
        }
        .stats-grid {
            display: grid;
            grid-template-columns: repeat(auto-fit, minmax(250px, 1fr));
            gap: 20px;
            margin-bottom: 20px;
        }
        .stat-card {
            background: white;
            padding: 20px;
            border-radius: 8px;
            box-shadow: 0 2px 4px rgba(0,0,0,0.1);
        }
        .stat-value {
            font-size: 2em;
            font-weight: bold;
            color: #2196F3;
        }
        .stat-label {
            color: #666;
            margin-top: 5px;
        }
        .chart-container {
            background: white;
            padding: 20px;
            border-radius: 8px;
            margin-bottom: 20px;
            box-shadow: 0 2px 4px rgba(0,0,0,0.1);
        }
        .chart-title {
            font-size: 1.2em;
            font-weight: bold;
            margin-bottom: 15px;
        }
        .loading {
            text-align: center;
            padding: 40px;
            color: #666;
        }
        .error {
            background: #ffebee;
            color: #c62828;
            padding: 15px;
            border-radius: 4px;
            margin: 10px 0;
        }
    </style>
</head>
<body>
    <div class="container">
        <div class="header">
            <h1>SeaweedFS Telemetry Dashboard</h1>
            <p>Privacy-respecting usage analytics for SeaweedFS</p>
        </div>

        <div id="loading" class="loading">Loading telemetry data...</div>
        <div id="error" class="error" style="display: none;"></div>

        <div id="dashboard" style="display: none;">
            <div class="stats-grid">
                <div class="stat-card">
                    <div class="stat-value" id="totalInstances">-</div>
                    <div class="stat-label">Total Instances (30 days)</div>
                </div>
                <div class="stat-card">
                    <div class="stat-value" id="activeInstances">-</div>
                    <div class="stat-label">Active Instances (7 days)</div>
                </div>
                <div class="stat-card">
                    <div class="stat-value" id="totalVersions">-</div>
                    <div class="stat-label">Different Versions</div>
                </div>
                <div class="stat-card">
                    <div class="stat-value" id="totalOS">-</div>
                    <div class="stat-label">Operating Systems</div>
                </div>
            </div>

            <div class="chart-container">
                <div class="chart-title">Version Distribution</div>
                <canvas id="versionChart" width="400" height="200"></canvas>
            </div>

            <div class="chart-container">
                <div class="chart-title">Operating System Distribution</div>
                <canvas id="osChart" width="400" height="200"></canvas>
            </div>

            <div class="chart-container">
                <div class="chart-title">Deployment Types</div>
                <canvas id="deploymentChart" width="400" height="200"></canvas>
            </div>

            <div class="chart-container">
                <div class="chart-title">Volume Servers Over Time</div>
                <canvas id="serverChart" width="400" height="200"></canvas>
            </div>

            <div class="chart-container">
                <div class="chart-title">Total Disk Usage Over Time</div>
                <canvas id="diskChart" width="400" height="200"></canvas>
            </div>
        </div>
    </div>

    <script>
        let charts = {};

        async function loadDashboard() {
            try {
                // Load stats
                const statsResponse = await fetch('/api/stats');
                const stats = await statsResponse.json();

                // Load metrics
                const metricsResponse = await fetch('/api/metrics?days=30');
                const metrics = await metricsResponse.json();

                updateStats(stats);
                updateCharts(stats, metrics);

                document.getElementById('loading').style.display = 'none';
                document.getElementById('dashboard').style.display = 'block';
            } catch (error) {
                console.error('Error loading dashboard:', error);
                showError('Failed to load telemetry data: ' + error.message);
            }
        }

        function updateStats(stats) {
            document.getElementById('totalInstances').textContent = stats.total_instances || 0;
            document.getElementById('activeInstances').textContent = stats.active_instances || 0;
            document.getElementById('totalVersions').textContent = Object.keys(stats.versions || {}).length;
            document.getElementById('totalOS').textContent = Object.keys(stats.os_distribution || {}).length;
        }

        function updateCharts(stats, metrics) {
            // Version chart
            createPieChart('versionChart', 'Version Distribution', stats.versions || {});

            // OS chart
            createPieChart('osChart', 'Operating System Distribution', stats.os_distribution || {});

            // Deployment chart
            createPieChart('deploymentChart', 'Deployment Types', stats.deployments || {});

            // Server count over time
            if (metrics.dates && metrics.server_counts) {
                createLineChart('serverChart', 'Volume Servers', metrics.dates, metrics.server_counts, '#2196F3');
            }

            // Disk usage over time
            if (metrics.dates && metrics.disk_usage) {
                const diskUsageGB = metrics.disk_usage.map(bytes => Math.round(bytes / (1024 * 1024 * 1024)));
                createLineChart('diskChart', 'Disk Usage (GB)', metrics.dates, diskUsageGB, '#4CAF50');
            }
        }

        function createPieChart(canvasId, title, data) {
            const ctx = document.getElementById(canvasId).getContext('2d');

            if (charts[canvasId]) {
                charts[canvasId].destroy();
            }

            const labels = Object.keys(data);
            const values = Object.values(data);

            charts[canvasId] = new Chart(ctx, {
                type: 'pie',
                data: {
                    labels: labels,
                    datasets: [{
                        data: values,
                        backgroundColor: [
                            '#FF6384', '#36A2EB', '#FFCE56', '#4BC0C0',
                            '#9966FF', '#FF9F40', '#FF6384', '#C9CBCF'
                        ]
                    }]
                },
                options: {
                    responsive: true,
                    plugins: {
                        legend: {
                            position: 'bottom'
                        }
                    }
                }
            });
        }

        function createLineChart(canvasId, label, labels, data, color) {
            const ctx = document.getElementById(canvasId).getContext('2d');

            if (charts[canvasId]) {
                charts[canvasId].destroy();
            }

            charts[canvasId] = new Chart(ctx, {
                type: 'line',
                data: {
                    labels: labels,
                    datasets: [{
                        label: label,
                        data: data,
                        borderColor: color,
                        backgroundColor: color + '20',
                        fill: true,
                        tension: 0.1
                    }]
                },
                options: {
                    responsive: true,
                    scales: {
                        y: {
                            beginAtZero: true
                        }
                    }
                }
            });
        }

        function showError(message) {
            document.getElementById('loading').style.display = 'none';
            document.getElementById('error').style.display = 'block';
            document.getElementById('error').textContent = message;
        }

        // Load dashboard on page load
        loadDashboard();

        // Refresh every 5 minutes
        setInterval(loadDashboard, 5 * 60 * 1000);
    </script>
</body>
</html>`

	// NOTE(review): the page reads metrics.dates / metrics.server_counts /
	// a numeric metrics.disk_usage — confirm the /api/metrics payload
	// actually provides those keys in that shape.
	w.Header().Set("Content-Type", "text/html")
	w.WriteHeader(http.StatusOK)
	w.Write([]byte(html))
}
diff --git a/telemetry/server/go.sum b/telemetry/server/go.sum
new file mode 100644
index 000000000..0aec189da
--- /dev/null
+++ b/telemetry/server/go.sum
@@ -0,0 +1,31 @@
+github.com/beorn7/perks v1.0.1 h1:VlbKKnNfV8bJzeqoa4cOKqO6bYr3WgKZxO8Z16+hsOM=
+github.com/beorn7/perks v1.0.1/go.mod h1:G2ZrVWU2WbWT9wwq4/hrbKbnv/1ERSJQ0ibhJ6rlkpw=
+github.com/cespare/xxhash/v2 v2.2.0 h1:DC2CZ1Ep5Y4k3ZQ899DldepgrayRUGE6BBZ/cd9Cj44=
+github.com/cespare/xxhash/v2 v2.2.0/go.mod h1:VGX0DQ3Q6kWi7AoAeZDth3/j3BFtOZR5XLFGgcrjCOs=
+github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c=
+github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
+github.com/golang/protobuf v1.2.0/go.mod h1:6lQm79b+lXiMfvg/cZm0SGofjICqVBUtrP5yJMmIC1U=
+github.com/golang/protobuf v1.5.0/go.mod h1:FsONVRAS9T7sI+LIUmWTfcYkHO4aIWwzhcaSAoJOfIk=
+github.com/golang/protobuf v1.5.3 h1:KhyjKVUg7Usr/dYsdSqoFveMYd5ko72D+zANwlG1mmg=
+github.com/golang/protobuf v1.5.3/go.mod h1:XVQd3VNwM+JqD3oG2Ue2ip4fOMUkwXdXDdiuN0vRsmY=
+github.com/google/go-cmp v0.5.5/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE=
+github.com/google/go-cmp v0.5.9 h1:O2Tfq5qg4qc4AmwVlvv0oLiVAGB7enBSJ2x2DqQFi38=
+github.com/google/go-cmp v0.5.9/go.mod h1:17dUlkBOakJ0+DkrSSNjCkIjxS6bF9zb3elmeNGIjoY=
+github.com/matttproud/golang_protobuf_extensions v1.0.4 h1:mmDVorXM7PCGKw94cs5zkfA9PSy5pEvNWRP0ET0TIVo=
+github.com/matttproud/golang_protobuf_extensions v1.0.4/go.mod h1:BSXmuO+STAnVfrANrmjBb36TMTDstsz7MSK+HVaYKv4=
+github.com/prometheus/client_golang v1.17.0 h1:rl2sfwZMtSthVU752MqfjQozy7blglC+1SOtjMAMh+Q=
+github.com/prometheus/client_golang v1.17.0/go.mod h1:VeL+gMmOAxkS2IqfCq0ZmHSL+LjWfWDUmp1mBz9JgUY=
+github.com/prometheus/client_model v0.4.1-0.20230718164431-9a2bf3000d16 h1:v7DLqVdK4VrYkVD5diGdl4sxJurKJEMnODWRJlxV9oM=
+github.com/prometheus/client_model v0.4.1-0.20230718164431-9a2bf3000d16/go.mod h1:oMQmHW1/JoDwqLtg57MGgP/Fb1CJEYF2imWWhWtMkYU=
+github.com/prometheus/common v0.44.0 h1:+5BrQJwiBB9xsMygAB3TNvpQKOwlkc25LbISbrdOOfY=
+github.com/prometheus/common v0.44.0/go.mod h1:ofAIvZbQ1e/nugmZGz4/qCb9Ap1VoSTIO7x0VV9VvuY=
+github.com/prometheus/procfs v0.11.1 h1:xRC8Iq1yyca5ypa9n1EZnWZkt7dwcoRPQwX/5gwaUuI=
+github.com/prometheus/procfs v0.11.1/go.mod h1:eesXgaPo1q7lBpVMoMy0ZOFTth9hBn4W/y0/p/ScXhY=
+golang.org/x/sync v0.0.0-20181221193216-37e7f081c4d4/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
+golang.org/x/sys v0.11.0 h1:eG7RXZHdqOJ1i+0lgLgCpSXAp6M3LYlAo6osgSi0xOM=
+golang.org/x/sys v0.11.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
+golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
+google.golang.org/protobuf v1.26.0-rc.1/go.mod h1:jlhhOSvTdKEhbULTjvd4ARK9grFBp09yW+WbY/TyQbw=
+google.golang.org/protobuf v1.26.0/go.mod h1:9q0QmTI4eRPtz6boOQmLYwt+qCgq0jsYwAQnmE0givc=
+google.golang.org/protobuf v1.31.0 h1:g0LDEJHgrBl9N9r17Ru3sqWhkIx2NB67okBHPwC7hs8=
+google.golang.org/protobuf v1.31.0/go.mod h1:HV8QOd/L58Z+nl8r43ehVNZIU/HEI6OcFqwMG9pJV4I=
diff --git a/telemetry/server/main.go b/telemetry/server/main.go
new file mode 100644
index 000000000..6cbae05c7
--- /dev/null
+++ b/telemetry/server/main.go
@@ -0,0 +1,111 @@
+package main
+
+import (
+ "encoding/json"
+ "flag"
+ "fmt"
+ "log"
+ "net/http"
+ "time"
+
+ "github.com/prometheus/client_golang/prometheus/promhttp"
+ "github.com/seaweedfs/seaweedfs/telemetry/server/api"
+ "github.com/seaweedfs/seaweedfs/telemetry/server/dashboard"
+ "github.com/seaweedfs/seaweedfs/telemetry/server/storage"
+)
+
// Command-line flags configuring the telemetry server.
var (
	port            = flag.Int("port", 8080, "HTTP server port")
	enableCORS      = flag.Bool("cors", true, "Enable CORS for dashboard")
	logRequests     = flag.Bool("log", true, "Log incoming requests")
	enableDashboard = flag.Bool("dashboard", true, "Enable built-in dashboard (optional when using Grafana)")
	cleanupInterval = flag.Duration("cleanup", 24*time.Hour, "Cleanup interval for old instances")
	maxInstanceAge  = flag.Duration("max-age", 30*24*time.Hour, "Maximum age for instances before cleanup")
)
+
+func main() {
+ flag.Parse()
+
+ // Create Prometheus storage instance
+ store := storage.NewPrometheusStorage()
+
+ // Start cleanup routine
+ go func() {
+ ticker := time.NewTicker(*cleanupInterval)
+ defer ticker.Stop()
+ for range ticker.C {
+ store.CleanupOldInstances(*maxInstanceAge)
+ }
+ }()
+
+ // Setup HTTP handlers
+ mux := http.NewServeMux()
+
+ // Prometheus metrics endpoint
+ mux.Handle("/metrics", promhttp.Handler())
+
+ // API endpoints
+ apiHandler := api.NewHandler(store)
+ mux.HandleFunc("/api/collect", corsMiddleware(logMiddleware(apiHandler.CollectTelemetry)))
+ mux.HandleFunc("/api/stats", corsMiddleware(logMiddleware(apiHandler.GetStats)))
+ mux.HandleFunc("/api/instances", corsMiddleware(logMiddleware(apiHandler.GetInstances)))
+ mux.HandleFunc("/api/metrics", corsMiddleware(logMiddleware(apiHandler.GetMetrics)))
+
+ // Dashboard (optional)
+ if *enableDashboard {
+ dashboardHandler := dashboard.NewHandler()
+ mux.HandleFunc("/", corsMiddleware(dashboardHandler.ServeIndex))
+ mux.HandleFunc("/dashboard", corsMiddleware(dashboardHandler.ServeIndex))
+ mux.Handle("/static/", http.StripPrefix("/static/", http.FileServer(http.Dir("./static"))))
+ }
+
+ // Health check
+ mux.HandleFunc("/health", func(w http.ResponseWriter, r *http.Request) {
+ w.Header().Set("Content-Type", "application/json")
+ json.NewEncoder(w).Encode(map[string]string{
+ "status": "ok",
+ "time": time.Now().UTC().Format(time.RFC3339),
+ })
+ })
+
+ addr := fmt.Sprintf(":%d", *port)
+ log.Printf("Starting telemetry server on %s", addr)
+ log.Printf("Prometheus metrics: http://localhost%s/metrics", addr)
+ if *enableDashboard {
+ log.Printf("Dashboard: http://localhost%s/dashboard", addr)
+ }
+ log.Printf("Cleanup interval: %v, Max instance age: %v", *cleanupInterval, *maxInstanceAge)
+
+ if err := http.ListenAndServe(addr, mux); err != nil {
+ log.Fatalf("Server failed: %v", err)
+ }
+}
+
+func corsMiddleware(next http.HandlerFunc) http.HandlerFunc {
+ return func(w http.ResponseWriter, r *http.Request) {
+ if *enableCORS {
+ w.Header().Set("Access-Control-Allow-Origin", "*")
+ w.Header().Set("Access-Control-Allow-Methods", "GET, POST, OPTIONS")
+ w.Header().Set("Access-Control-Allow-Headers", "Content-Type, Authorization")
+ }
+
+ if r.Method == "OPTIONS" {
+ w.WriteHeader(http.StatusOK)
+ return
+ }
+
+ next(w, r)
+ }
+}
+
+func logMiddleware(next http.HandlerFunc) http.HandlerFunc {
+ return func(w http.ResponseWriter, r *http.Request) {
+ if *logRequests {
+ start := time.Now()
+ next(w, r)
+ log.Printf("%s %s %s %v", r.Method, r.URL.Path, r.RemoteAddr, time.Since(start))
+ } else {
+ next(w, r)
+ }
+ }
+}
diff --git a/telemetry/server/storage/prometheus.go b/telemetry/server/storage/prometheus.go
new file mode 100644
index 000000000..d25dd669a
--- /dev/null
+++ b/telemetry/server/storage/prometheus.go
@@ -0,0 +1,245 @@
+package storage
+
import (
	"encoding/json"
	"sort"
	"sync"
	"time"

	"github.com/prometheus/client_golang/prometheus"
	"github.com/prometheus/client_golang/prometheus/promauto"
	"github.com/seaweedfs/seaweedfs/telemetry/proto"
)
+
// PrometheusStorage exports received telemetry reports as Prometheus
// metrics and keeps a small in-memory snapshot of the latest report per
// cluster for the JSON API endpoints.
type PrometheusStorage struct {
	// Prometheus metrics
	totalClusters     prometheus.Gauge     // unique clusters seen in the last 30 days
	activeClusters    prometheus.Gauge     // clusters seen in the last 7 days
	volumeServerCount *prometheus.GaugeVec // per-cluster volume server count
	totalDiskBytes    *prometheus.GaugeVec // per-cluster disk usage in bytes
	totalVolumeCount  *prometheus.GaugeVec // per-cluster volume count
	filerCount        *prometheus.GaugeVec // per-cluster filer count
	brokerCount       *prometheus.GaugeVec // per-cluster broker count
	clusterInfo       *prometheus.GaugeVec // constant-1 info metric; metadata lives in labels
	telemetryReceived prometheus.Counter   // total reports ever received

	// In-memory storage for API endpoints (if needed).
	// mu guards instances and stats; one entry per cluster_id.
	mu        sync.RWMutex
	instances map[string]*telemetryData
	stats     map[string]interface{}
}
+
// telemetryData is an internal struct that includes the received timestamp
type telemetryData struct {
	*proto.TelemetryData
	// ReceivedAt is the server-side UTC arrival time; it drives the
	// 7/30-day activity windows and age-based cleanup.
	ReceivedAt time.Time `json:"received_at"`
}
+
+func NewPrometheusStorage() *PrometheusStorage {
+ return &PrometheusStorage{
+ totalClusters: promauto.NewGauge(prometheus.GaugeOpts{
+ Name: "seaweedfs_telemetry_total_clusters",
+ Help: "Total number of unique SeaweedFS clusters (last 30 days)",
+ }),
+ activeClusters: promauto.NewGauge(prometheus.GaugeOpts{
+ Name: "seaweedfs_telemetry_active_clusters",
+ Help: "Number of active SeaweedFS clusters (last 7 days)",
+ }),
+ volumeServerCount: promauto.NewGaugeVec(prometheus.GaugeOpts{
+ Name: "seaweedfs_telemetry_volume_servers",
+ Help: "Number of volume servers per cluster",
+ }, []string{"cluster_id", "version", "os", "deployment"}),
+ totalDiskBytes: promauto.NewGaugeVec(prometheus.GaugeOpts{
+ Name: "seaweedfs_telemetry_disk_bytes",
+ Help: "Total disk usage in bytes per cluster",
+ }, []string{"cluster_id", "version", "os", "deployment"}),
+ totalVolumeCount: promauto.NewGaugeVec(prometheus.GaugeOpts{
+ Name: "seaweedfs_telemetry_volume_count",
+ Help: "Total number of volumes per cluster",
+ }, []string{"cluster_id", "version", "os", "deployment"}),
+ filerCount: promauto.NewGaugeVec(prometheus.GaugeOpts{
+ Name: "seaweedfs_telemetry_filer_count",
+ Help: "Number of filer servers per cluster",
+ }, []string{"cluster_id", "version", "os", "deployment"}),
+ brokerCount: promauto.NewGaugeVec(prometheus.GaugeOpts{
+ Name: "seaweedfs_telemetry_broker_count",
+ Help: "Number of broker servers per cluster",
+ }, []string{"cluster_id", "version", "os", "deployment"}),
+ clusterInfo: promauto.NewGaugeVec(prometheus.GaugeOpts{
+ Name: "seaweedfs_telemetry_cluster_info",
+ Help: "Cluster information (always 1, labels contain metadata)",
+ }, []string{"cluster_id", "version", "os", "deployment", "features"}),
+ telemetryReceived: promauto.NewCounter(prometheus.CounterOpts{
+ Name: "seaweedfs_telemetry_reports_received_total",
+ Help: "Total number of telemetry reports received",
+ }),
+ instances: make(map[string]*telemetryData),
+ stats: make(map[string]interface{}),
+ }
+}
+
+func (s *PrometheusStorage) StoreTelemetry(data *proto.TelemetryData) error {
+ s.mu.Lock()
+ defer s.mu.Unlock()
+
+ // Update Prometheus metrics
+ labels := prometheus.Labels{
+ "cluster_id": data.ClusterId,
+ "version": data.Version,
+ "os": data.Os,
+ "deployment": data.Deployment,
+ }
+
+ s.volumeServerCount.With(labels).Set(float64(data.VolumeServerCount))
+ s.totalDiskBytes.With(labels).Set(float64(data.TotalDiskBytes))
+ s.totalVolumeCount.With(labels).Set(float64(data.TotalVolumeCount))
+ s.filerCount.With(labels).Set(float64(data.FilerCount))
+ s.brokerCount.With(labels).Set(float64(data.BrokerCount))
+
+ // Features as JSON string for the label
+ featuresJSON, _ := json.Marshal(data.Features)
+ infoLabels := prometheus.Labels{
+ "cluster_id": data.ClusterId,
+ "version": data.Version,
+ "os": data.Os,
+ "deployment": data.Deployment,
+ "features": string(featuresJSON),
+ }
+ s.clusterInfo.With(infoLabels).Set(1)
+
+ s.telemetryReceived.Inc()
+
+ // Store in memory for API endpoints
+ s.instances[data.ClusterId] = &telemetryData{
+ TelemetryData: data,
+ ReceivedAt: time.Now().UTC(),
+ }
+
+ // Update aggregated stats
+ s.updateStats()
+
+ return nil
+}
+
+func (s *PrometheusStorage) GetStats() (map[string]interface{}, error) {
+ s.mu.RLock()
+ defer s.mu.RUnlock()
+
+ // Return cached stats
+ result := make(map[string]interface{})
+ for k, v := range s.stats {
+ result[k] = v
+ }
+ return result, nil
+}
+
+func (s *PrometheusStorage) GetInstances(limit int) ([]*telemetryData, error) {
+ s.mu.RLock()
+ defer s.mu.RUnlock()
+
+ var instances []*telemetryData
+ count := 0
+ for _, instance := range s.instances {
+ if count >= limit {
+ break
+ }
+ instances = append(instances, instance)
+ count++
+ }
+
+ return instances, nil
+}
+
+func (s *PrometheusStorage) GetMetrics(days int) (map[string]interface{}, error) {
+ s.mu.RLock()
+ defer s.mu.RUnlock()
+
+ // Return current metrics from in-memory storage
+ // Historical data should be queried from Prometheus directly
+ cutoff := time.Now().AddDate(0, 0, -days)
+
+ var volumeServers []map[string]interface{}
+ var diskUsage []map[string]interface{}
+
+ for _, instance := range s.instances {
+ if instance.ReceivedAt.After(cutoff) {
+ volumeServers = append(volumeServers, map[string]interface{}{
+ "date": instance.ReceivedAt.Format("2006-01-02"),
+ "value": instance.TelemetryData.VolumeServerCount,
+ })
+ diskUsage = append(diskUsage, map[string]interface{}{
+ "date": instance.ReceivedAt.Format("2006-01-02"),
+ "value": instance.TelemetryData.TotalDiskBytes,
+ })
+ }
+ }
+
+ return map[string]interface{}{
+ "volume_servers": volumeServers,
+ "disk_usage": diskUsage,
+ }, nil
+}
+
+func (s *PrometheusStorage) updateStats() {
+ now := time.Now()
+ last7Days := now.AddDate(0, 0, -7)
+ last30Days := now.AddDate(0, 0, -30)
+
+ totalInstances := 0
+ activeInstances := 0
+ versions := make(map[string]int)
+ osDistribution := make(map[string]int)
+ deployments := make(map[string]int)
+
+ for _, instance := range s.instances {
+ if instance.ReceivedAt.After(last30Days) {
+ totalInstances++
+ }
+ if instance.ReceivedAt.After(last7Days) {
+ activeInstances++
+ versions[instance.TelemetryData.Version]++
+ osDistribution[instance.TelemetryData.Os]++
+ deployments[instance.TelemetryData.Deployment]++
+ }
+ }
+
+ // Update Prometheus gauges
+ s.totalClusters.Set(float64(totalInstances))
+ s.activeClusters.Set(float64(activeInstances))
+
+ // Update cached stats for API
+ s.stats = map[string]interface{}{
+ "total_instances": totalInstances,
+ "active_instances": activeInstances,
+ "versions": versions,
+ "os_distribution": osDistribution,
+ "deployments": deployments,
+ }
+}
+
+// CleanupOldInstances removes instances older than the specified duration
+func (s *PrometheusStorage) CleanupOldInstances(maxAge time.Duration) {
+ s.mu.Lock()
+ defer s.mu.Unlock()
+
+ cutoff := time.Now().Add(-maxAge)
+ for instanceID, instance := range s.instances {
+ if instance.ReceivedAt.Before(cutoff) {
+ delete(s.instances, instanceID)
+
+ // Remove from Prometheus metrics
+ labels := prometheus.Labels{
+ "cluster_id": instance.TelemetryData.ClusterId,
+ "version": instance.TelemetryData.Version,
+ "os": instance.TelemetryData.Os,
+ "deployment": instance.TelemetryData.Deployment,
+ }
+ s.volumeServerCount.Delete(labels)
+ s.totalDiskBytes.Delete(labels)
+ s.totalVolumeCount.Delete(labels)
+ s.filerCount.Delete(labels)
+ s.brokerCount.Delete(labels)
+ }
+ }
+
+ s.updateStats()
+}
diff --git a/telemetry/test/integration.go b/telemetry/test/integration.go
new file mode 100644
index 000000000..d0eea4777
--- /dev/null
+++ b/telemetry/test/integration.go
@@ -0,0 +1,315 @@
+package main
+
+import (
+ "context"
+ "fmt"
+ "io"
+ "log"
+ "net/http"
+ "os"
+ "os/exec"
+ "path/filepath"
+ "strings"
+ "syscall"
+ "time"
+
+ "github.com/seaweedfs/seaweedfs/telemetry/proto"
+ "github.com/seaweedfs/seaweedfs/weed/telemetry"
+ protobuf "google.golang.org/protobuf/proto"
+)
+
// Test-server endpoint; the integration test spawns its own telemetry
// server on a non-default port.
const (
	serverPort = "18080" // Use different port to avoid conflicts
	serverURL  = "http://localhost:" + serverPort
)
+
+func main() {
+ fmt.Println("🧪 Starting SeaweedFS Telemetry Integration Test")
+
+ // Start telemetry server
+ fmt.Println("📡 Starting telemetry server...")
+ serverCmd, err := startTelemetryServer()
+ if err != nil {
+ log.Fatalf("❌ Failed to start telemetry server: %v", err)
+ }
+ defer stopServer(serverCmd)
+
+ // Wait for server to start
+ if !waitForServer(serverURL+"/health", 15*time.Second) {
+ log.Fatal("❌ Telemetry server failed to start")
+ }
+ fmt.Println("✅ Telemetry server started successfully")
+
+ // Test protobuf marshaling first
+ fmt.Println("🔧 Testing protobuf marshaling...")
+ if err := testProtobufMarshaling(); err != nil {
+ log.Fatalf("❌ Protobuf marshaling test failed: %v", err)
+ }
+ fmt.Println("✅ Protobuf marshaling test passed")
+
+ // Test protobuf client
+ fmt.Println("🔄 Testing protobuf telemetry client...")
+ if err := testTelemetryClient(); err != nil {
+ log.Fatalf("❌ Telemetry client test failed: %v", err)
+ }
+ fmt.Println("✅ Telemetry client test passed")
+
+ // Test server metrics endpoint
+ fmt.Println("📊 Testing Prometheus metrics endpoint...")
+ if err := testMetricsEndpoint(); err != nil {
+ log.Fatalf("❌ Metrics endpoint test failed: %v", err)
+ }
+ fmt.Println("✅ Metrics endpoint test passed")
+
+ // Test stats API
+ fmt.Println("📈 Testing stats API...")
+ if err := testStatsAPI(); err != nil {
+ log.Fatalf("❌ Stats API test failed: %v", err)
+ }
+ fmt.Println("✅ Stats API test passed")
+
+ // Test instances API
+ fmt.Println("📋 Testing instances API...")
+ if err := testInstancesAPI(); err != nil {
+ log.Fatalf("❌ Instances API test failed: %v", err)
+ }
+ fmt.Println("✅ Instances API test passed")
+
+ fmt.Println("🎉 All telemetry integration tests passed!")
+}
+
+func startTelemetryServer() (*exec.Cmd, error) {
+ // Get the directory where this test is running
+ testDir, err := os.Getwd()
+ if err != nil {
+ return nil, fmt.Errorf("failed to get working directory: %v", err)
+ }
+
+ // Navigate to the server directory (from main seaweedfs directory)
+ serverDir := filepath.Join(testDir, "telemetry", "server")
+
+ cmd := exec.Command("go", "run", ".",
+ "-port="+serverPort,
+ "-dashboard=false",
+ "-cleanup=1m",
+ "-max-age=1h")
+
+ cmd.Dir = serverDir
+
+ // Create log files for server output
+ logFile, err := os.Create("telemetry-server-test.log")
+ if err != nil {
+ return nil, fmt.Errorf("failed to create log file: %v", err)
+ }
+
+ cmd.Stdout = logFile
+ cmd.Stderr = logFile
+
+ if err := cmd.Start(); err != nil {
+ return nil, fmt.Errorf("failed to start server: %v", err)
+ }
+
+ return cmd, nil
+}
+
// stopServer terminates the spawned telemetry server (if any) and removes
// its temporary log file. It is safe to call with a nil command.
func stopServer(cmd *exec.Cmd) {
	if cmd == nil || cmd.Process == nil {
		return
	}
	cmd.Process.Signal(syscall.SIGTERM)
	cmd.Wait()

	// Clean up log file
	os.Remove("telemetry-server-test.log")
}
+
// waitForServer polls url every 500ms until it answers HTTP 200 or the
// timeout elapses. It returns true on success, false on timeout. The
// timeout is checked before each attempt, so a zero timeout returns false
// without issuing a request.
func waitForServer(url string, timeout time.Duration) bool {
	ctx, cancel := context.WithTimeout(context.Background(), timeout)
	defer cancel()

	fmt.Printf("⏳ Waiting for server at %s...\n", url)

	for {
		select {
		case <-ctx.Done():
			return false
		default:
		}

		if resp, err := http.Get(url); err == nil {
			code := resp.StatusCode
			resp.Body.Close()
			if code == http.StatusOK {
				return true
			}
		}
		time.Sleep(500 * time.Millisecond)
	}
}
+
+func testProtobufMarshaling() error {
+ // Test protobuf marshaling/unmarshaling
+ testData := &proto.TelemetryData{
+ ClusterId: "test-cluster-12345",
+ Version: "test-3.45",
+ Os: "linux/amd64",
+ Features: []string{"filer", "s3api"},
+ Deployment: "test",
+ VolumeServerCount: 2,
+ TotalDiskBytes: 1000000,
+ TotalVolumeCount: 10,
+ FilerCount: 1,
+ BrokerCount: 1,
+ Timestamp: time.Now().Unix(),
+ }
+
+ // Marshal
+ data, err := protobuf.Marshal(testData)
+ if err != nil {
+ return fmt.Errorf("failed to marshal protobuf: %v", err)
+ }
+
+ fmt.Printf(" Protobuf size: %d bytes\n", len(data))
+
+ // Unmarshal
+ testData2 := &proto.TelemetryData{}
+ if err := protobuf.Unmarshal(data, testData2); err != nil {
+ return fmt.Errorf("failed to unmarshal protobuf: %v", err)
+ }
+
+ // Verify data
+ if testData2.ClusterId != testData.ClusterId {
+ return fmt.Errorf("protobuf data mismatch: expected %s, got %s",
+ testData.ClusterId, testData2.ClusterId)
+ }
+
+ if testData2.VolumeServerCount != testData.VolumeServerCount {
+ return fmt.Errorf("volume server count mismatch: expected %d, got %d",
+ testData.VolumeServerCount, testData2.VolumeServerCount)
+ }
+
+ return nil
+}
+
+func testTelemetryClient() error {
+ // Create telemetry client
+ client := telemetry.NewClient(serverURL+"/api/collect", true)
+
+ // Create test data using protobuf format
+ testData := &proto.TelemetryData{
+ Version: "test-3.45",
+ Os: "linux/amd64",
+ Features: []string{"filer", "s3api", "mq"},
+ Deployment: "integration-test",
+ VolumeServerCount: 3,
+ TotalDiskBytes: 1073741824, // 1GB
+ TotalVolumeCount: 50,
+ FilerCount: 2,
+ BrokerCount: 1,
+ Timestamp: time.Now().Unix(),
+ }
+
+ // Send telemetry data
+ if err := client.SendTelemetry(testData); err != nil {
+ return fmt.Errorf("failed to send telemetry: %v", err)
+ }
+
+ fmt.Printf(" Sent telemetry for cluster: %s\n", client.GetInstanceID())
+
+ // Wait a bit for processing
+ time.Sleep(2 * time.Second)
+
+ return nil
+}
+
+func testMetricsEndpoint() error {
+ resp, err := http.Get(serverURL + "/metrics")
+ if err != nil {
+ return fmt.Errorf("failed to get metrics: %v", err)
+ }
+ defer resp.Body.Close()
+
+ if resp.StatusCode != http.StatusOK {
+ return fmt.Errorf("metrics endpoint returned status %d", resp.StatusCode)
+ }
+
+ // Read response and check for expected metrics
+ content, err := io.ReadAll(resp.Body)
+ if err != nil {
+ return fmt.Errorf("failed to read metrics response: %v", err)
+ }
+
+ contentStr := string(content)
+ expectedMetrics := []string{
+ "seaweedfs_telemetry_total_clusters",
+ "seaweedfs_telemetry_active_clusters",
+ "seaweedfs_telemetry_reports_received_total",
+ "seaweedfs_telemetry_volume_servers",
+ "seaweedfs_telemetry_disk_bytes",
+ "seaweedfs_telemetry_volume_count",
+ "seaweedfs_telemetry_filer_count",
+ "seaweedfs_telemetry_broker_count",
+ }
+
+ for _, metric := range expectedMetrics {
+ if !strings.Contains(contentStr, metric) {
+ return fmt.Errorf("missing expected metric: %s", metric)
+ }
+ }
+
+ // Check that we have at least one report received
+ if !strings.Contains(contentStr, "seaweedfs_telemetry_reports_received_total 1") {
+ fmt.Printf(" Warning: Expected at least 1 report received, metrics content:\n%s\n", contentStr)
+ }
+
+ fmt.Printf(" Found %d expected metrics\n", len(expectedMetrics))
+
+ return nil
+}
+
+func testStatsAPI() error {
+ resp, err := http.Get(serverURL + "/api/stats")
+ if err != nil {
+ return fmt.Errorf("failed to get stats: %v", err)
+ }
+ defer resp.Body.Close()
+
+ if resp.StatusCode != http.StatusOK {
+ return fmt.Errorf("stats API returned status %d", resp.StatusCode)
+ }
+
+ // Read and verify JSON response
+ content, err := io.ReadAll(resp.Body)
+ if err != nil {
+ return fmt.Errorf("failed to read stats response: %v", err)
+ }
+
+ contentStr := string(content)
+ if !strings.Contains(contentStr, "total_instances") {
+ return fmt.Errorf("stats response missing total_instances field")
+ }
+
+ fmt.Printf(" Stats response: %s\n", contentStr)
+
+ return nil
+}
+
+func testInstancesAPI() error {
+ resp, err := http.Get(serverURL + "/api/instances?limit=10")
+ if err != nil {
+ return fmt.Errorf("failed to get instances: %v", err)
+ }
+ defer resp.Body.Close()
+
+ if resp.StatusCode != http.StatusOK {
+ return fmt.Errorf("instances API returned status %d", resp.StatusCode)
+ }
+
+ // Read response
+ content, err := io.ReadAll(resp.Body)
+ if err != nil {
+ return fmt.Errorf("failed to read instances response: %v", err)
+ }
+
+ fmt.Printf(" Instances response length: %d bytes\n", len(content))
+
+ return nil
+}
diff --git a/weed/command/master.go b/weed/command/master.go
index a8cdf76c6..0761687d9 100644
--- a/weed/command/master.go
+++ b/weed/command/master.go
@@ -61,6 +61,8 @@ type MasterOptions struct {
electionTimeout *time.Duration
raftHashicorp *bool
raftBootstrap *bool
+ telemetryUrl *string
+ telemetryEnabled *bool
}
func init() {
@@ -88,6 +90,8 @@ func init() {
m.electionTimeout = cmdMaster.Flag.Duration("electionTimeout", 10*time.Second, "election timeout of master servers")
m.raftHashicorp = cmdMaster.Flag.Bool("raftHashicorp", false, "use hashicorp raft")
m.raftBootstrap = cmdMaster.Flag.Bool("raftBootstrap", false, "Whether to bootstrap the Raft cluster")
+ m.telemetryUrl = cmdMaster.Flag.String("telemetry.url", "https://telemetry.seaweedfs.com:3091/api/collect", "telemetry server URL to send usage statistics")
+ m.telemetryEnabled = cmdMaster.Flag.Bool("telemetry", false, "enable telemetry reporting")
}
var cmdMaster = &Command{
@@ -332,5 +336,7 @@ func (m *MasterOptions) toMasterOption(whiteList []string) *weed_server.MasterOp
DisableHttp: *m.disableHttp,
MetricsAddress: *m.metricsAddress,
MetricsIntervalSec: *m.metricsIntervalSec,
+ TelemetryUrl: *m.telemetryUrl,
+ TelemetryEnabled: *m.telemetryEnabled,
}
}
diff --git a/weed/command/server.go b/weed/command/server.go
index dd3b0c8b4..c1f80b325 100644
--- a/weed/command/server.go
+++ b/weed/command/server.go
@@ -104,6 +104,8 @@ func init() {
masterOptions.raftBootstrap = cmdServer.Flag.Bool("master.raftBootstrap", false, "Whether to bootstrap the Raft cluster")
masterOptions.heartbeatInterval = cmdServer.Flag.Duration("master.heartbeatInterval", 300*time.Millisecond, "heartbeat interval of master servers, and will be randomly multiplied by [1, 1.25)")
masterOptions.electionTimeout = cmdServer.Flag.Duration("master.electionTimeout", 10*time.Second, "election timeout of master servers")
+ masterOptions.telemetryUrl = cmdServer.Flag.String("master.telemetry.url", "https://telemetry.seaweedfs.com:3091/api/collect", "telemetry server URL to send usage statistics")
+ masterOptions.telemetryEnabled = cmdServer.Flag.Bool("master.telemetry", false, "enable telemetry reporting")
filerOptions.filerGroup = cmdServer.Flag.String("filer.filerGroup", "", "share metadata with other filers in the same filerGroup")
filerOptions.collection = cmdServer.Flag.String("filer.collection", "", "all data will be stored in this collection")
diff --git a/weed/server/master_server.go b/weed/server/master_server.go
index 6569fdbd4..48576adf4 100644
--- a/weed/server/master_server.go
+++ b/weed/server/master_server.go
@@ -8,11 +8,13 @@ import (
"net/url"
"os"
"regexp"
+ "runtime"
"strings"
"sync"
"time"
"github.com/seaweedfs/seaweedfs/weed/stats"
+ "github.com/seaweedfs/seaweedfs/weed/telemetry"
"github.com/seaweedfs/seaweedfs/weed/cluster"
"github.com/seaweedfs/seaweedfs/weed/pb"
@@ -30,6 +32,7 @@ import (
"github.com/seaweedfs/seaweedfs/weed/topology"
"github.com/seaweedfs/seaweedfs/weed/util"
util_http "github.com/seaweedfs/seaweedfs/weed/util/http"
+ "github.com/seaweedfs/seaweedfs/weed/util/version"
"github.com/seaweedfs/seaweedfs/weed/wdclient"
)
@@ -52,6 +55,8 @@ type MasterOption struct {
MetricsAddress string
MetricsIntervalSec int
IsFollower bool
+ TelemetryUrl string
+ TelemetryEnabled bool
}
type MasterServer struct {
@@ -76,6 +81,9 @@ type MasterServer struct {
adminLocks *AdminLocks
Cluster *cluster.Cluster
+
+ // telemetry
+ telemetryCollector *telemetry.Collector
}
func NewMasterServer(r *mux.Router, option *MasterOption, peers map[string]pb.ServerAddress) *MasterServer {
@@ -131,6 +139,28 @@ func NewMasterServer(r *mux.Router, option *MasterOption, peers map[string]pb.Se
ms.vg = topology.NewDefaultVolumeGrowth()
glog.V(0).Infoln("Volume Size Limit is", ms.option.VolumeSizeLimitMB, "MB")
+ // Initialize telemetry after topology is created
+ if option.TelemetryEnabled && option.TelemetryUrl != "" {
+ telemetryClient := telemetry.NewClient(option.TelemetryUrl, option.TelemetryEnabled)
+ ms.telemetryCollector = telemetry.NewCollector(telemetryClient, ms.Topo, ms.Cluster)
+ ms.telemetryCollector.SetMasterServer(ms)
+
+ // Set version and OS information
+ ms.telemetryCollector.SetVersion(version.VERSION_NUMBER)
+ ms.telemetryCollector.SetOS(runtime.GOOS + "/" + runtime.GOARCH)
+
+ // Determine features and deployment type
+ features := []string{"master"}
+ if len(peers) > 1 {
+ features = append(features, "cluster")
+ }
+ ms.telemetryCollector.SetFeatures(features)
+ ms.telemetryCollector.SetDeployment(telemetry.DetermineDeployment(true, false, len(peers)))
+
+ // Start periodic telemetry collection (every 24 hours)
+ ms.telemetryCollector.StartPeriodicCollection(24 * time.Hour)
+ }
+
ms.guard = security.NewGuard(append(ms.option.WhiteList, whiteList...), signingKey, expiresAfterSec, readSigningKey, readExpiresAfterSec)
handleStaticResources2(r)
diff --git a/weed/telemetry/client.go b/weed/telemetry/client.go
new file mode 100644
index 000000000..528984d4d
--- /dev/null
+++ b/weed/telemetry/client.go
@@ -0,0 +1,100 @@
+package telemetry
+
+import (
+ "bytes"
+ "fmt"
+ "net/http"
+ "time"
+
+ "github.com/google/uuid"
+ "github.com/seaweedfs/seaweedfs/telemetry/proto"
+ "github.com/seaweedfs/seaweedfs/weed/glog"
+ protobuf "google.golang.org/protobuf/proto"
+)
+
+// Client posts telemetry reports to a remote collection endpoint.
+type Client struct {
+	url        string       // telemetry endpoint URL; empty string disables sending
+	enabled    bool         // opt-in flag (from the -telemetry command-line option)
+	instanceID string       // random UUID generated per process; held in memory only
+	httpClient *http.Client // shared HTTP client, constructed with a 10s timeout
+}
+
+// NewClient creates a new telemetry client that posts to url. The instance
+// ID is a fresh random UUID kept only in memory, so the same deployment
+// reports under a new identity after every restart (no state is persisted).
+func NewClient(url string, enabled bool) *Client {
+	return &Client{
+		url:        url,
+		enabled:    enabled,
+		instanceID: uuid.New().String(), // Generate UUID in memory only
+		httpClient: &http.Client{
+			Timeout: 10 * time.Second, // bound each report so a send can never hang the caller
+		},
+	}
+}
+
+// IsEnabled returns whether telemetry is enabled: it requires both the
+// opt-in flag and a non-empty endpoint URL.
+func (c *Client) IsEnabled() bool {
+	return c.enabled && c.url != ""
+}
+
+// SendTelemetry sends telemetry data synchronously using protobuf format.
+// When telemetry is disabled it is a no-op and returns nil. The client's
+// per-process instance ID is stamped onto data before sending.
+func (c *Client) SendTelemetry(data *proto.TelemetryData) error {
+	if !c.IsEnabled() {
+		return nil
+	}
+
+	// Set the cluster ID
+	data.ClusterId = c.instanceID
+
+	return c.sendProtobuf(data)
+}
+
+// SendTelemetryAsync sends telemetry data asynchronously on a new goroutine.
+// Send failures are logged at verbosity 1 rather than surfaced to the
+// caller. When telemetry is disabled it is a no-op.
+func (c *Client) SendTelemetryAsync(data *proto.TelemetryData) {
+	if !c.IsEnabled() {
+		return
+	}
+
+	go func() {
+		if err := c.SendTelemetry(data); err != nil {
+			glog.V(1).Infof("Failed to send telemetry: %v", err)
+		}
+	}()
+}
+
+// sendProtobuf marshals data into a TelemetryRequest and POSTs it to c.url
+// as application/x-protobuf. Any response status other than 200 is reported
+// as an error. The response body is always drained and closed so the
+// underlying TCP/TLS connection can be reused by the transport.
+func (c *Client) sendProtobuf(data *proto.TelemetryData) error {
+	req := &proto.TelemetryRequest{
+		Data: data,
+	}
+
+	body, err := protobuf.Marshal(req)
+	if err != nil {
+		return fmt.Errorf("failed to marshal protobuf: %w", err)
+	}
+
+	httpReq, err := http.NewRequest("POST", c.url, bytes.NewReader(body))
+	if err != nil {
+		return fmt.Errorf("failed to create request: %w", err)
+	}
+
+	httpReq.Header.Set("Content-Type", "application/x-protobuf")
+	httpReq.Header.Set("User-Agent", fmt.Sprintf("SeaweedFS/%s", data.Version))
+
+	resp, err := c.httpClient.Do(httpReq)
+	if err != nil {
+		return fmt.Errorf("failed to send request: %w", err)
+	}
+	defer resp.Body.Close()
+
+	// Drain the body so the HTTP transport can reuse the connection.
+	_, _ = io.Copy(io.Discard, resp.Body)
+
+	if resp.StatusCode != http.StatusOK {
+		return fmt.Errorf("server returned status %d", resp.StatusCode)
+	}
+
+	glog.V(2).Infof("Telemetry sent successfully via protobuf")
+	return nil
+}
+
+// GetInstanceID returns the per-process random instance ID that is stamped
+// onto outgoing reports as the cluster ID.
+func (c *Client) GetInstanceID() string {
+	return c.instanceID
+}
diff --git a/weed/telemetry/collector.go b/weed/telemetry/collector.go
new file mode 100644
index 000000000..7991d92c8
--- /dev/null
+++ b/weed/telemetry/collector.go
@@ -0,0 +1,218 @@
+package telemetry
+
+import (
+ "time"
+
+ "github.com/seaweedfs/seaweedfs/telemetry/proto"
+ "github.com/seaweedfs/seaweedfs/weed/cluster"
+ "github.com/seaweedfs/seaweedfs/weed/glog"
+ "github.com/seaweedfs/seaweedfs/weed/topology"
+)
+
+// Collector gathers cluster statistics from the master's topology and
+// cluster membership and forwards them through a telemetry Client.
+type Collector struct {
+	client       *Client
+	topo         *topology.Topology // volume topology; may be nil, guarded in collectData
+	cluster      *cluster.Cluster   // filer/broker membership; may be nil, guarded in collectData
+	masterServer interface{} // Will be set to *weed_server.MasterServer to access client tracking
+	features     []string // enabled feature names, e.g. "master", "cluster"
+	deployment   string   // deployment type string; see DetermineDeployment
+	version      string   // SeaweedFS version string
+	os           string   // "GOOS/GOARCH" pair
+}
+
+// NewCollector creates a new telemetry collector. The descriptive fields
+// (features, deployment, version, os) start at placeholder values and are
+// expected to be populated via the Set* methods before collection starts.
+func NewCollector(client *Client, topo *topology.Topology, cluster *cluster.Cluster) *Collector {
+	return &Collector{
+		client:       client,
+		topo:         topo,
+		cluster:      cluster,
+		masterServer: nil,
+		features:     []string{},
+		deployment:   "unknown",
+		version:      "unknown",
+		os:           "unknown",
+	}
+}
+
+// SetFeatures sets the list of enabled features.
+// NOTE(review): the caller's slice is stored without copying.
+func (c *Collector) SetFeatures(features []string) {
+	c.features = features
+}
+
+// SetDeployment sets the deployment type (standalone, cluster, etc.)
+// as produced by DetermineDeployment.
+func (c *Collector) SetDeployment(deployment string) {
+	c.deployment = deployment
+}
+
+// SetVersion sets the SeaweedFS version reported in telemetry data.
+func (c *Collector) SetVersion(version string) {
+	c.version = version
+}
+
+// SetOS sets the operating system information (a "GOOS/GOARCH" string).
+func (c *Collector) SetOS(os string) {
+	c.os = os
+}
+
+// SetMasterServer sets a reference to the master server for client tracking.
+// It is typed interface{} to avoid an import cycle with weed_server.
+func (c *Collector) SetMasterServer(masterServer interface{}) {
+	c.masterServer = masterServer
+}
+
+// CollectAndSendAsync collects telemetry data and sends it without blocking
+// the caller. Collection runs on its own goroutine because walking the
+// topology takes time; the send then happens synchronously on that same
+// goroutine — calling SendTelemetryAsync here would spawn a second,
+// redundant goroutine per report.
+func (c *Collector) CollectAndSendAsync() {
+	if !c.client.IsEnabled() {
+		return
+	}
+
+	go func() {
+		data := c.collectData()
+		if err := c.client.SendTelemetry(data); err != nil {
+			glog.V(1).Infof("Failed to send telemetry: %v", err)
+		}
+	}()
+}
+
+// StartPeriodicCollection starts sending telemetry data periodically on
+// background goroutines: one initial report after a 30-second settling
+// delay, then one report per interval tick.
+//
+// NOTE(review): the goroutines have no stop/cancellation mechanism and run
+// for the life of the process. That matches the master server's lifetime at
+// the current call site, but confirm if collectors are ever created per-test
+// or restarted.
+func (c *Collector) StartPeriodicCollection(interval time.Duration) {
+	if !c.client.IsEnabled() {
+		glog.V(1).Infof("Telemetry is disabled, skipping periodic collection")
+		return
+	}
+
+	glog.V(0).Infof("Starting telemetry collection every %v", interval)
+
+	// Send initial telemetry after a short delay
+	go func() {
+		time.Sleep(30 * time.Second) // Wait for cluster to stabilize
+		c.CollectAndSendAsync()
+	}()
+
+	// Start periodic collection
+	ticker := time.NewTicker(interval)
+	go func() {
+		defer ticker.Stop()
+		for range ticker.C {
+			c.CollectAndSendAsync()
+		}
+	}()
+}
+
+// collectData gathers telemetry data from the topology and cluster
+// membership. Both c.topo and c.cluster may be nil; the corresponding
+// counters are then simply left at their zero values.
+func (c *Collector) collectData() *proto.TelemetryData {
+	data := &proto.TelemetryData{
+		Version:    c.version,
+		Os:         c.os,
+		Features:   c.features,
+		Deployment: c.deployment,
+		Timestamp:  time.Now().Unix(),
+	}
+
+	if c.topo != nil {
+		// Collect volume server count
+		data.VolumeServerCount = int32(c.countVolumeServers())
+
+		// Collect total disk usage and volume count
+		diskBytes, volumeCount := c.collectVolumeStats()
+		data.TotalDiskBytes = diskBytes
+		data.TotalVolumeCount = int32(volumeCount)
+	}
+
+	if c.cluster != nil {
+		// Collect filer and broker counts
+		data.FilerCount = int32(c.countFilers())
+		data.BrokerCount = int32(c.countBrokers())
+	}
+
+	return data
+}
+
+// countVolumeServers counts data nodes by walking the fixed
+// DataCenter -> Rack -> DataNode topology tree.
+//
+// NOTE(review): the unchecked type assertions panic if a node of a different
+// type ever appears at these levels; the walk assumes the canonical layout.
+func (c *Collector) countVolumeServers() int {
+	count := 0
+	for _, dcNode := range c.topo.Children() {
+		dc := dcNode.(*topology.DataCenter)
+		for _, rackNode := range dc.Children() {
+			rack := rackNode.(*topology.Rack)
+			// Each child of a rack is one volume server (data node).
+			for range rack.Children() {
+				count++
+			}
+		}
+	}
+	return count
+}
+
+// collectVolumeStats walks the DataCenter -> Rack -> DataNode tree and
+// returns the summed volume sizes in bytes and the total number of volumes.
+// The same unchecked-type-assertion caveat as countVolumeServers applies.
+func (c *Collector) collectVolumeStats() (uint64, int) {
+	var totalDiskBytes uint64
+	var totalVolumeCount int
+
+	for _, dcNode := range c.topo.Children() {
+		dc := dcNode.(*topology.DataCenter)
+		for _, rackNode := range dc.Children() {
+			rack := rackNode.(*topology.Rack)
+			for _, dnNode := range rack.Children() {
+				dn := dnNode.(*topology.DataNode)
+				volumes := dn.GetVolumes()
+				for _, volumeInfo := range volumes {
+					totalVolumeCount++
+					totalDiskBytes += volumeInfo.Size
+				}
+			}
+		}
+	}
+
+	return totalDiskBytes, totalVolumeCount
+}
+
+// countFilers counts the number of active filer servers across all groups
+// returned by getAllFilerGroups (currently only the default group).
+// This includes both pure filer servers and S3 servers (which register as
+// filers).
+func (c *Collector) countFilers() int {
+	count := 0
+	for _, groupName := range c.getAllFilerGroups() {
+		nodes := c.cluster.ListClusterNode(cluster.FilerGroupName(groupName), cluster.FilerType)
+		count += len(nodes)
+	}
+	return count
+}
+
+// countBrokers counts the number of active broker servers across all groups
+// returned by getAllBrokerGroups (currently only the default group).
+// NOTE(review): the group name is wrapped in cluster.FilerGroupName even
+// though these are brokers — presumably that type is shared across node
+// kinds; verify against the cluster package.
+func (c *Collector) countBrokers() int {
+	count := 0
+	for _, groupName := range c.getAllBrokerGroups() {
+		nodes := c.cluster.ListClusterNode(cluster.FilerGroupName(groupName), cluster.BrokerType)
+		count += len(nodes)
+	}
+	return count
+}
+
+// getAllFilerGroups returns all filer group names to count over.
+// For simplicity only the default group (empty name) is checked; a more
+// sophisticated implementation could enumerate all groups.
+func (c *Collector) getAllFilerGroups() []string {
+	return []string{""}
+}
+
+// getAllBrokerGroups returns all broker group names to count over.
+// For simplicity only the default group (empty name) is checked; a more
+// sophisticated implementation could enumerate all groups.
+func (c *Collector) getAllBrokerGroups() []string {
+	return []string{""}
+}
+
+// DetermineDeployment determines the deployment type based on configuration:
+// "cluster" or "standalone" when both master and volume are enabled (split on
+// peerCount > 1), "master-only" / "volume-only" when exactly one is enabled,
+// and "unknown" otherwise.
+//
+// NOTE(review): the only call site in this change passes
+// DetermineDeployment(true, false, len(peers)) from the master server, which
+// always yields "master-only" regardless of peer count — confirm whether the
+// volume flag should be wired through so "cluster" is ever reported.
+func DetermineDeployment(isMasterEnabled, isVolumeEnabled bool, peerCount int) string {
+	if isMasterEnabled && isVolumeEnabled {
+		if peerCount > 1 {
+			return "cluster"
+		}
+		return "standalone"
+	}
+	if isMasterEnabled {
+		return "master-only"
+	}
+	if isVolumeEnabled {
+		return "volume-only"
+	}
+	return "unknown"
+}