diff options
Diffstat (limited to 'weed')
| -rw-r--r-- | weed/filer/foundationdb/CONFIGURATION.md | 385 | ||||
| -rw-r--r-- | weed/filer/foundationdb/INSTALL.md | 435 | ||||
| -rw-r--r-- | weed/filer/foundationdb/README.md | 221 | ||||
| -rw-r--r-- | weed/filer/foundationdb/doc.go | 13 | ||||
| -rw-r--r-- | weed/filer/foundationdb/foundationdb_store.go | 575 | ||||
| -rw-r--r-- | weed/filer/foundationdb/foundationdb_store_test.go | 545 | ||||
| -rw-r--r-- | weed/server/filer_server.go | 1 |
7 files changed, 2175 insertions, 0 deletions
diff --git a/weed/filer/foundationdb/CONFIGURATION.md b/weed/filer/foundationdb/CONFIGURATION.md new file mode 100644 index 000000000..80f5bd357 --- /dev/null +++ b/weed/filer/foundationdb/CONFIGURATION.md @@ -0,0 +1,385 @@ +# FoundationDB Filer Store Configuration Reference + +This document provides comprehensive configuration options for the FoundationDB filer store. + +## Configuration Methods + +### 1. Configuration File (filer.toml) + +```toml +[foundationdb] +enabled = true +cluster_file = "/etc/foundationdb/fdb.cluster" +api_version = 740 +timeout = "5s" +max_retry_delay = "1s" +directory_prefix = "seaweedfs" +``` + +### 2. Environment Variables + +All configuration options can be set via environment variables with the `WEED_FOUNDATIONDB_` prefix: + +```bash +export WEED_FOUNDATIONDB_ENABLED=true +export WEED_FOUNDATIONDB_CLUSTER_FILE=/etc/foundationdb/fdb.cluster +export WEED_FOUNDATIONDB_API_VERSION=740 +export WEED_FOUNDATIONDB_TIMEOUT=5s +export WEED_FOUNDATIONDB_MAX_RETRY_DELAY=1s +export WEED_FOUNDATIONDB_DIRECTORY_PREFIX=seaweedfs +``` + +### 3. Command Line Arguments + +While not directly supported, configuration can be specified via config files passed to the `weed` command. 
+ +## Configuration Options + +### Basic Options + +| Option | Type | Default | Description | +|--------|------|---------|-------------| +| `enabled` | boolean | `false` | Enable the FoundationDB filer store | +| `cluster_file` | string | `/etc/foundationdb/fdb.cluster` | Path to FoundationDB cluster file | +| `api_version` | integer | `740` | FoundationDB API version to use | + +### Connection Options + +| Option | Type | Default | Description | +|--------|------|---------|-------------| +| `timeout` | duration | `5s` | Transaction timeout duration | +| `max_retry_delay` | duration | `1s` | Maximum delay between retries | + +### Storage Options + +| Option | Type | Default | Description | +|--------|------|---------|-------------| +| `directory_prefix` | string | `seaweedfs` | Directory prefix for key organization | + +## Configuration Examples + +### Development Environment + +```toml +[foundationdb] +enabled = true +cluster_file = "/var/fdb/config/fdb.cluster" +api_version = 740 +timeout = "10s" +max_retry_delay = "2s" +directory_prefix = "seaweedfs_dev" +``` + +### Production Environment + +```toml +[foundationdb] +enabled = true +cluster_file = "/etc/foundationdb/fdb.cluster" +api_version = 740 +timeout = "30s" +max_retry_delay = "5s" +directory_prefix = "seaweedfs_prod" +``` + +### High-Performance Setup + +```toml +[foundationdb] +enabled = true +cluster_file = "/etc/foundationdb/fdb.cluster" +api_version = 740 +timeout = "60s" +max_retry_delay = "10s" +directory_prefix = "sw" # Shorter prefix for efficiency +``` + +### Path-Specific Configuration + +Configure different FoundationDB settings for different paths: + +```toml +# Default configuration +[foundationdb] +enabled = true +cluster_file = "/etc/foundationdb/fdb.cluster" +directory_prefix = "seaweedfs_main" + +# Backup path with different prefix +[foundationdb.backup] +enabled = true +cluster_file = "/etc/foundationdb/fdb.cluster" +directory_prefix = "seaweedfs_backup" +location = "/backup" +timeout = 
"120s" + +# Archive path with extended timeouts +[foundationdb.archive] +enabled = true +cluster_file = "/etc/foundationdb/fdb.cluster" +directory_prefix = "seaweedfs_archive" +location = "/archive" +timeout = "300s" +max_retry_delay = "30s" +``` + +## Configuration Validation + +### Required Settings + +The following settings are required for FoundationDB to function: + +1. `enabled = true` +2. `cluster_file` must point to a valid FoundationDB cluster file +3. `api_version` must match your FoundationDB installation + +### Validation Rules + +- `api_version` must be between 600 and 740 +- `timeout` must be a valid duration string (e.g., "5s", "30s", "2m") +- `max_retry_delay` must be a valid duration string +- `cluster_file` must exist and be readable +- `directory_prefix` must not be empty + +### Error Handling + +Invalid configurations will result in startup errors: + +``` +FATAL: Failed to initialize store for foundationdb: invalid timeout duration +FATAL: Failed to initialize store for foundationdb: failed to open FoundationDB database +FATAL: Failed to initialize store for foundationdb: cluster file not found +``` + +## Performance Tuning + +### Timeout Configuration + +| Use Case | Timeout | Max Retry Delay | Notes | +|----------|---------|-----------------|-------| +| Interactive workloads | 5s | 1s | Fast response times | +| Batch processing | 60s | 10s | Handle large operations | +| Archive operations | 300s | 30s | Very large data sets | + +### Connection Pool Settings + +FoundationDB automatically manages connection pooling. No additional configuration needed. 
+ +### Directory Organization + +Use meaningful directory prefixes to organize data: + +```toml +# Separate environments +directory_prefix = "prod_seaweedfs" # Production +directory_prefix = "staging_seaweedfs" # Staging +directory_prefix = "dev_seaweedfs" # Development + +# Separate applications +directory_prefix = "app1_seaweedfs" # Application 1 +directory_prefix = "app2_seaweedfs" # Application 2 +``` + +## Security Configuration + +### Cluster File Security + +Protect the FoundationDB cluster file: + +```bash +# Set proper permissions +sudo chown root:seaweedfs /etc/foundationdb/fdb.cluster +sudo chmod 640 /etc/foundationdb/fdb.cluster +``` + +### Network Security + +FoundationDB supports TLS encryption. Configure in the cluster file: + +``` +description:cluster_id@tls(server1:4500,server2:4500,server3:4500) +``` + +### Access Control + +Use FoundationDB's built-in access control mechanisms when available. + +## Monitoring Configuration + +### Health Check Settings + +Configure health check timeouts appropriately: + +```toml +[foundationdb] +enabled = true +timeout = "10s" # Reasonable timeout for health checks +``` + +### Logging Configuration + +Enable verbose logging for troubleshooting: + +```bash +# Start SeaweedFS with debug logs +WEED_FOUNDATIONDB_ENABLED=true weed -v=2 server -filer +``` + +## Migration Configuration + +### From Other Filer Stores + +When migrating from other filer stores: + +1. Configure both stores temporarily +2. Use path-specific configuration for gradual migration +3. 
Migrate data using SeaweedFS tools + +```toml +# During migration - keep old store for reads +[leveldb2] +enabled = true +dir = "/old/filer/data" + +# New writes go to FoundationDB +[foundationdb.migration] +enabled = true +location = "/new" +cluster_file = "/etc/foundationdb/fdb.cluster" +``` + +## Backup Configuration + +### Metadata Backup Strategy + +```toml +# Main storage +[foundationdb] +enabled = true +directory_prefix = "seaweedfs_main" + +# Backup storage (different cluster recommended) +[foundationdb.backup] +enabled = true +cluster_file = "/etc/foundationdb/backup_fdb.cluster" +directory_prefix = "seaweedfs_backup" +location = "/backup" +``` + +## Container Configuration + +### Docker Environment Variables + +```bash +# Docker environment +WEED_FOUNDATIONDB_ENABLED=true +WEED_FOUNDATIONDB_CLUSTER_FILE=/var/fdb/config/fdb.cluster +WEED_FOUNDATIONDB_API_VERSION=740 +``` + +### Kubernetes ConfigMap + +```yaml +apiVersion: v1 +kind: ConfigMap +metadata: + name: seaweedfs-config +data: + filer.toml: | + [foundationdb] + enabled = true + cluster_file = "/var/fdb/config/cluster_file" + api_version = 740 + timeout = "30s" + max_retry_delay = "5s" + directory_prefix = "k8s_seaweedfs" +``` + +## Troubleshooting Configuration + +### Debug Configuration + +```toml +[foundationdb] +enabled = true +cluster_file = "/etc/foundationdb/fdb.cluster" +timeout = "60s" # Longer timeouts for debugging +max_retry_delay = "10s" +directory_prefix = "debug_seaweedfs" +``` + +### Test Configuration + +```toml +[foundationdb] +enabled = true +cluster_file = "/tmp/fdb.cluster" # Test cluster +timeout = "5s" +directory_prefix = "test_seaweedfs" +``` + +## Configuration Best Practices + +### 1. Environment Separation + +Use different directory prefixes for different environments: +- Production: `prod_seaweedfs` +- Staging: `staging_seaweedfs` +- Development: `dev_seaweedfs` + +### 2. 
Timeout Settings + +- Interactive: 5-10 seconds +- Batch: 30-60 seconds +- Archive: 120-300 seconds + +### 3. Cluster File Management + +- Use absolute paths for cluster files +- Ensure proper file permissions +- Keep backup copies of cluster files + +### 4. Directory Naming + +- Use descriptive prefixes +- Include environment/application identifiers +- Keep prefixes reasonably short for efficiency + +### 5. Error Handling + +- Configure appropriate timeouts +- Monitor retry patterns +- Set up alerting for configuration errors + +## Configuration Testing + +### Validation Script + +```bash +#!/bin/bash +# Test FoundationDB configuration + +# Check cluster file +if [ ! -f "$WEED_FOUNDATIONDB_CLUSTER_FILE" ]; then + echo "ERROR: Cluster file not found: $WEED_FOUNDATIONDB_CLUSTER_FILE" + exit 1 +fi + +# Test connection +fdbcli -C "$WEED_FOUNDATIONDB_CLUSTER_FILE" --exec 'status' > /dev/null +if [ $? -ne 0 ]; then + echo "ERROR: Cannot connect to FoundationDB cluster" + exit 1 +fi + +echo "Configuration validation passed" +``` + +### Integration Testing + +```bash +# Test configuration with SeaweedFS +cd test/foundationdb +make check-env +make test-unit +``` diff --git a/weed/filer/foundationdb/INSTALL.md b/weed/filer/foundationdb/INSTALL.md new file mode 100644 index 000000000..7b3b128fa --- /dev/null +++ b/weed/filer/foundationdb/INSTALL.md @@ -0,0 +1,435 @@ +# FoundationDB Filer Store Installation Guide + +This guide covers the installation and setup of the FoundationDB filer store for SeaweedFS. + +## Prerequisites + +### FoundationDB Server + +1. 
**Install FoundationDB Server** + + **Ubuntu/Debian:** + ```bash + # Add FoundationDB repository + curl -L https://github.com/apple/foundationdb/releases/download/7.4.5/foundationdb-clients_7.4.5-1_amd64.deb -o foundationdb-clients.deb + curl -L https://github.com/apple/foundationdb/releases/download/7.4.5/foundationdb-server_7.4.5-1_amd64.deb -o foundationdb-server.deb + + sudo dpkg -i foundationdb-clients.deb foundationdb-server.deb + ``` + + **CentOS/RHEL:** + ```bash + # Install RPM packages + wget https://github.com/apple/foundationdb/releases/download/7.4.5/foundationdb-clients-7.4.5-1.el7.x86_64.rpm + wget https://github.com/apple/foundationdb/releases/download/7.4.5/foundationdb-server-7.4.5-1.el7.x86_64.rpm + + sudo rpm -Uvh foundationdb-clients-7.4.5-1.el7.x86_64.rpm foundationdb-server-7.4.5-1.el7.x86_64.rpm + ``` + + **macOS:** + ```bash + # Using Homebrew (if available) + brew install foundationdb + + # Or download from GitHub releases + # https://github.com/apple/foundationdb/releases + ``` + +2. **Initialize FoundationDB Cluster** + + **Single Node (Development):** + ```bash + # Start FoundationDB service + sudo systemctl start foundationdb + sudo systemctl enable foundationdb + + # Initialize database + fdbcli --exec 'configure new single ssd' + ``` + + **Multi-Node Cluster (Production):** + ```bash + # On each node, edit /etc/foundationdb/fdb.cluster + # Example: testing:testing@node1:4500,node2:4500,node3:4500 + + # On one node, initialize cluster + fdbcli --exec 'configure new double ssd' + ``` + +3. **Verify Installation** + ```bash + fdbcli --exec 'status' + ``` + +### FoundationDB Client Libraries + +The SeaweedFS FoundationDB integration requires the FoundationDB client libraries. 
+ +**Ubuntu/Debian:** +```bash +sudo apt-get install libfdb-dev +``` + +**CentOS/RHEL:** +```bash +sudo yum install foundationdb-devel +``` + +**macOS:** +```bash +# Client libraries are included with the server installation +export LIBRARY_PATH=/usr/local/lib +export CPATH=/usr/local/include +``` + +## Building SeaweedFS with FoundationDB Support + +### Download FoundationDB Go Bindings + +```bash +go mod init seaweedfs-foundationdb +go get github.com/apple/foundationdb/bindings/go/src/fdb +``` + +### Build SeaweedFS + +```bash +# Clone SeaweedFS repository +git clone https://github.com/seaweedfs/seaweedfs.git +cd seaweedfs + +# Build with FoundationDB support +go build -tags foundationdb -o weed +``` + +### Verify Build + +```bash +./weed version +# Should show version information + +./weed help +# Should list available commands +``` + +## Configuration + +### Basic Configuration + +Create or edit `filer.toml`: + +```toml +[foundationdb] +enabled = true +cluster_file = "/etc/foundationdb/fdb.cluster" +api_version = 740 +timeout = "5s" +max_retry_delay = "1s" +directory_prefix = "seaweedfs" +``` + +### Environment Variables + +Alternative configuration via environment variables: + +```bash +export WEED_FOUNDATIONDB_ENABLED=true +export WEED_FOUNDATIONDB_CLUSTER_FILE=/etc/foundationdb/fdb.cluster +export WEED_FOUNDATIONDB_API_VERSION=740 +export WEED_FOUNDATIONDB_TIMEOUT=5s +export WEED_FOUNDATIONDB_MAX_RETRY_DELAY=1s +export WEED_FOUNDATIONDB_DIRECTORY_PREFIX=seaweedfs +``` + +### Advanced Configuration + +For production deployments: + +```toml +[foundationdb] +enabled = true +cluster_file = "/etc/foundationdb/fdb.cluster" +api_version = 740 +timeout = "30s" +max_retry_delay = "5s" +directory_prefix = "seaweedfs_prod" + +# Path-specific configuration for backups +[foundationdb.backup] +enabled = true +cluster_file = "/etc/foundationdb/fdb.cluster" +directory_prefix = "seaweedfs_backup" +location = "/backup" +timeout = "60s" +``` + +## Deployment + +### Single Node 
Deployment + +```bash +# Start SeaweedFS with FoundationDB filer +./weed server -filer \ + -master.port=9333 \ + -volume.port=8080 \ + -filer.port=8888 \ + -s3.port=8333 +``` + +### Distributed Deployment + +**Master Servers:** +```bash +# Node 1 +./weed master -port=9333 -peers=master1:9333,master2:9333,master3:9333 + +# Node 2 +./weed master -port=9333 -peers=master1:9333,master2:9333,master3:9333 -ip=master2 + +# Node 3 +./weed master -port=9333 -peers=master1:9333,master2:9333,master3:9333 -ip=master3 +``` + +**Filer Servers with FoundationDB:** +```bash +# Filer nodes +./weed filer -master=master1:9333,master2:9333,master3:9333 -port=8888 +``` + +**Volume Servers:** +```bash +./weed volume -master=master1:9333,master2:9333,master3:9333 -port=8080 +``` + +### Docker Deployment + +**docker-compose.yml:** +```yaml +version: '3.9' +services: + foundationdb: + image: foundationdb/foundationdb:7.4.5 + ports: + - "4500:4500" + volumes: + - fdb_data:/var/fdb/data + - fdb_config:/var/fdb/config + + seaweedfs: + image: chrislusf/seaweedfs:latest + command: "server -filer -ip=seaweedfs" + ports: + - "9333:9333" + - "8888:8888" + - "8333:8333" + environment: + WEED_FOUNDATIONDB_ENABLED: "true" + WEED_FOUNDATIONDB_CLUSTER_FILE: "/var/fdb/config/fdb.cluster" + volumes: + - fdb_config:/var/fdb/config + depends_on: + - foundationdb + +volumes: + fdb_data: + fdb_config: +``` + +### Kubernetes Deployment + +**FoundationDB Operator:** +```bash +# Install FoundationDB operator +kubectl apply -f https://raw.githubusercontent.com/FoundationDB/fdb-kubernetes-operator/main/config/samples/deployment.yaml +``` + +**SeaweedFS with FoundationDB:** +```yaml +apiVersion: apps/v1 +kind: Deployment +metadata: + name: seaweedfs-filer +spec: + replicas: 3 + selector: + matchLabels: + app: seaweedfs-filer + template: + metadata: + labels: + app: seaweedfs-filer + spec: + containers: + - name: seaweedfs + image: chrislusf/seaweedfs:latest + command: ["weed", "filer"] + env: + - name: 
WEED_FOUNDATIONDB_ENABLED + value: "true" + - name: WEED_FOUNDATIONDB_CLUSTER_FILE + value: "/var/fdb/config/cluster_file" + ports: + - containerPort: 8888 + volumeMounts: + - name: fdb-config + mountPath: /var/fdb/config + volumes: + - name: fdb-config + configMap: + name: fdb-cluster-config +``` + +## Testing Installation + +### Quick Test + +```bash +# Start SeaweedFS with FoundationDB +./weed server -filer & + +# Test file operations +echo "Hello FoundationDB" > test.txt +curl -F file=@test.txt "http://localhost:8888/test/" +curl "http://localhost:8888/test/test.txt" + +# Test S3 API +curl -X PUT "http://localhost:8333/testbucket" +curl -T test.txt "http://localhost:8333/testbucket/test.txt" +``` + +### Integration Test Suite + +```bash +# Run the provided test suite +cd test/foundationdb +make setup +make test +``` + +## Performance Tuning + +### FoundationDB Tuning + +```bash +# Configure for high performance +fdbcli --exec 'configure triple ssd' +fdbcli --exec 'configure storage_engine=ssd-redwood-1-experimental' +``` + +### SeaweedFS Configuration + +```toml +[foundationdb] +enabled = true +cluster_file = "/etc/foundationdb/fdb.cluster" +timeout = "10s" # Longer timeout for large operations +max_retry_delay = "2s" # Adjust retry behavior +directory_prefix = "sw" # Shorter prefix for efficiency +``` + +### OS-Level Tuning + +```bash +# Increase file descriptor limits +echo "* soft nofile 65536" >> /etc/security/limits.conf +echo "* hard nofile 65536" >> /etc/security/limits.conf + +# Adjust network parameters +echo "net.core.rmem_max = 134217728" >> /etc/sysctl.conf +echo "net.core.wmem_max = 134217728" >> /etc/sysctl.conf +sysctl -p +``` + +## Monitoring and Maintenance + +### Health Checks + +```bash +# FoundationDB cluster health +fdbcli --exec 'status' +fdbcli --exec 'status details' + +# SeaweedFS health +curl http://localhost:9333/cluster/status +curl http://localhost:8888/statistics/health +``` + +### Log Monitoring + +**FoundationDB Logs:** +- 
`/var/log/foundationdb/` (default location) +- Monitor for errors, warnings, and performance issues + +**SeaweedFS Logs:** +```bash +# Start with verbose logging +./weed -v=2 server -filer +``` + +### Backup and Recovery + +**FoundationDB Backup:** +```bash +# Start backup +fdbbackup start -d file:///path/to/backup -t backup_tag + +# Monitor backup +fdbbackup status -t backup_tag + +# Restore from backup +fdbrestore start -r file:///path/to/backup -t backup_tag --wait +``` + +**SeaweedFS Metadata Backup:** +```bash +# Export filer metadata +./weed shell +> fs.meta.save /path/to/metadata/backup.gz +``` + +## Troubleshooting + +### Common Issues + +1. **Connection Refused** + - Check FoundationDB service status: `sudo systemctl status foundationdb` + - Verify cluster file: `cat /etc/foundationdb/fdb.cluster` + - Check network connectivity: `telnet localhost 4500` + +2. **API Version Mismatch** + - Update API version in configuration + - Rebuild SeaweedFS with matching FDB client library + +3. **Transaction Conflicts** + - Reduce transaction scope + - Implement appropriate retry logic + - Check for concurrent access patterns + +4. **Performance Issues** + - Monitor cluster status: `fdbcli --exec 'status details'` + - Check data distribution: `fdbcli --exec 'status json'` + - Verify storage configuration + +### Debug Mode + +```bash +# Enable FoundationDB client tracing +export FDB_TRACE_ENABLE=1 +export FDB_TRACE_PATH=/tmp/fdb_trace + +# Start SeaweedFS with debug logging +./weed -v=3 server -filer +``` + +### Getting Help + +1. **FoundationDB Documentation**: https://apple.github.io/foundationdb/ +2. **SeaweedFS Community**: https://github.com/seaweedfs/seaweedfs/discussions +3. 
**Issue Reporting**: https://github.com/seaweedfs/seaweedfs/issues + +For specific FoundationDB filer store issues, include: +- FoundationDB version and cluster configuration +- SeaweedFS version and build tags +- Configuration files (filer.toml) +- Error messages and logs +- Steps to reproduce the issue diff --git a/weed/filer/foundationdb/README.md b/weed/filer/foundationdb/README.md new file mode 100644 index 000000000..68ba6416a --- /dev/null +++ b/weed/filer/foundationdb/README.md @@ -0,0 +1,221 @@ +# FoundationDB Filer Store + +This package provides a FoundationDB-based filer store for SeaweedFS, offering ACID transactions and horizontal scalability. + +## Features + +- **ACID Transactions**: Strong consistency guarantees with full ACID properties +- **Horizontal Scalability**: Automatic data distribution across multiple nodes +- **High Availability**: Built-in fault tolerance and automatic failover +- **Efficient Directory Operations**: Optimized for large directory listings +- **Key-Value Support**: Full KV operations for metadata storage +- **Compression**: Automatic compression for large entry chunks + +## Installation + +### Prerequisites + +1. **FoundationDB Server**: Install and configure a FoundationDB cluster +2. **FoundationDB Client Libraries**: Install libfdb_c client libraries +3. 
**Go Build Tags**: Use the `foundationdb` build tag when compiling + +### Building SeaweedFS with FoundationDB Support + +```bash +go build -tags foundationdb -o weed +``` + +## Configuration + +### Basic Configuration + +Add the following to your `filer.toml`: + +```toml +[foundationdb] +enabled = true +cluster_file = "/etc/foundationdb/fdb.cluster" +api_version = 740 +timeout = "5s" +max_retry_delay = "1s" +directory_prefix = "seaweedfs" +``` + +### Configuration Options + +| Option | Description | Default | Required | +|--------|-------------|---------|----------| +| `enabled` | Enable FoundationDB filer store | `false` | Yes | +| `cluster_file` | Path to FDB cluster file | `/etc/foundationdb/fdb.cluster` | Yes | +| `api_version` | FoundationDB API version | `740` | No | +| `timeout` | Operation timeout duration | `5s` | No | +| `max_retry_delay` | Maximum retry delay | `1s` | No | +| `directory_prefix` | Directory prefix for organization | `seaweedfs` | No | + +### Path-Specific Configuration + +For path-specific filer stores: + +```toml +[foundationdb.backup] +enabled = true +cluster_file = "/etc/foundationdb/fdb.cluster" +directory_prefix = "seaweedfs_backup" +location = "/backup" +``` + +## Environment Variables + +Configure via environment variables: + +```bash +export WEED_FOUNDATIONDB_ENABLED=true +export WEED_FOUNDATIONDB_CLUSTER_FILE=/etc/foundationdb/fdb.cluster +export WEED_FOUNDATIONDB_API_VERSION=740 +export WEED_FOUNDATIONDB_TIMEOUT=5s +export WEED_FOUNDATIONDB_MAX_RETRY_DELAY=1s +export WEED_FOUNDATIONDB_DIRECTORY_PREFIX=seaweedfs +``` + +## FoundationDB Cluster Setup + +### Single Node (Development) + +```bash +# Start FoundationDB server +foundationdb start + +# Initialize database +fdbcli --exec 'configure new single ssd' +``` + +### Multi-Node Cluster (Production) + +1. **Install FoundationDB** on all nodes +2. **Configure cluster file** (`/etc/foundationdb/fdb.cluster`) +3. 
**Initialize cluster**: + ```bash + fdbcli --exec 'configure new double ssd' + ``` + +### Docker Setup + +Use the provided docker-compose.yml in `test/foundationdb/`: + +```bash +cd test/foundationdb +make setup +``` + +## Performance Considerations + +### Optimal Configuration + +- **API Version**: Use the latest stable API version (720+) +- **Directory Structure**: Use logical directory prefixes to isolate different SeaweedFS instances +- **Transaction Size**: Keep transactions under 10MB (FDB limit) +- **Batch Operations**: Use transactions for multiple related operations + +### Monitoring + +Monitor FoundationDB cluster status: + +```bash +fdbcli --exec 'status' +fdbcli --exec 'status details' +``` + +### Scaling + +FoundationDB automatically handles: +- Data distribution across nodes +- Load balancing +- Automatic failover +- Storage node addition/removal + +## Testing + +### Unit Tests + +```bash +cd weed/filer/foundationdb +go test -tags foundationdb -v +``` + +### Integration Tests + +```bash +cd test/foundationdb +make test +``` + +### End-to-End Tests + +```bash +cd test/foundationdb +make test-e2e +``` + +## Troubleshooting + +### Common Issues + +1. **Connection Failures**: + - Verify cluster file path + - Check FoundationDB server status + - Validate network connectivity + +2. **Transaction Conflicts**: + - Reduce transaction scope + - Implement retry logic + - Check for concurrent operations + +3. 
**Performance Issues**: + - Monitor cluster health + - Check data distribution + - Optimize directory structure + +### Debug Information + +Enable verbose logging: + +```bash +weed -v=2 server -filer +``` + +Check FoundationDB status: + +```bash +fdbcli --exec 'status details' +``` + +## Security + +### Network Security + +- Configure TLS for FoundationDB connections +- Use firewall rules to restrict access +- Monitor connection attempts + +### Data Encryption + +- Enable encryption at rest in FoundationDB +- Use encrypted connections +- Implement proper key management + +## Limitations + +- Maximum transaction size: 10MB +- Single transaction timeout: configurable (default 5s) +- API version compatibility required +- Requires FoundationDB cluster setup + +## Support + +For issues specific to the FoundationDB filer store: +1. Check FoundationDB cluster status +2. Verify configuration settings +3. Review SeaweedFS logs with verbose output +4. Test with minimal reproduction case + +For FoundationDB-specific issues, consult the [FoundationDB documentation](https://apple.github.io/foundationdb/). diff --git a/weed/filer/foundationdb/doc.go b/weed/filer/foundationdb/doc.go new file mode 100644 index 000000000..3b3a20bc4 --- /dev/null +++ b/weed/filer/foundationdb/doc.go @@ -0,0 +1,13 @@ +/* +Package foundationdb provides a FoundationDB-based filer store for SeaweedFS. + +FoundationDB is a distributed ACID database with strong consistency guarantees +and excellent scalability characteristics. This filer store leverages FDB's +directory layer for organizing file metadata and its key-value interface for +efficient storage and retrieval. + +The referenced "github.com/apple/foundationdb/bindings/go/src/fdb" library +requires FoundationDB client libraries to be installed. +So this is only compiled with "go build -tags foundationdb". 
+*/ +package foundationdb diff --git a/weed/filer/foundationdb/foundationdb_store.go b/weed/filer/foundationdb/foundationdb_store.go new file mode 100644 index 000000000..509ee4b86 --- /dev/null +++ b/weed/filer/foundationdb/foundationdb_store.go @@ -0,0 +1,575 @@ +//go:build foundationdb +// +build foundationdb + +// Package foundationdb provides a filer store implementation using FoundationDB as the backend. +// +// IMPORTANT DESIGN NOTE - DeleteFolderChildren and Transaction Limits: +// +// FoundationDB imposes strict transaction limits: +// - Maximum transaction size: 10MB +// - Maximum transaction duration: 5 seconds +// +// The DeleteFolderChildren operation always uses batched deletion with multiple small transactions +// to safely handle directories of any size. Even if called within an existing transaction context, +// it will create its own batch transactions to avoid exceeding FDB limits. +// +// This means DeleteFolderChildren is NOT atomic with respect to an outer transaction - it manages +// its own transaction boundaries for safety and reliability. 
+ +package foundationdb + +import ( + "bytes" + "context" + "fmt" + "time" + + "github.com/apple/foundationdb/bindings/go/src/fdb" + "github.com/apple/foundationdb/bindings/go/src/fdb/directory" + "github.com/apple/foundationdb/bindings/go/src/fdb/tuple" + + "github.com/seaweedfs/seaweedfs/weed/filer" + "github.com/seaweedfs/seaweedfs/weed/glog" + "github.com/seaweedfs/seaweedfs/weed/pb/filer_pb" + "github.com/seaweedfs/seaweedfs/weed/util" +) + +const ( + // FoundationDB transaction size limit is 10MB + FDB_TRANSACTION_SIZE_LIMIT = 10 * 1024 * 1024 + // Maximum number of entries to return in a single directory listing + // Large batches can cause transaction timeouts and increase memory pressure + MAX_DIRECTORY_LIST_LIMIT = 1000 +) + +func init() { + filer.Stores = append(filer.Stores, &FoundationDBStore{}) +} + +type FoundationDBStore struct { + database fdb.Database + seaweedfsDir directory.DirectorySubspace + kvDir directory.DirectorySubspace + directoryPrefix string + timeout time.Duration + maxRetryDelay time.Duration +} + +// Context key type for storing transactions +type contextKey string + +const transactionKey contextKey = "fdb_transaction" + +// Helper functions for context-scoped transactions +func (store *FoundationDBStore) getTransactionFromContext(ctx context.Context) (fdb.Transaction, bool) { + val := ctx.Value(transactionKey) + if val == nil { + var emptyTx fdb.Transaction + return emptyTx, false + } + if tx, ok := val.(fdb.Transaction); ok { + return tx, true + } + var emptyTx fdb.Transaction + return emptyTx, false +} + +func (store *FoundationDBStore) setTransactionInContext(ctx context.Context, tx fdb.Transaction) context.Context { + return context.WithValue(ctx, transactionKey, tx) +} + +func (store *FoundationDBStore) GetName() string { + return "foundationdb" +} + +func (store *FoundationDBStore) Initialize(configuration util.Configuration, prefix string) error { + // Set default configuration values + 
configuration.SetDefault(prefix+"cluster_file", "/etc/foundationdb/fdb.cluster") + configuration.SetDefault(prefix+"api_version", 740) + configuration.SetDefault(prefix+"timeout", "5s") + configuration.SetDefault(prefix+"max_retry_delay", "1s") + configuration.SetDefault(prefix+"directory_prefix", "seaweedfs") + + clusterFile := configuration.GetString(prefix + "cluster_file") + apiVersion := configuration.GetInt(prefix + "api_version") + timeoutStr := configuration.GetString(prefix + "timeout") + maxRetryDelayStr := configuration.GetString(prefix + "max_retry_delay") + store.directoryPrefix = configuration.GetString(prefix + "directory_prefix") + + // Parse timeout values + var err error + store.timeout, err = time.ParseDuration(timeoutStr) + if err != nil { + return fmt.Errorf("invalid timeout duration %s: %w", timeoutStr, err) + } + + store.maxRetryDelay, err = time.ParseDuration(maxRetryDelayStr) + if err != nil { + return fmt.Errorf("invalid max_retry_delay duration %s: %w", maxRetryDelayStr, err) + } + + return store.initialize(clusterFile, apiVersion) +} + +func (store *FoundationDBStore) initialize(clusterFile string, apiVersion int) error { + glog.V(0).Infof("FoundationDB: connecting to cluster file: %s, API version: %d", clusterFile, apiVersion) + + // Set FDB API version + if err := fdb.APIVersion(apiVersion); err != nil { + return fmt.Errorf("failed to set FoundationDB API version %d: %w", apiVersion, err) + } + + // Open database + var err error + store.database, err = fdb.OpenDatabase(clusterFile) + if err != nil { + return fmt.Errorf("failed to open FoundationDB database: %w", err) + } + + // Create/open seaweedfs directory + store.seaweedfsDir, err = directory.CreateOrOpen(store.database, []string{store.directoryPrefix}, nil) + if err != nil { + return fmt.Errorf("failed to create/open seaweedfs directory: %w", err) + } + + // Create/open kv subdirectory for key-value operations + store.kvDir, err = directory.CreateOrOpen(store.database, 
[]string{store.directoryPrefix, "kv"}, nil)
	if err != nil {
		return fmt.Errorf("failed to create/open kv directory: %w", err)
	}

	glog.V(0).Infof("FoundationDB store initialized successfully with directory prefix: %s", store.directoryPrefix)
	return nil
}

// BeginTransaction creates a FoundationDB transaction and returns a derived
// context carrying it. It fails if ctx already carries a transaction.
func (store *FoundationDBStore) BeginTransaction(ctx context.Context) (context.Context, error) {
	if _, exists := store.getTransactionFromContext(ctx); exists {
		return ctx, fmt.Errorf("transaction already in progress for this context")
	}

	tx, err := store.database.CreateTransaction()
	if err != nil {
		return ctx, fmt.Errorf("failed to create transaction: %w", err)
	}

	return store.setTransactionInContext(ctx, tx), nil
}

// CommitTransaction commits the transaction carried by ctx, failing if none
// is present.
func (store *FoundationDBStore) CommitTransaction(ctx context.Context) error {
	tx, exists := store.getTransactionFromContext(ctx)
	if !exists {
		return fmt.Errorf("no transaction in progress for this context")
	}

	if err := tx.Commit().Get(); err != nil {
		return fmt.Errorf("failed to commit transaction: %w", err)
	}
	return nil
}

// RollbackTransaction cancels the transaction carried by ctx, failing if none
// is present.
func (store *FoundationDBStore) RollbackTransaction(ctx context.Context) error {
	tx, exists := store.getTransactionFromContext(ctx)
	if !exists {
		return fmt.Errorf("no transaction in progress for this context")
	}

	tx.Cancel()
	return nil
}

// InsertEntry stores a new filer entry. FoundationDB's Set is an upsert, so
// insert and update share one implementation.
func (store *FoundationDBStore) InsertEntry(ctx context.Context, entry *filer.Entry) error {
	return store.UpdateEntry(ctx, entry)
}

// UpdateEntry serializes the entry (gzipping chunk-heavy entries) and writes
// it under tuple{dir, name}. An ambient transaction in ctx is reused;
// otherwise a one-shot transaction is created.
func (store *FoundationDBStore) UpdateEntry(ctx context.Context, entry *filer.Entry) error {
	key := store.genKey(entry.DirAndName())

	value, err := entry.EncodeAttributesAndChunks()
	if err != nil {
		return fmt.Errorf("encoding %s %+v: %w", entry.FullPath, entry.Attr, err)
	}

	if len(entry.GetChunks()) > filer.CountEntryChunksForGzip {
		value = util.MaybeGzipData(value)
	}

	// Reject values that can never fit in a single FDB transaction.
	if len(value) > FDB_TRANSACTION_SIZE_LIMIT {
		return fmt.Errorf("entry %s exceeds FoundationDB transaction size limit (%d > %d bytes)",
			entry.FullPath, len(value), FDB_TRANSACTION_SIZE_LIMIT)
	}

	// Reuse an ambient transaction when present.
	if tx, exists := store.getTransactionFromContext(ctx); exists {
		tx.Set(key, value)
		return nil
	}

	_, err = store.database.Transact(func(tr fdb.Transaction) (interface{}, error) {
		tr.Set(key, value)
		return nil, nil
	})
	if err != nil {
		return fmt.Errorf("persisting %s: %w", entry.FullPath, err)
	}
	return nil
}

// FindEntry loads and decodes the entry stored at fullpath, returning
// filer_pb.ErrNotFound when the key does not exist.
func (store *FoundationDBStore) FindEntry(ctx context.Context, fullpath util.FullPath) (entry *filer.Entry, err error) {
	// fullpath is already a util.FullPath; no conversion needed before DirAndName.
	key := store.genKey(fullpath.DirAndName())

	var data []byte
	if tx, exists := store.getTransactionFromContext(ctx); exists {
		data, err = tx.Get(key).Get()
	} else {
		var result interface{}
		result, err = store.database.ReadTransact(func(rtr fdb.ReadTransaction) (interface{}, error) {
			return rtr.Get(key).Get()
		})
		if err == nil {
			if resultBytes, ok := result.([]byte); ok {
				data = resultBytes
			}
		}
	}

	if err != nil {
		return nil, fmt.Errorf("find entry %s: %w", fullpath, err)
	}
	if data == nil {
		return nil, filer_pb.ErrNotFound
	}

	entry = &filer.Entry{
		FullPath: fullpath,
	}
	if err = entry.DecodeAttributesAndChunks(util.MaybeDecompressData(data)); err != nil {
		return entry, fmt.Errorf("decode %s : %w", entry.FullPath, err)
	}
	return entry, nil
}

// DeleteEntry removes the entry stored at fullpath, reusing an ambient
// transaction from ctx when present.
func (store *FoundationDBStore) DeleteEntry(ctx context.Context, fullpath util.FullPath) error {
	// fullpath is already a util.FullPath; no conversion needed before DirAndName.
	key := store.genKey(fullpath.DirAndName())

	if tx, exists := store.getTransactionFromContext(ctx); exists {
		tx.Clear(key)
		return nil
	}

	_, err := store.database.Transact(func(tr fdb.Transaction) (interface{}, error) {
		tr.Clear(key)
		return nil, nil
	})
	if err != nil {
		return fmt.Errorf("deleting %s: %w", fullpath, err)
	}
	return nil
}

// DeleteFolderChildren recursively deletes all entries in this directory and
// its subdirectories. Recursion is required because the key structure is
// tuple{dirPath, fileName}, not tuple{dirPath, ...pathComponents}, so a
// simple prefix range cannot reach subdirectories.
//
// Deletion is ALWAYS batched to safely handle directories of any size,
// avoiding FoundationDB's 10MB transaction size and 5s timeout limits.
// Even when ctx carries a transaction, batching uses its own transactions:
// this call is therefore NOT atomic with an outer transaction, trading
// atomicity for reliability and limit safety.
func (store *FoundationDBStore) DeleteFolderChildren(ctx context.Context, fullpath util.FullPath) error {
+ return store.deleteFolderChildrenInBatches(ctx, fullpath) +} + +// deleteFolderChildrenInBatches deletes directory contents in multiple transactions +// to avoid hitting FoundationDB's transaction size (10MB) and time (5s) limits +func (store *FoundationDBStore) deleteFolderChildrenInBatches(ctx context.Context, fullpath util.FullPath) error { + const BATCH_SIZE = 100 // Delete up to 100 entries per transaction + + // Ensure listing and recursion run outside of any ambient transaction + // Store a sentinel nil value so getTransactionFromContext returns false + ctxNoTxn := context.WithValue(ctx, transactionKey, (*struct{})(nil)) + + for { + // Collect one batch of entries + var entriesToDelete []util.FullPath + var subDirectories []util.FullPath + + // List entries - we'll process BATCH_SIZE at a time + _, err := store.ListDirectoryEntries(ctxNoTxn, fullpath, "", true, int64(BATCH_SIZE), func(entry *filer.Entry) bool { + entriesToDelete = append(entriesToDelete, entry.FullPath) + if entry.IsDirectory() { + subDirectories = append(subDirectories, entry.FullPath) + } + return true + }) + + if err != nil { + return fmt.Errorf("listing children of %s: %w", fullpath, err) + } + + // If no entries found, we're done + if len(entriesToDelete) == 0 { + break + } + + // Recursively delete subdirectories first (also in batches) + for _, subDir := range subDirectories { + if err := store.deleteFolderChildrenInBatches(ctxNoTxn, subDir); err != nil { + return err + } + } + + // Delete this batch of entries in a single transaction + _, err = store.database.Transact(func(tr fdb.Transaction) (interface{}, error) { + txCtx := store.setTransactionInContext(context.Background(), tr) + for _, entryPath := range entriesToDelete { + if delErr := store.DeleteEntry(txCtx, entryPath); delErr != nil { + return nil, fmt.Errorf("deleting entry %s: %w", entryPath, delErr) + } + } + return nil, nil + }) + + if err != nil { + return err + } + + // If we got fewer entries than BATCH_SIZE, we're 
done with this directory + if len(entriesToDelete) < BATCH_SIZE { + break + } + } + + return nil +} + +func (store *FoundationDBStore) ListDirectoryEntries(ctx context.Context, dirPath util.FullPath, startFileName string, includeStartFile bool, limit int64, eachEntryFunc filer.ListEachEntryFunc) (lastFileName string, err error) { + return store.ListDirectoryPrefixedEntries(ctx, dirPath, startFileName, includeStartFile, limit, "", eachEntryFunc) +} + +func (store *FoundationDBStore) ListDirectoryPrefixedEntries(ctx context.Context, dirPath util.FullPath, startFileName string, includeStartFile bool, limit int64, prefix string, eachEntryFunc filer.ListEachEntryFunc) (lastFileName string, err error) { + // Cap limit for optimal FoundationDB performance + // Large batches can cause transaction timeouts and increase memory pressure + if limit > MAX_DIRECTORY_LIST_LIMIT || limit <= 0 { + limit = MAX_DIRECTORY_LIST_LIMIT + } + + // Get the range for the entire directory first + dirTuple := tuple.Tuple{string(dirPath)} + dirRange, err := fdb.PrefixRange(store.seaweedfsDir.Pack(dirTuple)) + if err != nil { + return "", fmt.Errorf("creating prefix range for %s: %w", dirPath, err) + } + + // Determine the key range for the scan + // Use FDB's range capabilities to only fetch keys matching the prefix + var beginKey, endKey fdb.Key + dirBeginConv, dirEndConv := dirRange.FDBRangeKeys() + dirBegin := dirBeginConv.FDBKey() + dirEnd := dirEndConv.FDBKey() + + if prefix != "" { + // Build range by bracketing the filename component + // Start at Pack(dirPath, prefix) and end at Pack(dirPath, nextPrefix) + // where nextPrefix is the next lexicographic string + beginKey = store.seaweedfsDir.Pack(tuple.Tuple{string(dirPath), prefix}) + endKey = dirEnd + + // Use Strinc to get the next string for proper prefix range + if nextPrefix, strincErr := fdb.Strinc([]byte(prefix)); strincErr == nil { + endKey = store.seaweedfsDir.Pack(tuple.Tuple{string(dirPath), string(nextPrefix)}) + } + } else 
{ + // Use entire directory range + beginKey = dirBegin + endKey = dirEnd + } + + // Determine start key and selector based on startFileName + var beginSelector fdb.KeySelector + if startFileName != "" { + // Start from the specified file + startKey := store.seaweedfsDir.Pack(tuple.Tuple{string(dirPath), startFileName}) + if includeStartFile { + beginSelector = fdb.FirstGreaterOrEqual(startKey) + } else { + beginSelector = fdb.FirstGreaterThan(startKey) + } + // Ensure beginSelector is within our desired range + if bytes.Compare(beginSelector.Key.FDBKey(), beginKey.FDBKey()) < 0 { + beginSelector = fdb.FirstGreaterOrEqual(beginKey) + } + } else { + // Start from beginning of the range + beginSelector = fdb.FirstGreaterOrEqual(beginKey) + } + + // End selector is the end of our calculated range + endSelector := fdb.FirstGreaterOrEqual(endKey) + + var kvs []fdb.KeyValue + var rangeErr error + // Check if there's a transaction in context + if tx, exists := store.getTransactionFromContext(ctx); exists { + sr := fdb.SelectorRange{Begin: beginSelector, End: endSelector} + kvs, rangeErr = tx.GetRange(sr, fdb.RangeOptions{Limit: int(limit)}).GetSliceWithError() + if rangeErr != nil { + return "", fmt.Errorf("scanning %s: %w", dirPath, rangeErr) + } + } else { + result, err := store.database.ReadTransact(func(rtr fdb.ReadTransaction) (interface{}, error) { + sr := fdb.SelectorRange{Begin: beginSelector, End: endSelector} + kvSlice, err := rtr.GetRange(sr, fdb.RangeOptions{Limit: int(limit)}).GetSliceWithError() + if err != nil { + return nil, err + } + return kvSlice, nil + }) + if err != nil { + return "", fmt.Errorf("scanning %s: %w", dirPath, err) + } + var ok bool + kvs, ok = result.([]fdb.KeyValue) + if !ok { + return "", fmt.Errorf("unexpected type from ReadTransact: %T, expected []fdb.KeyValue", result) + } + } + + for _, kv := range kvs { + fileName, extractErr := store.extractFileName(kv.Key) + if extractErr != nil { + glog.Warningf("list %s: failed to extract 
fileName from key %v: %v", dirPath, kv.Key, extractErr) + continue + } + + entry := &filer.Entry{ + FullPath: util.NewFullPath(string(dirPath), fileName), + } + + if decodeErr := entry.DecodeAttributesAndChunks(util.MaybeDecompressData(kv.Value)); decodeErr != nil { + glog.V(0).Infof("list %s : %v", entry.FullPath, decodeErr) + continue + } + + if !eachEntryFunc(entry) { + break + } + lastFileName = fileName + } + + return lastFileName, nil +} + +// KV operations +func (store *FoundationDBStore) KvPut(ctx context.Context, key []byte, value []byte) error { + fdbKey := store.kvDir.Pack(tuple.Tuple{key}) + + // Check if there's a transaction in context + if tx, exists := store.getTransactionFromContext(ctx); exists { + tx.Set(fdbKey, value) + return nil + } + + _, err := store.database.Transact(func(tr fdb.Transaction) (interface{}, error) { + tr.Set(fdbKey, value) + return nil, nil + }) + + return err +} + +func (store *FoundationDBStore) KvGet(ctx context.Context, key []byte) ([]byte, error) { + fdbKey := store.kvDir.Pack(tuple.Tuple{key}) + + var data []byte + var err error + + // Check if there's a transaction in context + if tx, exists := store.getTransactionFromContext(ctx); exists { + data, err = tx.Get(fdbKey).Get() + } else { + var result interface{} + result, err = store.database.ReadTransact(func(rtr fdb.ReadTransaction) (interface{}, error) { + return rtr.Get(fdbKey).Get() + }) + if err == nil { + if resultBytes, ok := result.([]byte); ok { + data = resultBytes + } + } + } + + if err != nil { + return nil, fmt.Errorf("kv get %s: %w", string(key), err) + } + if data == nil { + return nil, filer.ErrKvNotFound + } + + return data, nil +} + +func (store *FoundationDBStore) KvDelete(ctx context.Context, key []byte) error { + fdbKey := store.kvDir.Pack(tuple.Tuple{key}) + + // Check if there's a transaction in context + if tx, exists := store.getTransactionFromContext(ctx); exists { + tx.Clear(fdbKey) + return nil + } + + _, err := 
store.database.Transact(func(tr fdb.Transaction) (interface{}, error) { + tr.Clear(fdbKey) + return nil, nil + }) + + return err +} + +func (store *FoundationDBStore) Shutdown() { + // FoundationDB doesn't have an explicit close method for Database + glog.V(0).Infof("FoundationDB store shutdown") +} + +// Helper functions +func (store *FoundationDBStore) genKey(dirPath, fileName string) fdb.Key { + return store.seaweedfsDir.Pack(tuple.Tuple{dirPath, fileName}) +} + +func (store *FoundationDBStore) extractFileName(key fdb.Key) (string, error) { + t, err := store.seaweedfsDir.Unpack(key) + if err != nil { + return "", fmt.Errorf("unpack key %v: %w", key, err) + } + if len(t) != 2 { + return "", fmt.Errorf("tuple unexpected length (len=%d, expected 2) for key %v", len(t), key) + } + + if fileName, ok := t[1].(string); ok { + return fileName, nil + } + return "", fmt.Errorf("second element not a string (type=%T) for key %v", t[1], key) +} diff --git a/weed/filer/foundationdb/foundationdb_store_test.go b/weed/filer/foundationdb/foundationdb_store_test.go new file mode 100644 index 000000000..215c98c76 --- /dev/null +++ b/weed/filer/foundationdb/foundationdb_store_test.go @@ -0,0 +1,545 @@ +//go:build foundationdb +// +build foundationdb + +package foundationdb + +import ( + "context" + "errors" + "fmt" + "os" + "strings" + "testing" + "time" + + "github.com/seaweedfs/seaweedfs/weed/filer" + "github.com/seaweedfs/seaweedfs/weed/pb/filer_pb" + "github.com/seaweedfs/seaweedfs/weed/util" +) + +func TestFoundationDBStore_Initialize(t *testing.T) { + // Test with default configuration + config := util.GetViper() + config.Set("foundationdb.cluster_file", getTestClusterFile()) + config.Set("foundationdb.api_version", 740) + + store := &FoundationDBStore{} + err := store.Initialize(config, "foundationdb.") + if err != nil { + t.Skip("FoundationDB not available for testing, skipping") + } + + defer store.Shutdown() + + if store.GetName() != "foundationdb" { + t.Errorf("Expected 
store name 'foundationdb', got '%s'", store.GetName()) + } + + if store.directoryPrefix != "seaweedfs" { + t.Errorf("Expected default directory prefix 'seaweedfs', got '%s'", store.directoryPrefix) + } +} + +func TestFoundationDBStore_InitializeWithCustomConfig(t *testing.T) { + config := util.GetViper() + config.Set("foundationdb.cluster_file", getTestClusterFile()) + config.Set("foundationdb.api_version", 740) + config.Set("foundationdb.timeout", "10s") + config.Set("foundationdb.max_retry_delay", "2s") + config.Set("foundationdb.directory_prefix", "custom_prefix") + + store := &FoundationDBStore{} + err := store.Initialize(config, "foundationdb.") + if err != nil { + t.Skip("FoundationDB not available for testing, skipping") + } + + defer store.Shutdown() + + if store.directoryPrefix != "custom_prefix" { + t.Errorf("Expected custom directory prefix 'custom_prefix', got '%s'", store.directoryPrefix) + } + + if store.timeout != 10*time.Second { + t.Errorf("Expected timeout 10s, got %v", store.timeout) + } + + if store.maxRetryDelay != 2*time.Second { + t.Errorf("Expected max retry delay 2s, got %v", store.maxRetryDelay) + } +} + +func TestFoundationDBStore_InitializeInvalidConfig(t *testing.T) { + tests := []struct { + name string + config map[string]interface{} + errorMsg string + }{ + { + name: "invalid timeout", + config: map[string]interface{}{ + "foundationdb.cluster_file": getTestClusterFile(), + "foundationdb.api_version": 740, + "foundationdb.timeout": "invalid", + "foundationdb.directory_prefix": "test", + }, + errorMsg: "invalid timeout duration", + }, + { + name: "invalid max_retry_delay", + config: map[string]interface{}{ + "foundationdb.cluster_file": getTestClusterFile(), + "foundationdb.api_version": 740, + "foundationdb.timeout": "5s", + "foundationdb.max_retry_delay": "invalid", + "foundationdb.directory_prefix": "test", + }, + errorMsg: "invalid max_retry_delay duration", + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) 
{ + config := util.GetViper() + for key, value := range tt.config { + config.Set(key, value) + } + + store := &FoundationDBStore{} + err := store.Initialize(config, "foundationdb.") + if err == nil { + store.Shutdown() + t.Errorf("Expected initialization to fail, but it succeeded") + } else if !containsString(err.Error(), tt.errorMsg) { + t.Errorf("Expected error message to contain '%s', got '%s'", tt.errorMsg, err.Error()) + } + }) + } +} + +func TestFoundationDBStore_KeyGeneration(t *testing.T) { + store := &FoundationDBStore{} + err := store.initialize(getTestClusterFile(), 740) + if err != nil { + t.Skip("FoundationDB not available for testing, skipping") + } + defer store.Shutdown() + + // Test key generation for different paths + testCases := []struct { + dirPath string + fileName string + desc string + }{ + {"/", "file.txt", "root directory file"}, + {"/dir", "file.txt", "subdirectory file"}, + {"/deep/nested/dir", "file.txt", "deep nested file"}, + {"/dir with spaces", "file with spaces.txt", "paths with spaces"}, + {"/unicode/测试", "文件.txt", "unicode paths"}, + } + + for _, tc := range testCases { + t.Run(tc.desc, func(t *testing.T) { + key := store.genKey(tc.dirPath, tc.fileName) + if len(key) == 0 { + t.Error("Generated key should not be empty") + } + + // Test that we can extract filename back + // Note: This tests internal consistency + if tc.fileName != "" { + extractedName, err := store.extractFileName(key) + if err != nil { + t.Errorf("extractFileName failed: %v", err) + } + if extractedName != tc.fileName { + t.Errorf("Expected extracted filename '%s', got '%s'", tc.fileName, extractedName) + } + } + }) + } +} + +func TestFoundationDBStore_ErrorHandling(t *testing.T) { + store := &FoundationDBStore{} + err := store.initialize(getTestClusterFile(), 740) + if err != nil { + t.Skip("FoundationDB not available for testing, skipping") + } + defer store.Shutdown() + + ctx := context.Background() + + // Test FindEntry with non-existent path + _, err = 
store.FindEntry(ctx, "/non/existent/file.txt") + if err == nil { + t.Error("Expected error for non-existent file") + } + if !errors.Is(err, filer_pb.ErrNotFound) { + t.Errorf("Expected ErrNotFound, got %v", err) + } + + // Test KvGet with non-existent key + _, err = store.KvGet(ctx, []byte("non_existent_key")) + if err == nil { + t.Error("Expected error for non-existent key") + } + if !errors.Is(err, filer.ErrKvNotFound) { + t.Errorf("Expected ErrKvNotFound, got %v", err) + } + + // Test transaction state errors + err = store.CommitTransaction(ctx) + if err == nil { + t.Error("Expected error when committing without active transaction") + } + + err = store.RollbackTransaction(ctx) + if err == nil { + t.Error("Expected error when rolling back without active transaction") + } +} + +func TestFoundationDBStore_TransactionState(t *testing.T) { + store := &FoundationDBStore{} + err := store.initialize(getTestClusterFile(), 740) + if err != nil { + t.Skip("FoundationDB not available for testing, skipping") + } + defer store.Shutdown() + + ctx := context.Background() + + // Test double transaction begin + txCtx, err := store.BeginTransaction(ctx) + if err != nil { + t.Fatalf("BeginTransaction failed: %v", err) + } + + // Try to begin another transaction on the same context + _, err = store.BeginTransaction(txCtx) + if err == nil { + t.Error("Expected error when beginning transaction while one is active") + } + + // Commit the transaction + err = store.CommitTransaction(txCtx) + if err != nil { + t.Fatalf("CommitTransaction failed: %v", err) + } + + // Now should be able to begin a new transaction + txCtx2, err := store.BeginTransaction(ctx) + if err != nil { + t.Fatalf("BeginTransaction after commit failed: %v", err) + } + + // Rollback this time + err = store.RollbackTransaction(txCtx2) + if err != nil { + t.Fatalf("RollbackTransaction failed: %v", err) + } +} + +// Benchmark tests +func BenchmarkFoundationDBStore_InsertEntry(b *testing.B) { + store := 
createBenchmarkStore(b) + defer store.Shutdown() + + ctx := context.Background() + entry := &filer.Entry{ + FullPath: "/benchmark/file.txt", + Attr: filer.Attr{ + Mode: 0644, + Uid: 1000, + Gid: 1000, + Mtime: time.Now(), + }, + } + + b.ResetTimer() + for i := 0; i < b.N; i++ { + entry.FullPath = util.NewFullPath("/benchmark", fmt.Sprintf("%x", uint64(i))+".txt") + err := store.InsertEntry(ctx, entry) + if err != nil { + b.Fatalf("InsertEntry failed: %v", err) + } + } +} + +func BenchmarkFoundationDBStore_FindEntry(b *testing.B) { + store := createBenchmarkStore(b) + defer store.Shutdown() + + ctx := context.Background() + + // Pre-populate with test entries + numEntries := 1000 + for i := 0; i < numEntries; i++ { + entry := &filer.Entry{ + FullPath: util.NewFullPath("/benchmark", fmt.Sprintf("%x", uint64(i))+".txt"), + Attr: filer.Attr{ + Mode: 0644, + Uid: 1000, + Gid: 1000, + Mtime: time.Now(), + }, + } + err := store.InsertEntry(ctx, entry) + if err != nil { + b.Fatalf("Pre-population InsertEntry failed: %v", err) + } + } + + b.ResetTimer() + for i := 0; i < b.N; i++ { + path := util.NewFullPath("/benchmark", fmt.Sprintf("%x", uint64(i%numEntries))+".txt") + _, err := store.FindEntry(ctx, path) + if err != nil { + b.Fatalf("FindEntry failed: %v", err) + } + } +} + +func BenchmarkFoundationDBStore_KvOperations(b *testing.B) { + store := createBenchmarkStore(b) + defer store.Shutdown() + + ctx := context.Background() + key := []byte("benchmark_key") + value := []byte("benchmark_value") + + b.ResetTimer() + for i := 0; i < b.N; i++ { + // Put + err := store.KvPut(ctx, key, value) + if err != nil { + b.Fatalf("KvPut failed: %v", err) + } + + // Get + _, err = store.KvGet(ctx, key) + if err != nil { + b.Fatalf("KvGet failed: %v", err) + } + } +} + +// Helper functions +func getTestClusterFile() string { + clusterFile := os.Getenv("FDB_CLUSTER_FILE") + if clusterFile == "" { + clusterFile = "/var/fdb/config/fdb.cluster" + } + return clusterFile +} + +func 
createBenchmarkStore(b *testing.B) *FoundationDBStore { + clusterFile := getTestClusterFile() + if _, err := os.Stat(clusterFile); os.IsNotExist(err) { + b.Skip("FoundationDB cluster file not found, skipping benchmark") + } + + store := &FoundationDBStore{} + err := store.initialize(clusterFile, 740) + if err != nil { + b.Skipf("Failed to initialize FoundationDB store: %v", err) + } + + return store +} + +func getTestStore(t *testing.T) *FoundationDBStore { + t.Helper() + + clusterFile := getTestClusterFile() + if _, err := os.Stat(clusterFile); os.IsNotExist(err) { + t.Skip("FoundationDB cluster file not found, skipping test") + } + + store := &FoundationDBStore{} + if err := store.initialize(clusterFile, 740); err != nil { + t.Skipf("Failed to initialize FoundationDB store: %v", err) + } + + return store +} + +func containsString(s, substr string) bool { + return strings.Contains(s, substr) +} + +func TestFoundationDBStore_DeleteFolderChildrenWithBatching(t *testing.T) { + // This test validates that DeleteFolderChildren always uses batching + // to safely handle large directories, regardless of transaction context + + store := getTestStore(t) + defer store.Shutdown() + + ctx := context.Background() + testDir := util.FullPath(fmt.Sprintf("/test_batch_delete_%d", time.Now().UnixNano())) + + // Create a large directory (> 100 entries to trigger batching) + const NUM_ENTRIES = 250 + + t.Logf("Creating %d test entries...", NUM_ENTRIES) + for i := 0; i < NUM_ENTRIES; i++ { + entry := &filer.Entry{ + FullPath: util.NewFullPath(string(testDir), fmt.Sprintf("file_%04d.txt", i)), + Attr: filer.Attr{ + Mode: 0644, + Uid: 1000, + Gid: 1000, + Mtime: time.Now(), + }, + } + if err := store.InsertEntry(ctx, entry); err != nil { + t.Fatalf("Failed to insert test entry %d: %v", i, err) + } + } + + // Test 1: DeleteFolderChildren outside transaction should succeed + t.Run("OutsideTransaction", func(t *testing.T) { + testDir1 := util.FullPath(fmt.Sprintf("/test_batch_1_%d", 
time.Now().UnixNano()))

		// Create entries
		for i := 0; i < NUM_ENTRIES; i++ {
			entry := &filer.Entry{
				FullPath: util.NewFullPath(string(testDir1), fmt.Sprintf("file_%04d.txt", i)),
				Attr: filer.Attr{
					Mode:  0644,
					Uid:   1000,
					Gid:   1000,
					Mtime: time.Now(),
				},
			}
			store.InsertEntry(ctx, entry)
		}

		// Delete with batching
		if err := store.DeleteFolderChildren(ctx, testDir1); err != nil {
			t.Errorf("DeleteFolderChildren outside transaction should succeed, got error: %v", err)
		}

		// Verify all entries deleted
		var count int
		store.ListDirectoryEntries(ctx, testDir1, "", true, 1000, func(entry *filer.Entry) bool {
			count++
			return true
		})
		if count != 0 {
			t.Errorf("Expected all entries to be deleted, found %d", count)
		}
	})

	// Test 2: DeleteFolderChildren with transaction context - uses its own batched transactions
	t.Run("WithTransactionContext", func(t *testing.T) {
		testDir2 := util.FullPath(fmt.Sprintf("/test_batch_2_%d", time.Now().UnixNano()))

		// Create entries
		for i := 0; i < NUM_ENTRIES; i++ {
			entry := &filer.Entry{
				FullPath: util.NewFullPath(string(testDir2), fmt.Sprintf("file_%04d.txt", i)),
				Attr: filer.Attr{
					Mode:  0644,
					Uid:   1000,
					Gid:   1000,
					Mtime: time.Now(),
				},
			}
			store.InsertEntry(ctx, entry)
		}

		// Start a transaction; DeleteFolderChildren will ignore it and use
		// its own batching.
		txCtx, err := store.BeginTransaction(ctx)
		if err != nil {
			t.Fatalf("BeginTransaction failed: %v", err)
		}

		if err = store.DeleteFolderChildren(txCtx, testDir2); err != nil {
			t.Errorf("DeleteFolderChildren should succeed with batching even when transaction context present, got: %v", err)
		}

		// Rolling back the outer transaction must not undo the deletions,
		// because DeleteFolderChildren ran its own transactions.
		store.RollbackTransaction(txCtx)

		var count int
		store.ListDirectoryEntries(ctx, testDir2, "", true, 1000, func(entry *filer.Entry) bool {
			count++
			return true
		})
		if count != 0 {
			t.Errorf("Expected all entries to be deleted, found %d (DeleteFolderChildren uses its own transactions)", count)
		}
	})

	// Test 3: Nested directories with batching
	t.Run("NestedDirectories", func(t *testing.T) {
		testDir3 := util.FullPath(fmt.Sprintf("/test_batch_3_%d", time.Now().UnixNano()))

		// Build a two-level tree: files + subdirectories, each with files.
		for i := 0; i < 50; i++ {
			// Files in root
			entry := &filer.Entry{
				FullPath: util.NewFullPath(string(testDir3), fmt.Sprintf("file_%02d.txt", i)),
				Attr: filer.Attr{
					Mode:  0644,
					Uid:   1000,
					Gid:   1000,
					Mtime: time.Now(),
				},
			}
			store.InsertEntry(ctx, entry)

			// Subdirectory
			subDir := &filer.Entry{
				FullPath: util.NewFullPath(string(testDir3), fmt.Sprintf("dir_%02d", i)),
				Attr: filer.Attr{
					Mode:  0755 | os.ModeDir,
					Uid:   1000,
					Gid:   1000,
					Mtime: time.Now(),
				},
			}
			store.InsertEntry(ctx, subDir)

			// Files in subdirectory
			for j := 0; j < 3; j++ {
				subEntry := &filer.Entry{
					FullPath: util.NewFullPath(string(testDir3)+"/"+fmt.Sprintf("dir_%02d", i), fmt.Sprintf("subfile_%02d.txt", j)),
					Attr: filer.Attr{
						Mode:  0644,
						Uid:   1000,
						Gid:   1000,
						Mtime: time.Now(),
					},
				}
				store.InsertEntry(ctx, subEntry)
			}
		}

		// Delete all with batching
		if err := store.DeleteFolderChildren(ctx, testDir3); err != nil {
			t.Errorf("DeleteFolderChildren should handle nested directories, got: %v", err)
		}

		// Verify all deleted
		var count int
		store.ListDirectoryEntries(ctx, testDir3, "", true, 1000, func(entry *filer.Entry) bool {
			count++
			return true
		})
		if count != 0 {
			t.Errorf("Expected all nested entries to be deleted, found %d", count)
		}
	})

	// Cleanup
	store.DeleteFolderChildren(ctx, testDir)
}
diff --git a/weed/server/filer_server.go b/weed/server/filer_server.go
index f395f6d60..79fb90742 100644
---
a/weed/server/filer_server.go +++ b/weed/server/filer_server.go @@ -28,6 +28,7 @@ import ( _ "github.com/seaweedfs/seaweedfs/weed/filer/cassandra2" _ "github.com/seaweedfs/seaweedfs/weed/filer/elastic/v7" _ "github.com/seaweedfs/seaweedfs/weed/filer/etcd" + _ "github.com/seaweedfs/seaweedfs/weed/filer/foundationdb" _ "github.com/seaweedfs/seaweedfs/weed/filer/hbase" _ "github.com/seaweedfs/seaweedfs/weed/filer/leveldb" _ "github.com/seaweedfs/seaweedfs/weed/filer/leveldb2" |
