aboutsummaryrefslogtreecommitdiff
path: root/weed
diff options
context:
space:
mode:
Diffstat (limited to 'weed')
-rw-r--r--weed/filer/foundationdb/CONFIGURATION.md385
-rw-r--r--weed/filer/foundationdb/INSTALL.md435
-rw-r--r--weed/filer/foundationdb/README.md221
-rw-r--r--weed/filer/foundationdb/doc.go13
-rw-r--r--weed/filer/foundationdb/foundationdb_store.go575
-rw-r--r--weed/filer/foundationdb/foundationdb_store_test.go545
-rw-r--r--weed/server/filer_server.go1
7 files changed, 2175 insertions, 0 deletions
diff --git a/weed/filer/foundationdb/CONFIGURATION.md b/weed/filer/foundationdb/CONFIGURATION.md
new file mode 100644
index 000000000..80f5bd357
--- /dev/null
+++ b/weed/filer/foundationdb/CONFIGURATION.md
@@ -0,0 +1,385 @@
+# FoundationDB Filer Store Configuration Reference
+
+This document provides comprehensive configuration options for the FoundationDB filer store.
+
+## Configuration Methods
+
+### 1. Configuration File (filer.toml)
+
+```toml
+[foundationdb]
+enabled = true
+cluster_file = "/etc/foundationdb/fdb.cluster"
+api_version = 740
+timeout = "5s"
+max_retry_delay = "1s"
+directory_prefix = "seaweedfs"
+```
+
+### 2. Environment Variables
+
+All configuration options can be set via environment variables with the `WEED_FOUNDATIONDB_` prefix:
+
+```bash
+export WEED_FOUNDATIONDB_ENABLED=true
+export WEED_FOUNDATIONDB_CLUSTER_FILE=/etc/foundationdb/fdb.cluster
+export WEED_FOUNDATIONDB_API_VERSION=740
+export WEED_FOUNDATIONDB_TIMEOUT=5s
+export WEED_FOUNDATIONDB_MAX_RETRY_DELAY=1s
+export WEED_FOUNDATIONDB_DIRECTORY_PREFIX=seaweedfs
+```
+
+### 3. Command Line Arguments
+
+While not directly supported, configuration can be specified via config files passed to the `weed` command.
+
+## Configuration Options
+
+### Basic Options
+
+| Option | Type | Default | Description |
+|--------|------|---------|-------------|
+| `enabled` | boolean | `false` | Enable the FoundationDB filer store |
+| `cluster_file` | string | `/etc/foundationdb/fdb.cluster` | Path to FoundationDB cluster file |
+| `api_version` | integer | `740` | FoundationDB API version to use |
+
+### Connection Options
+
+| Option | Type | Default | Description |
+|--------|------|---------|-------------|
+| `timeout` | duration | `5s` | Transaction timeout duration |
+| `max_retry_delay` | duration | `1s` | Maximum delay between retries |
+
+### Storage Options
+
+| Option | Type | Default | Description |
+|--------|------|---------|-------------|
+| `directory_prefix` | string | `seaweedfs` | Directory prefix for key organization |
+
+## Configuration Examples
+
+### Development Environment
+
+```toml
+[foundationdb]
+enabled = true
+cluster_file = "/var/fdb/config/fdb.cluster"
+api_version = 740
+timeout = "10s"
+max_retry_delay = "2s"
+directory_prefix = "seaweedfs_dev"
+```
+
+### Production Environment
+
+```toml
+[foundationdb]
+enabled = true
+cluster_file = "/etc/foundationdb/fdb.cluster"
+api_version = 740
+timeout = "30s"
+max_retry_delay = "5s"
+directory_prefix = "seaweedfs_prod"
+```
+
+### High-Performance Setup
+
+```toml
+[foundationdb]
+enabled = true
+cluster_file = "/etc/foundationdb/fdb.cluster"
+api_version = 740
+timeout = "60s"
+max_retry_delay = "10s"
+directory_prefix = "sw" # Shorter prefix for efficiency
+```
+
+### Path-Specific Configuration
+
+Configure different FoundationDB settings for different paths:
+
+```toml
+# Default configuration
+[foundationdb]
+enabled = true
+cluster_file = "/etc/foundationdb/fdb.cluster"
+directory_prefix = "seaweedfs_main"
+
+# Backup path with different prefix
+[foundationdb.backup]
+enabled = true
+cluster_file = "/etc/foundationdb/fdb.cluster"
+directory_prefix = "seaweedfs_backup"
+location = "/backup"
+timeout = "120s"
+
+# Archive path with extended timeouts
+[foundationdb.archive]
+enabled = true
+cluster_file = "/etc/foundationdb/fdb.cluster"
+directory_prefix = "seaweedfs_archive"
+location = "/archive"
+timeout = "300s"
+max_retry_delay = "30s"
+```
+
+## Configuration Validation
+
+### Required Settings
+
+The following settings are required for FoundationDB to function:
+
+1. `enabled = true`
+2. `cluster_file` must point to a valid FoundationDB cluster file
+3. `api_version` must match your FoundationDB installation
+
+### Validation Rules
+
+- `api_version` must be between 600 and 740
+- `timeout` must be a valid duration string (e.g., "5s", "30s", "2m")
+- `max_retry_delay` must be a valid duration string
+- `cluster_file` must exist and be readable
+- `directory_prefix` must not be empty
+
+### Error Handling
+
+Invalid configurations will result in startup errors:
+
+```
+FATAL: Failed to initialize store for foundationdb: invalid timeout duration
+FATAL: Failed to initialize store for foundationdb: failed to open FoundationDB database
+FATAL: Failed to initialize store for foundationdb: cluster file not found
+```
+
+## Performance Tuning
+
+### Timeout Configuration
+
+| Use Case | Timeout | Max Retry Delay | Notes |
+|----------|---------|-----------------|-------|
+| Interactive workloads | 5s | 1s | Fast response times |
+| Batch processing | 60s | 10s | Handle large operations |
+| Archive operations | 300s | 30s | Very large data sets |
+
+### Connection Pool Settings
+
+FoundationDB automatically manages connection pooling. No additional configuration needed.
+
+### Directory Organization
+
+Use meaningful directory prefixes to organize data:
+
+```toml
+# Separate environments
+directory_prefix = "prod_seaweedfs" # Production
+directory_prefix = "staging_seaweedfs" # Staging
+directory_prefix = "dev_seaweedfs" # Development
+
+# Separate applications
+directory_prefix = "app1_seaweedfs" # Application 1
+directory_prefix = "app2_seaweedfs" # Application 2
+```
+
+## Security Configuration
+
+### Cluster File Security
+
+Protect the FoundationDB cluster file:
+
+```bash
+# Set proper permissions
+sudo chown root:seaweedfs /etc/foundationdb/fdb.cluster
+sudo chmod 640 /etc/foundationdb/fdb.cluster
+```
+
+### Network Security
+
+FoundationDB supports TLS encryption. Configure in the cluster file:
+
+```
+description:cluster_id@server1:4500:tls,server2:4500:tls,server3:4500:tls
+```
+
+### Access Control
+
+Use FoundationDB's built-in access control mechanisms when available.
+
+## Monitoring Configuration
+
+### Health Check Settings
+
+Configure health check timeouts appropriately:
+
+```toml
+[foundationdb]
+enabled = true
+timeout = "10s" # Reasonable timeout for health checks
+```
+
+### Logging Configuration
+
+Enable verbose logging for troubleshooting:
+
+```bash
+# Start SeaweedFS with debug logs
+WEED_FOUNDATIONDB_ENABLED=true weed -v=2 server -filer
+```
+
+## Migration Configuration
+
+### From Other Filer Stores
+
+When migrating from other filer stores:
+
+1. Configure both stores temporarily
+2. Use path-specific configuration for gradual migration
+3. Migrate data using SeaweedFS tools
+
+```toml
+# During migration - keep old store for reads
+[leveldb2]
+enabled = true
+dir = "/old/filer/data"
+
+# New writes go to FoundationDB
+[foundationdb.migration]
+enabled = true
+location = "/new"
+cluster_file = "/etc/foundationdb/fdb.cluster"
+```
+
+## Backup Configuration
+
+### Metadata Backup Strategy
+
+```toml
+# Main storage
+[foundationdb]
+enabled = true
+directory_prefix = "seaweedfs_main"
+
+# Backup storage (different cluster recommended)
+[foundationdb.backup]
+enabled = true
+cluster_file = "/etc/foundationdb/backup_fdb.cluster"
+directory_prefix = "seaweedfs_backup"
+location = "/backup"
+```
+
+## Container Configuration
+
+### Docker Environment Variables
+
+```bash
+# Docker environment
+WEED_FOUNDATIONDB_ENABLED=true
+WEED_FOUNDATIONDB_CLUSTER_FILE=/var/fdb/config/fdb.cluster
+WEED_FOUNDATIONDB_API_VERSION=740
+```
+
+### Kubernetes ConfigMap
+
+```yaml
+apiVersion: v1
+kind: ConfigMap
+metadata:
+ name: seaweedfs-config
+data:
+ filer.toml: |
+ [foundationdb]
+ enabled = true
+ cluster_file = "/var/fdb/config/cluster_file"
+ api_version = 740
+ timeout = "30s"
+ max_retry_delay = "5s"
+ directory_prefix = "k8s_seaweedfs"
+```
+
+## Troubleshooting Configuration
+
+### Debug Configuration
+
+```toml
+[foundationdb]
+enabled = true
+cluster_file = "/etc/foundationdb/fdb.cluster"
+timeout = "60s" # Longer timeouts for debugging
+max_retry_delay = "10s"
+directory_prefix = "debug_seaweedfs"
+```
+
+### Test Configuration
+
+```toml
+[foundationdb]
+enabled = true
+cluster_file = "/tmp/fdb.cluster" # Test cluster
+timeout = "5s"
+directory_prefix = "test_seaweedfs"
+```
+
+## Configuration Best Practices
+
+### 1. Environment Separation
+
+Use different directory prefixes for different environments:
+- Production: `prod_seaweedfs`
+- Staging: `staging_seaweedfs`
+- Development: `dev_seaweedfs`
+
+### 2. Timeout Settings
+
+- Interactive: 5-10 seconds
+- Batch: 30-60 seconds
+- Archive: 120-300 seconds
+
+### 3. Cluster File Management
+
+- Use absolute paths for cluster files
+- Ensure proper file permissions
+- Keep backup copies of cluster files
+
+### 4. Directory Naming
+
+- Use descriptive prefixes
+- Include environment/application identifiers
+- Keep prefixes reasonably short for efficiency
+
+### 5. Error Handling
+
+- Configure appropriate timeouts
+- Monitor retry patterns
+- Set up alerting for configuration errors
+
+## Configuration Testing
+
+### Validation Script
+
+```bash
+#!/bin/bash
+# Test FoundationDB configuration
+
+# Check cluster file
+if [ ! -f "$WEED_FOUNDATIONDB_CLUSTER_FILE" ]; then
+ echo "ERROR: Cluster file not found: $WEED_FOUNDATIONDB_CLUSTER_FILE"
+ exit 1
+fi
+
+# Test connection
+fdbcli -C "$WEED_FOUNDATIONDB_CLUSTER_FILE" --exec 'status' > /dev/null
+if [ $? -ne 0 ]; then
+ echo "ERROR: Cannot connect to FoundationDB cluster"
+ exit 1
+fi
+
+echo "Configuration validation passed"
+```
+
+### Integration Testing
+
+```bash
+# Test configuration with SeaweedFS
+cd test/foundationdb
+make check-env
+make test-unit
+```
diff --git a/weed/filer/foundationdb/INSTALL.md b/weed/filer/foundationdb/INSTALL.md
new file mode 100644
index 000000000..7b3b128fa
--- /dev/null
+++ b/weed/filer/foundationdb/INSTALL.md
@@ -0,0 +1,435 @@
+# FoundationDB Filer Store Installation Guide
+
+This guide covers the installation and setup of the FoundationDB filer store for SeaweedFS.
+
+## Prerequisites
+
+### FoundationDB Server
+
+1. **Install FoundationDB Server**
+
+ **Ubuntu/Debian:**
+ ```bash
+ # Add FoundationDB repository
+ curl -L https://github.com/apple/foundationdb/releases/download/7.4.5/foundationdb-clients_7.4.5-1_amd64.deb -o foundationdb-clients.deb
+ curl -L https://github.com/apple/foundationdb/releases/download/7.4.5/foundationdb-server_7.4.5-1_amd64.deb -o foundationdb-server.deb
+
+ sudo dpkg -i foundationdb-clients.deb foundationdb-server.deb
+ ```
+
+ **CentOS/RHEL:**
+ ```bash
+ # Install RPM packages
+ wget https://github.com/apple/foundationdb/releases/download/7.4.5/foundationdb-clients-7.4.5-1.el7.x86_64.rpm
+ wget https://github.com/apple/foundationdb/releases/download/7.4.5/foundationdb-server-7.4.5-1.el7.x86_64.rpm
+
+ sudo rpm -Uvh foundationdb-clients-7.4.5-1.el7.x86_64.rpm foundationdb-server-7.4.5-1.el7.x86_64.rpm
+ ```
+
+ **macOS:**
+ ```bash
+ # Using Homebrew (if available)
+ brew install foundationdb
+
+ # Or download from GitHub releases
+ # https://github.com/apple/foundationdb/releases
+ ```
+
+2. **Initialize FoundationDB Cluster**
+
+ **Single Node (Development):**
+ ```bash
+ # Start FoundationDB service
+ sudo systemctl start foundationdb
+ sudo systemctl enable foundationdb
+
+ # Initialize database
+ fdbcli --exec 'configure new single ssd'
+ ```
+
+ **Multi-Node Cluster (Production):**
+ ```bash
+ # On each node, edit /etc/foundationdb/fdb.cluster
+ # Example: testing:testing@node1:4500,node2:4500,node3:4500
+
+ # On one node, initialize cluster
+ fdbcli --exec 'configure new double ssd'
+ ```
+
+3. **Verify Installation**
+ ```bash
+ fdbcli --exec 'status'
+ ```
+
+### FoundationDB Client Libraries
+
+The SeaweedFS FoundationDB integration requires the FoundationDB client libraries.
+
+**Ubuntu/Debian:**
+```bash
+sudo apt-get install libfdb-dev
+```
+
+**CentOS/RHEL:**
+```bash
+sudo yum install foundationdb-devel
+```
+
+**macOS:**
+```bash
+# Client libraries are included with the server installation
+export LIBRARY_PATH=/usr/local/lib
+export CPATH=/usr/local/include
+```
+
+## Building SeaweedFS with FoundationDB Support
+
+### Download FoundationDB Go Bindings
+
+```bash
+# Run inside the cloned SeaweedFS repository (it already has a go.mod);
+# do NOT create a separate module for the bindings.
+go get github.com/apple/foundationdb/bindings/go/src/fdb
+```
+
+### Build SeaweedFS
+
+```bash
+# Clone SeaweedFS repository
+git clone https://github.com/seaweedfs/seaweedfs.git
+cd seaweedfs
+
+# Build with FoundationDB support
+go build -tags foundationdb -o weed
+```
+
+### Verify Build
+
+```bash
+./weed version
+# Should show version information
+
+./weed help
+# Should list available commands
+```
+
+## Configuration
+
+### Basic Configuration
+
+Create or edit `filer.toml`:
+
+```toml
+[foundationdb]
+enabled = true
+cluster_file = "/etc/foundationdb/fdb.cluster"
+api_version = 740
+timeout = "5s"
+max_retry_delay = "1s"
+directory_prefix = "seaweedfs"
+```
+
+### Environment Variables
+
+Alternative configuration via environment variables:
+
+```bash
+export WEED_FOUNDATIONDB_ENABLED=true
+export WEED_FOUNDATIONDB_CLUSTER_FILE=/etc/foundationdb/fdb.cluster
+export WEED_FOUNDATIONDB_API_VERSION=740
+export WEED_FOUNDATIONDB_TIMEOUT=5s
+export WEED_FOUNDATIONDB_MAX_RETRY_DELAY=1s
+export WEED_FOUNDATIONDB_DIRECTORY_PREFIX=seaweedfs
+```
+
+### Advanced Configuration
+
+For production deployments:
+
+```toml
+[foundationdb]
+enabled = true
+cluster_file = "/etc/foundationdb/fdb.cluster"
+api_version = 740
+timeout = "30s"
+max_retry_delay = "5s"
+directory_prefix = "seaweedfs_prod"
+
+# Path-specific configuration for backups
+[foundationdb.backup]
+enabled = true
+cluster_file = "/etc/foundationdb/fdb.cluster"
+directory_prefix = "seaweedfs_backup"
+location = "/backup"
+timeout = "60s"
+```
+
+## Deployment
+
+### Single Node Deployment
+
+```bash
+# Start SeaweedFS with FoundationDB filer
+./weed server -filer \
+ -master.port=9333 \
+ -volume.port=8080 \
+ -filer.port=8888 \
+ -s3.port=8333
+```
+
+### Distributed Deployment
+
+**Master Servers:**
+```bash
+# Node 1
+./weed master -port=9333 -peers=master1:9333,master2:9333,master3:9333
+
+# Node 2
+./weed master -port=9333 -peers=master1:9333,master2:9333,master3:9333 -ip=master2
+
+# Node 3
+./weed master -port=9333 -peers=master1:9333,master2:9333,master3:9333 -ip=master3
+```
+
+**Filer Servers with FoundationDB:**
+```bash
+# Filer nodes
+./weed filer -master=master1:9333,master2:9333,master3:9333 -port=8888
+```
+
+**Volume Servers:**
+```bash
+./weed volume -master=master1:9333,master2:9333,master3:9333 -port=8080
+```
+
+### Docker Deployment
+
+**docker-compose.yml:**
+```yaml
+version: '3.9'
+services:
+ foundationdb:
+ image: foundationdb/foundationdb:7.4.5
+ ports:
+ - "4500:4500"
+ volumes:
+ - fdb_data:/var/fdb/data
+ - fdb_config:/var/fdb/config
+
+ seaweedfs:
+ image: chrislusf/seaweedfs:latest
+ command: "server -filer -ip=seaweedfs"
+ ports:
+ - "9333:9333"
+ - "8888:8888"
+ - "8333:8333"
+ environment:
+ WEED_FOUNDATIONDB_ENABLED: "true"
+ WEED_FOUNDATIONDB_CLUSTER_FILE: "/var/fdb/config/fdb.cluster"
+ volumes:
+ - fdb_config:/var/fdb/config
+ depends_on:
+ - foundationdb
+
+volumes:
+ fdb_data:
+ fdb_config:
+```
+
+### Kubernetes Deployment
+
+**FoundationDB Operator:**
+```bash
+# Install FoundationDB operator
+kubectl apply -f https://raw.githubusercontent.com/FoundationDB/fdb-kubernetes-operator/main/config/samples/deployment.yaml
+```
+
+**SeaweedFS with FoundationDB:**
+```yaml
+apiVersion: apps/v1
+kind: Deployment
+metadata:
+ name: seaweedfs-filer
+spec:
+ replicas: 3
+ selector:
+ matchLabels:
+ app: seaweedfs-filer
+ template:
+ metadata:
+ labels:
+ app: seaweedfs-filer
+ spec:
+ containers:
+ - name: seaweedfs
+ image: chrislusf/seaweedfs:latest
+ command: ["weed", "filer"]
+ env:
+ - name: WEED_FOUNDATIONDB_ENABLED
+ value: "true"
+ - name: WEED_FOUNDATIONDB_CLUSTER_FILE
+ value: "/var/fdb/config/cluster_file"
+ ports:
+ - containerPort: 8888
+ volumeMounts:
+ - name: fdb-config
+ mountPath: /var/fdb/config
+ volumes:
+ - name: fdb-config
+ configMap:
+ name: fdb-cluster-config
+```
+
+## Testing Installation
+
+### Quick Test
+
+```bash
+# Start SeaweedFS with the FoundationDB filer store enabled
+WEED_FOUNDATIONDB_ENABLED=true \
+WEED_FOUNDATIONDB_CLUSTER_FILE=/etc/foundationdb/fdb.cluster \
+./weed server -filer &
+
+# Test file operations
+echo "Hello FoundationDB" > test.txt
+curl -F file=@test.txt "http://localhost:8888/test/"
+curl "http://localhost:8888/test/test.txt"
+
+# Test S3 API
+curl -X PUT "http://localhost:8333/testbucket"
+curl -T test.txt "http://localhost:8333/testbucket/test.txt"
+```
+
+### Integration Test Suite
+
+```bash
+# Run the provided test suite
+cd test/foundationdb
+make setup
+make test
+```
+
+## Performance Tuning
+
+### FoundationDB Tuning
+
+```bash
+# Configure for high performance (fdbcli takes the storage engine as a bare token,
+# not a key=value pair; redwood requires FoundationDB 7.0+)
+fdbcli --exec 'configure triple ssd'
+fdbcli --exec 'configure ssd-redwood-1'
+```
+
+### SeaweedFS Configuration
+
+```toml
+[foundationdb]
+enabled = true
+cluster_file = "/etc/foundationdb/fdb.cluster"
+timeout = "10s" # Longer timeout for large operations
+max_retry_delay = "2s" # Adjust retry behavior
+directory_prefix = "sw" # Shorter prefix for efficiency
+```
+
+### OS-Level Tuning
+
+```bash
+# Increase file descriptor limits
+echo "* soft nofile 65536" >> /etc/security/limits.conf
+echo "* hard nofile 65536" >> /etc/security/limits.conf
+
+# Adjust network parameters
+echo "net.core.rmem_max = 134217728" >> /etc/sysctl.conf
+echo "net.core.wmem_max = 134217728" >> /etc/sysctl.conf
+sysctl -p
+```
+
+## Monitoring and Maintenance
+
+### Health Checks
+
+```bash
+# FoundationDB cluster health
+fdbcli --exec 'status'
+fdbcli --exec 'status details'
+
+# SeaweedFS health
+curl http://localhost:9333/cluster/status
+curl http://localhost:8888/statistics/health
+```
+
+### Log Monitoring
+
+**FoundationDB Logs:**
+- `/var/log/foundationdb/` (default location)
+- Monitor for errors, warnings, and performance issues
+
+**SeaweedFS Logs:**
+```bash
+# Start with verbose logging
+./weed -v=2 server -filer
+```
+
+### Backup and Recovery
+
+**FoundationDB Backup:**
+```bash
+# Start backup
+fdbbackup start -d file:///path/to/backup -t backup_tag
+
+# Monitor backup
+fdbbackup status -t backup_tag
+
+# Restore from backup
+fdbrestore start -r file:///path/to/backup -t backup_tag --wait
+```
+
+**SeaweedFS Metadata Backup:**
+```bash
+# Export filer metadata
+./weed shell
+> fs.meta.save /path/to/metadata/backup.gz
+```
+
+## Troubleshooting
+
+### Common Issues
+
+1. **Connection Refused**
+ - Check FoundationDB service status: `sudo systemctl status foundationdb`
+ - Verify cluster file: `cat /etc/foundationdb/fdb.cluster`
+ - Check network connectivity: `telnet localhost 4500`
+
+2. **API Version Mismatch**
+ - Update API version in configuration
+ - Rebuild SeaweedFS with matching FDB client library
+
+3. **Transaction Conflicts**
+ - Reduce transaction scope
+ - Implement appropriate retry logic
+ - Check for concurrent access patterns
+
+4. **Performance Issues**
+ - Monitor cluster status: `fdbcli --exec 'status details'`
+ - Check data distribution: `fdbcli --exec 'status json'`
+ - Verify storage configuration
+
+### Debug Mode
+
+```bash
+# Enable FoundationDB client tracing
+export FDB_TRACE_ENABLE=1
+export FDB_TRACE_PATH=/tmp/fdb_trace
+
+# Start SeaweedFS with debug logging
+./weed -v=3 server -filer
+```
+
+### Getting Help
+
+1. **FoundationDB Documentation**: https://apple.github.io/foundationdb/
+2. **SeaweedFS Community**: https://github.com/seaweedfs/seaweedfs/discussions
+3. **Issue Reporting**: https://github.com/seaweedfs/seaweedfs/issues
+
+For specific FoundationDB filer store issues, include:
+- FoundationDB version and cluster configuration
+- SeaweedFS version and build tags
+- Configuration files (filer.toml)
+- Error messages and logs
+- Steps to reproduce the issue
diff --git a/weed/filer/foundationdb/README.md b/weed/filer/foundationdb/README.md
new file mode 100644
index 000000000..68ba6416a
--- /dev/null
+++ b/weed/filer/foundationdb/README.md
@@ -0,0 +1,221 @@
+# FoundationDB Filer Store
+
+This package provides a FoundationDB-based filer store for SeaweedFS, offering ACID transactions and horizontal scalability.
+
+## Features
+
+- **ACID Transactions**: Strong consistency guarantees with full ACID properties
+- **Horizontal Scalability**: Automatic data distribution across multiple nodes
+- **High Availability**: Built-in fault tolerance and automatic failover
+- **Efficient Directory Operations**: Optimized for large directory listings
+- **Key-Value Support**: Full KV operations for metadata storage
+- **Compression**: Automatic compression for large entry chunks
+
+## Installation
+
+### Prerequisites
+
+1. **FoundationDB Server**: Install and configure a FoundationDB cluster
+2. **FoundationDB Client Libraries**: Install libfdb_c client libraries
+3. **Go Build Tags**: Use the `foundationdb` build tag when compiling
+
+### Building SeaweedFS with FoundationDB Support
+
+```bash
+go build -tags foundationdb -o weed
+```
+
+## Configuration
+
+### Basic Configuration
+
+Add the following to your `filer.toml`:
+
+```toml
+[foundationdb]
+enabled = true
+cluster_file = "/etc/foundationdb/fdb.cluster"
+api_version = 740
+timeout = "5s"
+max_retry_delay = "1s"
+directory_prefix = "seaweedfs"
+```
+
+### Configuration Options
+
+| Option | Description | Default | Required |
+|--------|-------------|---------|----------|
+| `enabled` | Enable FoundationDB filer store | `false` | Yes |
+| `cluster_file` | Path to FDB cluster file | `/etc/foundationdb/fdb.cluster` | Yes |
+| `api_version` | FoundationDB API version | `740` | No |
+| `timeout` | Operation timeout duration | `5s` | No |
+| `max_retry_delay` | Maximum retry delay | `1s` | No |
+| `directory_prefix` | Directory prefix for organization | `seaweedfs` | No |
+
+### Path-Specific Configuration
+
+For path-specific filer stores:
+
+```toml
+[foundationdb.backup]
+enabled = true
+cluster_file = "/etc/foundationdb/fdb.cluster"
+directory_prefix = "seaweedfs_backup"
+location = "/backup"
+```
+
+## Environment Variables
+
+Configure via environment variables:
+
+```bash
+export WEED_FOUNDATIONDB_ENABLED=true
+export WEED_FOUNDATIONDB_CLUSTER_FILE=/etc/foundationdb/fdb.cluster
+export WEED_FOUNDATIONDB_API_VERSION=740
+export WEED_FOUNDATIONDB_TIMEOUT=5s
+export WEED_FOUNDATIONDB_MAX_RETRY_DELAY=1s
+export WEED_FOUNDATIONDB_DIRECTORY_PREFIX=seaweedfs
+```
+
+## FoundationDB Cluster Setup
+
+### Single Node (Development)
+
+```bash
+# Start FoundationDB server (via systemd)
+sudo systemctl start foundationdb
+
+# Initialize database
+fdbcli --exec 'configure new single ssd'
+```
+
+### Multi-Node Cluster (Production)
+
+1. **Install FoundationDB** on all nodes
+2. **Configure cluster file** (`/etc/foundationdb/fdb.cluster`)
+3. **Initialize cluster**:
+ ```bash
+ fdbcli --exec 'configure new double ssd'
+ ```
+
+### Docker Setup
+
+Use the provided docker-compose.yml in `test/foundationdb/`:
+
+```bash
+cd test/foundationdb
+make setup
+```
+
+## Performance Considerations
+
+### Optimal Configuration
+
+- **API Version**: Use the latest stable API version (e.g. 740, matching your installed client libraries)
+- **Directory Structure**: Use logical directory prefixes to isolate different SeaweedFS instances
+- **Transaction Size**: Keep transactions under 10MB (FDB limit)
+- **Batch Operations**: Use transactions for multiple related operations
+
+### Monitoring
+
+Monitor FoundationDB cluster status:
+
+```bash
+fdbcli --exec 'status'
+fdbcli --exec 'status details'
+```
+
+### Scaling
+
+FoundationDB automatically handles:
+- Data distribution across nodes
+- Load balancing
+- Automatic failover
+- Storage node addition/removal
+
+## Testing
+
+### Unit Tests
+
+```bash
+cd weed/filer/foundationdb
+go test -tags foundationdb -v
+```
+
+### Integration Tests
+
+```bash
+cd test/foundationdb
+make test
+```
+
+### End-to-End Tests
+
+```bash
+cd test/foundationdb
+make test-e2e
+```
+
+## Troubleshooting
+
+### Common Issues
+
+1. **Connection Failures**:
+ - Verify cluster file path
+ - Check FoundationDB server status
+ - Validate network connectivity
+
+2. **Transaction Conflicts**:
+ - Reduce transaction scope
+ - Implement retry logic
+ - Check for concurrent operations
+
+3. **Performance Issues**:
+ - Monitor cluster health
+ - Check data distribution
+ - Optimize directory structure
+
+### Debug Information
+
+Enable verbose logging:
+
+```bash
+weed -v=2 server -filer
+```
+
+Check FoundationDB status:
+
+```bash
+fdbcli --exec 'status details'
+```
+
+## Security
+
+### Network Security
+
+- Configure TLS for FoundationDB connections
+- Use firewall rules to restrict access
+- Monitor connection attempts
+
+### Data Encryption
+
+- Enable encryption at rest in FoundationDB
+- Use encrypted connections
+- Implement proper key management
+
+## Limitations
+
+- Maximum transaction size: 10MB
+- Single transaction timeout: configurable (default 5s)
+- API version compatibility required
+- Requires FoundationDB cluster setup
+
+## Support
+
+For issues specific to the FoundationDB filer store:
+1. Check FoundationDB cluster status
+2. Verify configuration settings
+3. Review SeaweedFS logs with verbose output
+4. Test with minimal reproduction case
+
+For FoundationDB-specific issues, consult the [FoundationDB documentation](https://apple.github.io/foundationdb/).
diff --git a/weed/filer/foundationdb/doc.go b/weed/filer/foundationdb/doc.go
new file mode 100644
index 000000000..3b3a20bc4
--- /dev/null
+++ b/weed/filer/foundationdb/doc.go
@@ -0,0 +1,13 @@
+/*
+Package foundationdb provides a FoundationDB-based filer store for SeaweedFS.
+
+FoundationDB is a distributed ACID database with strong consistency guarantees
+and excellent scalability characteristics. This filer store leverages FDB's
+directory layer for organizing file metadata and its key-value interface for
+efficient storage and retrieval.
+
+The referenced "github.com/apple/foundationdb/bindings/go/src/fdb" library
+requires FoundationDB client libraries to be installed.
+So this is only compiled with "go build -tags foundationdb".
+*/
+package foundationdb
diff --git a/weed/filer/foundationdb/foundationdb_store.go b/weed/filer/foundationdb/foundationdb_store.go
new file mode 100644
index 000000000..509ee4b86
--- /dev/null
+++ b/weed/filer/foundationdb/foundationdb_store.go
@@ -0,0 +1,575 @@
+//go:build foundationdb
+// +build foundationdb
+
+// Package foundationdb provides a filer store implementation using FoundationDB as the backend.
+//
+// IMPORTANT DESIGN NOTE - DeleteFolderChildren and Transaction Limits:
+//
+// FoundationDB imposes strict transaction limits:
+// - Maximum transaction size: 10MB
+// - Maximum transaction duration: 5 seconds
+//
+// The DeleteFolderChildren operation always uses batched deletion with multiple small transactions
+// to safely handle directories of any size. Even if called within an existing transaction context,
+// it will create its own batch transactions to avoid exceeding FDB limits.
+//
+// This means DeleteFolderChildren is NOT atomic with respect to an outer transaction - it manages
+// its own transaction boundaries for safety and reliability.
+
+package foundationdb
+
+import (
+ "bytes"
+ "context"
+ "fmt"
+ "time"
+
+ "github.com/apple/foundationdb/bindings/go/src/fdb"
+ "github.com/apple/foundationdb/bindings/go/src/fdb/directory"
+ "github.com/apple/foundationdb/bindings/go/src/fdb/tuple"
+
+ "github.com/seaweedfs/seaweedfs/weed/filer"
+ "github.com/seaweedfs/seaweedfs/weed/glog"
+ "github.com/seaweedfs/seaweedfs/weed/pb/filer_pb"
+ "github.com/seaweedfs/seaweedfs/weed/util"
+)
+
+const (
+	// FDB_TRANSACTION_SIZE_LIMIT mirrors FoundationDB's hard cap on the total
+	// size of a single transaction: 10MB.
+	FDB_TRANSACTION_SIZE_LIMIT = 10 * 1024 * 1024
+	// MAX_DIRECTORY_LIST_LIMIT caps the number of entries returned by a single
+	// directory listing; callers paginate via the returned lastFileName.
+	// Large batches can cause transaction timeouts and increase memory pressure.
+	MAX_DIRECTORY_LIST_LIMIT = 1000
+)
+
+// init registers this store so it can be selected in filer.toml as "foundationdb".
+func init() {
+	filer.Stores = append(filer.Stores, &FoundationDBStore{})
+}
+
+// FoundationDBStore is a filer store backed by a FoundationDB cluster.
+// Entry metadata lives in the seaweedfsDir subspace keyed by (dirPath, fileName);
+// generic KV pairs live in the kvDir subspace.
+type FoundationDBStore struct {
+	database fdb.Database
+	seaweedfsDir directory.DirectorySubspace
+	kvDir directory.DirectorySubspace
+	directoryPrefix string
+	timeout time.Duration
+	maxRetryDelay time.Duration
+}
+
+// contextKey is a private type for context values, preventing collisions with
+// keys defined by other packages.
+type contextKey string
+
+// transactionKey is the context key under which BeginTransaction stores an
+// ambient fdb.Transaction for the CRUD methods to join.
+const transactionKey contextKey = "fdb_transaction"
+
+// Helper functions for context-scoped transactions
+func (store *FoundationDBStore) getTransactionFromContext(ctx context.Context) (fdb.Transaction, bool) {
+ val := ctx.Value(transactionKey)
+ if val == nil {
+ var emptyTx fdb.Transaction
+ return emptyTx, false
+ }
+ if tx, ok := val.(fdb.Transaction); ok {
+ return tx, true
+ }
+ var emptyTx fdb.Transaction
+ return emptyTx, false
+}
+
+// setTransactionInContext returns a child context carrying tx so subsequent
+// store calls on that context join the same FDB transaction.
+func (store *FoundationDBStore) setTransactionInContext(ctx context.Context, tx fdb.Transaction) context.Context {
+	return context.WithValue(ctx, transactionKey, tx)
+}
+
+// GetName returns the store identifier used in filer.toml section names.
+func (store *FoundationDBStore) GetName() string {
+	return "foundationdb"
+}
+
+func (store *FoundationDBStore) Initialize(configuration util.Configuration, prefix string) error {
+ // Set default configuration values
+ configuration.SetDefault(prefix+"cluster_file", "/etc/foundationdb/fdb.cluster")
+ configuration.SetDefault(prefix+"api_version", 740)
+ configuration.SetDefault(prefix+"timeout", "5s")
+ configuration.SetDefault(prefix+"max_retry_delay", "1s")
+ configuration.SetDefault(prefix+"directory_prefix", "seaweedfs")
+
+ clusterFile := configuration.GetString(prefix + "cluster_file")
+ apiVersion := configuration.GetInt(prefix + "api_version")
+ timeoutStr := configuration.GetString(prefix + "timeout")
+ maxRetryDelayStr := configuration.GetString(prefix + "max_retry_delay")
+ store.directoryPrefix = configuration.GetString(prefix + "directory_prefix")
+
+ // Parse timeout values
+ var err error
+ store.timeout, err = time.ParseDuration(timeoutStr)
+ if err != nil {
+ return fmt.Errorf("invalid timeout duration %s: %w", timeoutStr, err)
+ }
+
+ store.maxRetryDelay, err = time.ParseDuration(maxRetryDelayStr)
+ if err != nil {
+ return fmt.Errorf("invalid max_retry_delay duration %s: %w", maxRetryDelayStr, err)
+ }
+
+ return store.initialize(clusterFile, apiVersion)
+}
+
+// initialize sets the FDB API version, opens the database described by
+// clusterFile, and creates/opens the directory-layer subspaces this store
+// uses. It must complete successfully before any other store operation.
+func (store *FoundationDBStore) initialize(clusterFile string, apiVersion int) error {
+	glog.V(0).Infof("FoundationDB: connecting to cluster file: %s, API version: %d", clusterFile, apiVersion)
+
+	// Set FDB API version; it must be supported by the installed client library.
+	if err := fdb.APIVersion(apiVersion); err != nil {
+		return fmt.Errorf("failed to set FoundationDB API version %d: %w", apiVersion, err)
+	}
+
+	// Open database
+	var err error
+	store.database, err = fdb.OpenDatabase(clusterFile)
+	if err != nil {
+		return fmt.Errorf("failed to open FoundationDB database: %w", err)
+	}
+
+	// Create/open seaweedfs directory for entry metadata
+	store.seaweedfsDir, err = directory.CreateOrOpen(store.database, []string{store.directoryPrefix}, nil)
+	if err != nil {
+		return fmt.Errorf("failed to create/open seaweedfs directory: %w", err)
+	}
+
+	// Create/open kv subdirectory for key-value operations
+	store.kvDir, err = directory.CreateOrOpen(store.database, []string{store.directoryPrefix, "kv"}, nil)
+	if err != nil {
+		return fmt.Errorf("failed to create/open kv directory: %w", err)
+	}
+
+	glog.V(0).Infof("FoundationDB store initialized successfully with directory prefix: %s", store.directoryPrefix)
+	return nil
+}
+
+// BeginTransaction creates a new FDB transaction and returns a derived context
+// carrying it. Beginning a second transaction on the same context is an error.
+func (store *FoundationDBStore) BeginTransaction(ctx context.Context) (context.Context, error) {
+	if _, active := store.getTransactionFromContext(ctx); active {
+		return ctx, fmt.Errorf("transaction already in progress for this context")
+	}
+
+	txn, err := store.database.CreateTransaction()
+	if err != nil {
+		return ctx, fmt.Errorf("failed to create transaction: %w", err)
+	}
+
+	return store.setTransactionInContext(ctx, txn), nil
+}
+
+// CommitTransaction commits the transaction carried by ctx, blocking until the
+// commit future resolves.
+func (store *FoundationDBStore) CommitTransaction(ctx context.Context) error {
+	txn, active := store.getTransactionFromContext(ctx)
+	if !active {
+		return fmt.Errorf("no transaction in progress for this context")
+	}
+
+	if err := txn.Commit().Get(); err != nil {
+		return fmt.Errorf("failed to commit transaction: %w", err)
+	}
+	return nil
+}
+
+// RollbackTransaction cancels the transaction carried by ctx, discarding its
+// pending writes.
+func (store *FoundationDBStore) RollbackTransaction(ctx context.Context) error {
+	txn, active := store.getTransactionFromContext(ctx)
+	if !active {
+		return fmt.Errorf("no transaction in progress for this context")
+	}
+
+	txn.Cancel()
+	return nil
+}
+
+// InsertEntry stores a new entry. FDB's Set is an upsert, so insert and update
+// share one implementation.
+func (store *FoundationDBStore) InsertEntry(ctx context.Context, entry *filer.Entry) error {
+	return store.UpdateEntry(ctx, entry)
+}
+
+// UpdateEntry encodes the entry's attributes and chunks and writes them under
+// the (dirPath, fileName) key, joining the ambient transaction when present.
+// Returns an error without writing when the encoded record alone would exceed
+// FoundationDB's transaction size limit.
+func (store *FoundationDBStore) UpdateEntry(ctx context.Context, entry *filer.Entry) error {
+	key := store.genKey(entry.DirAndName())
+
+	value, err := entry.EncodeAttributesAndChunks()
+	if err != nil {
+		return fmt.Errorf("encoding %s %+v: %w", entry.FullPath, entry.Attr, err)
+	}
+
+	// Compress chunk-heavy entries, matching the other filer stores.
+	if len(entry.GetChunks()) > filer.CountEntryChunksForGzip {
+		value = util.MaybeGzipData(value)
+	}
+
+	// FoundationDB's 10MB transaction limit counts keys as well as values, so
+	// include the key length in the pre-check.
+	// NOTE(review): FDB also enforces a per-value limit (100KB by default),
+	// which a very chunk-heavy entry could hit first — confirm against the
+	// cluster's configured limits.
+	if len(key)+len(value) > FDB_TRANSACTION_SIZE_LIMIT {
+		return fmt.Errorf("entry %s exceeds FoundationDB transaction size limit (%d > %d bytes)",
+			entry.FullPath, len(key)+len(value), FDB_TRANSACTION_SIZE_LIMIT)
+	}
+
+	// Inside an ambient transaction, just record the write; the outer
+	// CommitTransaction makes it durable.
+	if tx, exists := store.getTransactionFromContext(ctx); exists {
+		tx.Set(key, value)
+		return nil
+	}
+
+	// Otherwise write in a fresh auto-retrying transaction.
+	_, err = store.database.Transact(func(tr fdb.Transaction) (interface{}, error) {
+		tr.Set(key, value)
+		return nil, nil
+	})
+
+	if err != nil {
+		return fmt.Errorf("persisting %s: %w", entry.FullPath, err)
+	}
+
+	return nil
+}
+
+// FindEntry loads and decodes the entry stored under fullpath's
+// (dirPath, fileName) key. Returns filer_pb.ErrNotFound when the key is absent.
+func (store *FoundationDBStore) FindEntry(ctx context.Context, fullpath util.FullPath) (entry *filer.Entry, err error) {
+	key := store.genKey(util.FullPath(fullpath).DirAndName())
+
+	var data []byte
+	// Check if there's a transaction in context
+	if tx, exists := store.getTransactionFromContext(ctx); exists {
+		data, err = tx.Get(key).Get()
+	} else {
+		var result interface{}
+		result, err = store.database.ReadTransact(func(rtr fdb.ReadTransaction) (interface{}, error) {
+			return rtr.Get(key).Get()
+		})
+		if err == nil {
+			// ReadTransact returns interface{}; recover the byte slice.
+			// An absent key yields a nil slice, handled below.
+			if resultBytes, ok := result.([]byte); ok {
+				data = resultBytes
+			}
+		}
+	}
+
+	if err != nil {
+		return nil, fmt.Errorf("find entry %s: %w", fullpath, err)
+	}
+
+	// FDB returns nil for keys that do not exist.
+	if data == nil {
+		return nil, filer_pb.ErrNotFound
+	}
+
+	entry = &filer.Entry{
+		FullPath: fullpath,
+	}
+
+	// Stored bytes may be gzip-compressed (see UpdateEntry); decompress if so.
+	err = entry.DecodeAttributesAndChunks(util.MaybeDecompressData(data))
+	if err != nil {
+		return entry, fmt.Errorf("decode %s : %w", entry.FullPath, err)
+	}
+
+	return entry, nil
+}
+
+// DeleteEntry removes the metadata key for fullpath. Clearing a non-existent
+// key is a no-op in FDB, so no existence check is required.
+func (store *FoundationDBStore) DeleteEntry(ctx context.Context, fullpath util.FullPath) error {
+	key := store.genKey(util.FullPath(fullpath).DirAndName())
+
+	// Inside an ambient transaction, just record the clear.
+	if tx, exists := store.getTransactionFromContext(ctx); exists {
+		tx.Clear(key)
+		return nil
+	}
+
+	// Otherwise clear within a fresh auto-retrying transaction.
+	if _, err := store.database.Transact(func(tr fdb.Transaction) (interface{}, error) {
+		tr.Clear(key)
+		return nil, nil
+	}); err != nil {
+		return fmt.Errorf("deleting %s: %w", fullpath, err)
+	}
+
+	return nil
+}
+
+// DeleteFolderChildren removes every entry beneath fullpath (but not fullpath
+// itself), recursing into subdirectories.
+func (store *FoundationDBStore) DeleteFolderChildren(ctx context.Context, fullpath util.FullPath) error {
+	// Recursively delete all entries in this directory and its subdirectories.
+	// Recursion is required because the key structure is tuple{dirPath, fileName},
+	// not tuple{dirPath, ...pathComponents}, so a simple prefix range won't catch
+	// subdirectories.
+
+	// ALWAYS use batched deletion to safely handle directories of any size.
+	// This avoids FoundationDB's 10MB transaction size and 5s timeout limits.
+	//
+	// Note: Even if called within an existing transaction, we create our own
+	// batch transactions. This means DeleteFolderChildren is NOT atomic with an
+	// outer transaction, but it ensures reliability and prevents transaction
+	// limit violations.
+	return store.deleteFolderChildrenInBatches(ctx, fullpath)
+}
+
+// deleteFolderChildrenInBatches deletes directory contents in multiple transactions
+// to avoid hitting FoundationDB's transaction size (10MB) and time (5s) limits.
+// Subdirectories are emptied depth-first before their own entries are removed.
+func (store *FoundationDBStore) deleteFolderChildrenInBatches(ctx context.Context, fullpath util.FullPath) error {
+	const BATCH_SIZE = 100 // Delete up to 100 entries per transaction
+
+	// Ensure listing and recursion run outside of any ambient transaction.
+	// The sentinel is a typed-nil pointer: ctx.Value then returns non-nil, but
+	// the fdb.Transaction type assertion in getTransactionFromContext fails,
+	// so all calls below behave as if no transaction were in progress.
+	ctxNoTxn := context.WithValue(ctx, transactionKey, (*struct{})(nil))
+
+	for {
+		// Collect one batch of entries
+		var entriesToDelete []util.FullPath
+		var subDirectories []util.FullPath
+
+		// List entries - we'll process BATCH_SIZE at a time.
+		// Listing always restarts from the beginning: each pass deletes what it
+		// lists, so the next pass sees the remaining entries.
+		_, err := store.ListDirectoryEntries(ctxNoTxn, fullpath, "", true, int64(BATCH_SIZE), func(entry *filer.Entry) bool {
+			entriesToDelete = append(entriesToDelete, entry.FullPath)
+			if entry.IsDirectory() {
+				subDirectories = append(subDirectories, entry.FullPath)
+			}
+			return true
+		})
+
+		if err != nil {
+			return fmt.Errorf("listing children of %s: %w", fullpath, err)
+		}
+
+		// If no entries found, we're done
+		if len(entriesToDelete) == 0 {
+			break
+		}
+
+		// Recursively delete subdirectories first (also in batches)
+		for _, subDir := range subDirectories {
+			if err := store.deleteFolderChildrenInBatches(ctxNoTxn, subDir); err != nil {
+				return err
+			}
+		}
+
+		// Delete this batch of entries in a single transaction
+		_, err = store.database.Transact(func(tr fdb.Transaction) (interface{}, error) {
+			// Fresh context carrying this batch's transaction so DeleteEntry
+			// joins it instead of opening one transaction per entry.
+			txCtx := store.setTransactionInContext(context.Background(), tr)
+			for _, entryPath := range entriesToDelete {
+				if delErr := store.DeleteEntry(txCtx, entryPath); delErr != nil {
+					return nil, fmt.Errorf("deleting entry %s: %w", entryPath, delErr)
+				}
+			}
+			return nil, nil
+		})
+
+		if err != nil {
+			return err
+		}
+
+		// If we got fewer entries than BATCH_SIZE, we're done with this directory
+		if len(entriesToDelete) < BATCH_SIZE {
+			break
+		}
+	}
+
+	return nil
+}
+
+// ListDirectoryEntries lists children of dirPath with no filename-prefix
+// filtering; it delegates to ListDirectoryPrefixedEntries with an empty prefix.
+func (store *FoundationDBStore) ListDirectoryEntries(ctx context.Context, dirPath util.FullPath, startFileName string, includeStartFile bool, limit int64, eachEntryFunc filer.ListEachEntryFunc) (lastFileName string, err error) {
+	return store.ListDirectoryPrefixedEntries(ctx, dirPath, startFileName, includeStartFile, limit, "", eachEntryFunc)
+}
+
+// ListDirectoryPrefixedEntries streams up to limit children of dirPath whose
+// names start with prefix, beginning at startFileName, invoking eachEntryFunc
+// for each decoded entry. It returns the name of the last entry for which
+// eachEntryFunc ran to completion, so callers can paginate. The limit is
+// capped at MAX_DIRECTORY_LIST_LIMIT regardless of the requested value.
+func (store *FoundationDBStore) ListDirectoryPrefixedEntries(ctx context.Context, dirPath util.FullPath, startFileName string, includeStartFile bool, limit int64, prefix string, eachEntryFunc filer.ListEachEntryFunc) (lastFileName string, err error) {
+	// Cap limit for optimal FoundationDB performance
+	// Large batches can cause transaction timeouts and increase memory pressure
+	if limit > MAX_DIRECTORY_LIST_LIMIT || limit <= 0 {
+		limit = MAX_DIRECTORY_LIST_LIMIT
+	}
+
+	// Get the range for the entire directory first
+	dirTuple := tuple.Tuple{string(dirPath)}
+	dirRange, err := fdb.PrefixRange(store.seaweedfsDir.Pack(dirTuple))
+	if err != nil {
+		return "", fmt.Errorf("creating prefix range for %s: %w", dirPath, err)
+	}
+
+	// Determine the key range for the scan
+	// Use FDB's range capabilities to only fetch keys matching the prefix
+	var beginKey, endKey fdb.Key
+	dirBeginConv, dirEndConv := dirRange.FDBRangeKeys()
+	dirBegin := dirBeginConv.FDBKey()
+	dirEnd := dirEndConv.FDBKey()
+
+	if prefix != "" {
+		// Build range by bracketing the filename component
+		// Start at Pack(dirPath, prefix) and end at Pack(dirPath, nextPrefix)
+		// where nextPrefix is the next lexicographic string
+		beginKey = store.seaweedfsDir.Pack(tuple.Tuple{string(dirPath), prefix})
+		endKey = dirEnd
+
+		// Use Strinc to get the next string for proper prefix range.
+		// Strinc can fail (e.g. all-0xFF input); the directory end key is
+		// already in place as the fallback.
+		if nextPrefix, strincErr := fdb.Strinc([]byte(prefix)); strincErr == nil {
+			endKey = store.seaweedfsDir.Pack(tuple.Tuple{string(dirPath), string(nextPrefix)})
+		}
+	} else {
+		// Use entire directory range
+		beginKey = dirBegin
+		endKey = dirEnd
+	}
+
+	// Determine start key and selector based on startFileName
+	var beginSelector fdb.KeySelector
+	if startFileName != "" {
+		// Start from the specified file
+		startKey := store.seaweedfsDir.Pack(tuple.Tuple{string(dirPath), startFileName})
+		if includeStartFile {
+			beginSelector = fdb.FirstGreaterOrEqual(startKey)
+		} else {
+			beginSelector = fdb.FirstGreaterThan(startKey)
+		}
+		// Ensure beginSelector is within our desired range
+		if bytes.Compare(beginSelector.Key.FDBKey(), beginKey.FDBKey()) < 0 {
+			beginSelector = fdb.FirstGreaterOrEqual(beginKey)
+		}
+	} else {
+		// Start from beginning of the range
+		beginSelector = fdb.FirstGreaterOrEqual(beginKey)
+	}
+
+	// End selector is the end of our calculated range
+	endSelector := fdb.FirstGreaterOrEqual(endKey)
+
+	var kvs []fdb.KeyValue
+	var rangeErr error
+	// Check if there's a transaction in context
+	if tx, exists := store.getTransactionFromContext(ctx); exists {
+		sr := fdb.SelectorRange{Begin: beginSelector, End: endSelector}
+		kvs, rangeErr = tx.GetRange(sr, fdb.RangeOptions{Limit: int(limit)}).GetSliceWithError()
+		if rangeErr != nil {
+			return "", fmt.Errorf("scanning %s: %w", dirPath, rangeErr)
+		}
+	} else {
+		result, err := store.database.ReadTransact(func(rtr fdb.ReadTransaction) (interface{}, error) {
+			sr := fdb.SelectorRange{Begin: beginSelector, End: endSelector}
+			kvSlice, err := rtr.GetRange(sr, fdb.RangeOptions{Limit: int(limit)}).GetSliceWithError()
+			if err != nil {
+				return nil, err
+			}
+			return kvSlice, nil
+		})
+		if err != nil {
+			return "", fmt.Errorf("scanning %s: %w", dirPath, err)
+		}
+		var ok bool
+		kvs, ok = result.([]fdb.KeyValue)
+		if !ok {
+			return "", fmt.Errorf("unexpected type from ReadTransact: %T, expected []fdb.KeyValue", result)
+		}
+	}
+
+	// Decode each key/value pair; undecodable entries are logged and skipped
+	// rather than aborting the whole listing.
+	for _, kv := range kvs {
+		fileName, extractErr := store.extractFileName(kv.Key)
+		if extractErr != nil {
+			glog.Warningf("list %s: failed to extract fileName from key %v: %v", dirPath, kv.Key, extractErr)
+			continue
+		}
+
+		entry := &filer.Entry{
+			FullPath: util.NewFullPath(string(dirPath), fileName),
+		}
+
+		if decodeErr := entry.DecodeAttributesAndChunks(util.MaybeDecompressData(kv.Value)); decodeErr != nil {
+			glog.V(0).Infof("list %s : %v", entry.FullPath, decodeErr)
+			continue
+		}
+
+		// When eachEntryFunc returns false, iteration stops and lastFileName
+		// still refers to the previous (fully processed) entry.
+		if !eachEntryFunc(entry) {
+			break
+		}
+		lastFileName = fileName
+	}
+
+	return lastFileName, nil
+}
+
+// KV operations
+
+// KvPut stores a raw key/value pair in the kv subspace, joining the ambient
+// transaction when present.
+func (store *FoundationDBStore) KvPut(ctx context.Context, key []byte, value []byte) error {
+	fdbKey := store.kvDir.Pack(tuple.Tuple{key})
+
+	// Inside an ambient transaction, just record the write.
+	if tx, exists := store.getTransactionFromContext(ctx); exists {
+		tx.Set(fdbKey, value)
+		return nil
+	}
+
+	// Otherwise write in a fresh auto-retrying transaction.
+	write := func(tr fdb.Transaction) (interface{}, error) {
+		tr.Set(fdbKey, value)
+		return nil, nil
+	}
+	_, err := store.database.Transact(write)
+	return err
+}
+
+// KvGet reads the value stored under key in the kv subspace.
+// Returns filer.ErrKvNotFound when the key is absent.
+func (store *FoundationDBStore) KvGet(ctx context.Context, key []byte) ([]byte, error) {
+	fdbKey := store.kvDir.Pack(tuple.Tuple{key})
+
+	var data []byte
+	var err error
+
+	// Check if there's a transaction in context
+	if tx, exists := store.getTransactionFromContext(ctx); exists {
+		data, err = tx.Get(fdbKey).Get()
+	} else {
+		var result interface{}
+		result, err = store.database.ReadTransact(func(rtr fdb.ReadTransaction) (interface{}, error) {
+			return rtr.Get(fdbKey).Get()
+		})
+		if err == nil {
+			// Recover the byte slice from the interface{} ReadTransact returns.
+			if resultBytes, ok := result.([]byte); ok {
+				data = resultBytes
+			}
+		}
+	}
+
+	if err != nil {
+		return nil, fmt.Errorf("kv get %s: %w", string(key), err)
+	}
+	// FDB returns nil for keys that do not exist.
+	if data == nil {
+		return nil, filer.ErrKvNotFound
+	}
+
+	return data, nil
+}
+
+// KvDelete removes key from the kv subspace; clearing an absent key is a no-op.
+func (store *FoundationDBStore) KvDelete(ctx context.Context, key []byte) error {
+	fdbKey := store.kvDir.Pack(tuple.Tuple{key})
+
+	// Inside an ambient transaction, just record the clear.
+	if tx, exists := store.getTransactionFromContext(ctx); exists {
+		tx.Clear(fdbKey)
+		return nil
+	}
+
+	// Otherwise clear within a fresh auto-retrying transaction.
+	clearKey := func(tr fdb.Transaction) (interface{}, error) {
+		tr.Clear(fdbKey)
+		return nil, nil
+	}
+	_, err := store.database.Transact(clearKey)
+	return err
+}
+
+// Shutdown logs the shutdown; the FDB Go bindings expose no explicit close for
+// Database, so there is nothing to release here.
+func (store *FoundationDBStore) Shutdown() {
+	// FoundationDB doesn't have an explicit close method for Database
+	glog.V(0).Infof("FoundationDB store shutdown")
+}
+
+// Helper functions
+
+// genKey packs (dirPath, fileName) into a key inside the seaweedfs subspace.
+func (store *FoundationDBStore) genKey(dirPath, fileName string) fdb.Key {
+	return store.seaweedfsDir.Pack(tuple.Tuple{dirPath, fileName})
+}
+
+// extractFileName recovers the fileName component from a packed
+// (dirPath, fileName) key, erroring on any unexpected tuple shape.
+func (store *FoundationDBStore) extractFileName(key fdb.Key) (string, error) {
+	elements, err := store.seaweedfsDir.Unpack(key)
+	if err != nil {
+		return "", fmt.Errorf("unpack key %v: %w", key, err)
+	}
+	if len(elements) != 2 {
+		return "", fmt.Errorf("tuple unexpected length (len=%d, expected 2) for key %v", len(elements), key)
+	}
+	fileName, ok := elements[1].(string)
+	if !ok {
+		return "", fmt.Errorf("second element not a string (type=%T) for key %v", elements[1], key)
+	}
+	return fileName, nil
+}
diff --git a/weed/filer/foundationdb/foundationdb_store_test.go b/weed/filer/foundationdb/foundationdb_store_test.go
new file mode 100644
index 000000000..215c98c76
--- /dev/null
+++ b/weed/filer/foundationdb/foundationdb_store_test.go
@@ -0,0 +1,545 @@
+//go:build foundationdb
+// +build foundationdb
+
+package foundationdb
+
+import (
+ "context"
+ "errors"
+ "fmt"
+ "os"
+ "strings"
+ "testing"
+ "time"
+
+ "github.com/seaweedfs/seaweedfs/weed/filer"
+ "github.com/seaweedfs/seaweedfs/weed/pb/filer_pb"
+ "github.com/seaweedfs/seaweedfs/weed/util"
+)
+
+// TestFoundationDBStore_Initialize verifies store name and default directory
+// prefix after initializing against a live cluster; skipped when FDB is
+// unreachable.
+func TestFoundationDBStore_Initialize(t *testing.T) {
+	// Test with default configuration
+	config := util.GetViper()
+	config.Set("foundationdb.cluster_file", getTestClusterFile())
+	config.Set("foundationdb.api_version", 740)
+
+	store := &FoundationDBStore{}
+	err := store.Initialize(config, "foundationdb.")
+	if err != nil {
+		t.Skip("FoundationDB not available for testing, skipping")
+	}
+
+	defer store.Shutdown()
+
+	if store.GetName() != "foundationdb" {
+		t.Errorf("Expected store name 'foundationdb', got '%s'", store.GetName())
+	}
+
+	if store.directoryPrefix != "seaweedfs" {
+		t.Errorf("Expected default directory prefix 'seaweedfs', got '%s'", store.directoryPrefix)
+	}
+}
+
+// TestFoundationDBStore_InitializeWithCustomConfig verifies that explicit
+// configuration values override the defaults for prefix and both durations.
+func TestFoundationDBStore_InitializeWithCustomConfig(t *testing.T) {
+	config := util.GetViper()
+	config.Set("foundationdb.cluster_file", getTestClusterFile())
+	config.Set("foundationdb.api_version", 740)
+	config.Set("foundationdb.timeout", "10s")
+	config.Set("foundationdb.max_retry_delay", "2s")
+	config.Set("foundationdb.directory_prefix", "custom_prefix")
+
+	store := &FoundationDBStore{}
+	err := store.Initialize(config, "foundationdb.")
+	if err != nil {
+		t.Skip("FoundationDB not available for testing, skipping")
+	}
+
+	defer store.Shutdown()
+
+	if store.directoryPrefix != "custom_prefix" {
+		t.Errorf("Expected custom directory prefix 'custom_prefix', got '%s'", store.directoryPrefix)
+	}
+
+	if store.timeout != 10*time.Second {
+		t.Errorf("Expected timeout 10s, got %v", store.timeout)
+	}
+
+	if store.maxRetryDelay != 2*time.Second {
+		t.Errorf("Expected max retry delay 2s, got %v", store.maxRetryDelay)
+	}
+}
+
+// TestFoundationDBStore_InitializeInvalidConfig verifies that malformed
+// duration options make Initialize fail fast with a descriptive error,
+// before any cluster connection is attempted.
+func TestFoundationDBStore_InitializeInvalidConfig(t *testing.T) {
+	tests := []struct {
+		name     string
+		config   map[string]interface{}
+		errorMsg string
+	}{
+		{
+			name: "invalid timeout",
+			config: map[string]interface{}{
+				"foundationdb.cluster_file":     getTestClusterFile(),
+				"foundationdb.api_version":      740,
+				"foundationdb.timeout":          "invalid",
+				"foundationdb.directory_prefix": "test",
+			},
+			errorMsg: "invalid timeout duration",
+		},
+		{
+			name: "invalid max_retry_delay",
+			config: map[string]interface{}{
+				"foundationdb.cluster_file":     getTestClusterFile(),
+				"foundationdb.api_version":      740,
+				"foundationdb.timeout":          "5s",
+				"foundationdb.max_retry_delay":  "invalid",
+				"foundationdb.directory_prefix": "test",
+			},
+			errorMsg: "invalid max_retry_delay duration",
+		},
+	}
+
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			config := util.GetViper()
+			for key, value := range tt.config {
+				config.Set(key, value)
+			}
+
+			store := &FoundationDBStore{}
+			err := store.Initialize(config, "foundationdb.")
+			if err == nil {
+				store.Shutdown()
+				t.Errorf("Expected initialization to fail, but it succeeded")
+			} else if !containsString(err.Error(), tt.errorMsg) {
+				t.Errorf("Expected error message to contain '%s', got '%s'", tt.errorMsg, err.Error())
+			}
+		})
+	}
+}
+
+// TestFoundationDBStore_KeyGeneration round-trips genKey/extractFileName over
+// a variety of paths (root, nested, spaces, unicode) to check internal
+// consistency of the key encoding.
+func TestFoundationDBStore_KeyGeneration(t *testing.T) {
+	store := &FoundationDBStore{}
+	err := store.initialize(getTestClusterFile(), 740)
+	if err != nil {
+		t.Skip("FoundationDB not available for testing, skipping")
+	}
+	defer store.Shutdown()
+
+	// Test key generation for different paths
+	testCases := []struct {
+		dirPath  string
+		fileName string
+		desc     string
+	}{
+		{"/", "file.txt", "root directory file"},
+		{"/dir", "file.txt", "subdirectory file"},
+		{"/deep/nested/dir", "file.txt", "deep nested file"},
+		{"/dir with spaces", "file with spaces.txt", "paths with spaces"},
+		{"/unicode/测试", "文件.txt", "unicode paths"},
+	}
+
+	for _, tc := range testCases {
+		t.Run(tc.desc, func(t *testing.T) {
+			key := store.genKey(tc.dirPath, tc.fileName)
+			if len(key) == 0 {
+				t.Error("Generated key should not be empty")
+			}
+
+			// Test that we can extract filename back
+			// Note: This tests internal consistency
+			if tc.fileName != "" {
+				extractedName, err := store.extractFileName(key)
+				if err != nil {
+					t.Errorf("extractFileName failed: %v", err)
+				}
+				if extractedName != tc.fileName {
+					t.Errorf("Expected extracted filename '%s', got '%s'", tc.fileName, extractedName)
+				}
+			}
+		})
+	}
+}
+
+// TestFoundationDBStore_ErrorHandling verifies the sentinel errors for missing
+// entries/keys and the state errors from commit/rollback without an active
+// transaction.
+func TestFoundationDBStore_ErrorHandling(t *testing.T) {
+	store := &FoundationDBStore{}
+	err := store.initialize(getTestClusterFile(), 740)
+	if err != nil {
+		t.Skip("FoundationDB not available for testing, skipping")
+	}
+	defer store.Shutdown()
+
+	ctx := context.Background()
+
+	// Test FindEntry with non-existent path
+	_, err = store.FindEntry(ctx, "/non/existent/file.txt")
+	if err == nil {
+		t.Error("Expected error for non-existent file")
+	}
+	if !errors.Is(err, filer_pb.ErrNotFound) {
+		t.Errorf("Expected ErrNotFound, got %v", err)
+	}
+
+	// Test KvGet with non-existent key
+	_, err = store.KvGet(ctx, []byte("non_existent_key"))
+	if err == nil {
+		t.Error("Expected error for non-existent key")
+	}
+	if !errors.Is(err, filer.ErrKvNotFound) {
+		t.Errorf("Expected ErrKvNotFound, got %v", err)
+	}
+
+	// Test transaction state errors
+	err = store.CommitTransaction(ctx)
+	if err == nil {
+		t.Error("Expected error when committing without active transaction")
+	}
+
+	err = store.RollbackTransaction(ctx)
+	if err == nil {
+		t.Error("Expected error when rolling back without active transaction")
+	}
+}
+
+// TestFoundationDBStore_TransactionState covers the begin/commit/rollback
+// lifecycle: double-begin on the same context is rejected, and a fresh
+// transaction can start on the base context after the previous one finishes.
+func TestFoundationDBStore_TransactionState(t *testing.T) {
+	store := &FoundationDBStore{}
+	err := store.initialize(getTestClusterFile(), 740)
+	if err != nil {
+		t.Skip("FoundationDB not available for testing, skipping")
+	}
+	defer store.Shutdown()
+
+	ctx := context.Background()
+
+	// Test double transaction begin
+	txCtx, err := store.BeginTransaction(ctx)
+	if err != nil {
+		t.Fatalf("BeginTransaction failed: %v", err)
+	}
+
+	// Try to begin another transaction on the same context
+	_, err = store.BeginTransaction(txCtx)
+	if err == nil {
+		t.Error("Expected error when beginning transaction while one is active")
+	}
+
+	// Commit the transaction
+	err = store.CommitTransaction(txCtx)
+	if err != nil {
+		t.Fatalf("CommitTransaction failed: %v", err)
+	}
+
+	// Now should be able to begin a new transaction (on the base ctx, which
+	// never carried a transaction value)
+	txCtx2, err := store.BeginTransaction(ctx)
+	if err != nil {
+		t.Fatalf("BeginTransaction after commit failed: %v", err)
+	}
+
+	// Rollback this time
+	err = store.RollbackTransaction(txCtx2)
+	if err != nil {
+		t.Fatalf("RollbackTransaction failed: %v", err)
+	}
+}
+
+// Benchmark tests
+
+// BenchmarkFoundationDBStore_InsertEntry measures insert throughput with one
+// unique path per iteration (the shared entry struct is re-pathed each time).
+func BenchmarkFoundationDBStore_InsertEntry(b *testing.B) {
+	store := createBenchmarkStore(b)
+	defer store.Shutdown()
+
+	ctx := context.Background()
+	entry := &filer.Entry{
+		FullPath: "/benchmark/file.txt",
+		Attr: filer.Attr{
+			Mode:  0644,
+			Uid:   1000,
+			Gid:   1000,
+			Mtime: time.Now(),
+		},
+	}
+
+	b.ResetTimer()
+	for i := 0; i < b.N; i++ {
+		entry.FullPath = util.NewFullPath("/benchmark", fmt.Sprintf("%x", uint64(i))+".txt")
+		err := store.InsertEntry(ctx, entry)
+		if err != nil {
+			b.Fatalf("InsertEntry failed: %v", err)
+		}
+	}
+}
+
+// BenchmarkFoundationDBStore_FindEntry measures point-lookup throughput over a
+// pre-populated set of 1000 entries, cycling through them with i % numEntries.
+func BenchmarkFoundationDBStore_FindEntry(b *testing.B) {
+	store := createBenchmarkStore(b)
+	defer store.Shutdown()
+
+	ctx := context.Background()
+
+	// Pre-populate with test entries (outside the timed region)
+	numEntries := 1000
+	for i := 0; i < numEntries; i++ {
+		entry := &filer.Entry{
+			FullPath: util.NewFullPath("/benchmark", fmt.Sprintf("%x", uint64(i))+".txt"),
+			Attr: filer.Attr{
+				Mode:  0644,
+				Uid:   1000,
+				Gid:   1000,
+				Mtime: time.Now(),
+			},
+		}
+		err := store.InsertEntry(ctx, entry)
+		if err != nil {
+			b.Fatalf("Pre-population InsertEntry failed: %v", err)
+		}
+	}
+
+	b.ResetTimer()
+	for i := 0; i < b.N; i++ {
+		path := util.NewFullPath("/benchmark", fmt.Sprintf("%x", uint64(i%numEntries))+".txt")
+		_, err := store.FindEntry(ctx, path)
+		if err != nil {
+			b.Fatalf("FindEntry failed: %v", err)
+		}
+	}
+}
+
+// BenchmarkFoundationDBStore_KvOperations measures a put+get round trip on a
+// single fixed key per iteration.
+func BenchmarkFoundationDBStore_KvOperations(b *testing.B) {
+	store := createBenchmarkStore(b)
+	defer store.Shutdown()
+
+	ctx := context.Background()
+	key := []byte("benchmark_key")
+	value := []byte("benchmark_value")
+
+	b.ResetTimer()
+	for i := 0; i < b.N; i++ {
+		// Put
+		err := store.KvPut(ctx, key, value)
+		if err != nil {
+			b.Fatalf("KvPut failed: %v", err)
+		}
+
+		// Get
+		_, err = store.KvGet(ctx, key)
+		if err != nil {
+			b.Fatalf("KvGet failed: %v", err)
+		}
+	}
+}
+
+// Helper functions
+
+// getTestClusterFile returns the cluster file path for tests, honoring the
+// FDB_CLUSTER_FILE environment variable with a container-friendly default.
+func getTestClusterFile() string {
+	if clusterFile := os.Getenv("FDB_CLUSTER_FILE"); clusterFile != "" {
+		return clusterFile
+	}
+	return "/var/fdb/config/fdb.cluster"
+}
+
+// createBenchmarkStore initializes a store for benchmarks, skipping the
+// benchmark when the cluster file is missing or the cluster is unreachable.
+func createBenchmarkStore(b *testing.B) *FoundationDBStore {
+	clusterFile := getTestClusterFile()
+	if _, err := os.Stat(clusterFile); os.IsNotExist(err) {
+		b.Skip("FoundationDB cluster file not found, skipping benchmark")
+	}
+
+	store := &FoundationDBStore{}
+	if err := store.initialize(clusterFile, 740); err != nil {
+		b.Skipf("Failed to initialize FoundationDB store: %v", err)
+	}
+	return store
+}
+
+// getTestStore initializes a store for tests, skipping the test when the
+// cluster file is missing or the cluster is unreachable.
+func getTestStore(t *testing.T) *FoundationDBStore {
+	t.Helper()
+
+	clusterFile := getTestClusterFile()
+	if _, err := os.Stat(clusterFile); os.IsNotExist(err) {
+		t.Skip("FoundationDB cluster file not found, skipping test")
+	}
+
+	store := &FoundationDBStore{}
+	if err := store.initialize(clusterFile, 740); err != nil {
+		t.Skipf("Failed to initialize FoundationDB store: %v", err)
+	}
+
+	return store
+}
+
+// containsString reports whether substr occurs within s; thin wrapper kept for
+// test readability.
+func containsString(s, substr string) bool {
+	return strings.Contains(s, substr)
+}
+
+// insertTestFiles creates n flat files named file_%04d.txt under dir, failing
+// the test immediately if any insert errors (previously insert errors in the
+// subtests were silently ignored, letting the test pass vacuously).
+func insertTestFiles(t *testing.T, store *FoundationDBStore, ctx context.Context, dir util.FullPath, n int) {
+	t.Helper()
+	for i := 0; i < n; i++ {
+		entry := &filer.Entry{
+			FullPath: util.NewFullPath(string(dir), fmt.Sprintf("file_%04d.txt", i)),
+			Attr: filer.Attr{
+				Mode:  0644,
+				Uid:   1000,
+				Gid:   1000,
+				Mtime: time.Now(),
+			},
+		}
+		if err := store.InsertEntry(ctx, entry); err != nil {
+			t.Fatalf("Failed to insert test entry %d: %v", i, err)
+		}
+	}
+}
+
+// countEntries lists dir and returns how many entries remain, failing the test
+// if the listing itself errors (previously the listing error was discarded).
+func countEntries(t *testing.T, store *FoundationDBStore, ctx context.Context, dir util.FullPath) int {
+	t.Helper()
+	var count int
+	_, err := store.ListDirectoryEntries(ctx, dir, "", true, 1000, func(entry *filer.Entry) bool {
+		count++
+		return true
+	})
+	if err != nil {
+		t.Fatalf("ListDirectoryEntries(%s) failed: %v", dir, err)
+	}
+	return count
+}
+
+// TestFoundationDBStore_DeleteFolderChildrenWithBatching validates that
+// DeleteFolderChildren always uses batched deletion to safely handle large
+// directories, regardless of transaction context.
+func TestFoundationDBStore_DeleteFolderChildrenWithBatching(t *testing.T) {
+	store := getTestStore(t)
+	defer store.Shutdown()
+
+	ctx := context.Background()
+	testDir := util.FullPath(fmt.Sprintf("/test_batch_delete_%d", time.Now().UnixNano()))
+
+	// More than 100 entries so deletion spans multiple batch transactions.
+	const NUM_ENTRIES = 250
+
+	t.Logf("Creating %d test entries...", NUM_ENTRIES)
+	insertTestFiles(t, store, ctx, testDir, NUM_ENTRIES)
+
+	// Test 1: DeleteFolderChildren outside a transaction succeeds and empties
+	// the directory.
+	t.Run("OutsideTransaction", func(t *testing.T) {
+		testDir1 := util.FullPath(fmt.Sprintf("/test_batch_1_%d", time.Now().UnixNano()))
+		insertTestFiles(t, store, ctx, testDir1, NUM_ENTRIES)
+
+		if err := store.DeleteFolderChildren(ctx, testDir1); err != nil {
+			t.Errorf("DeleteFolderChildren outside transaction should succeed, got error: %v", err)
+		}
+		if count := countEntries(t, store, ctx, testDir1); count != 0 {
+			t.Errorf("Expected all entries to be deleted, found %d", count)
+		}
+	})
+
+	// Test 2: with an ambient transaction, DeleteFolderChildren still manages
+	// its own batch transactions, so a later rollback does not undo deletions.
+	t.Run("WithTransactionContext", func(t *testing.T) {
+		testDir2 := util.FullPath(fmt.Sprintf("/test_batch_2_%d", time.Now().UnixNano()))
+		insertTestFiles(t, store, ctx, testDir2, NUM_ENTRIES)
+
+		txCtx, err := store.BeginTransaction(ctx)
+		if err != nil {
+			t.Fatalf("BeginTransaction failed: %v", err)
+		}
+
+		if err = store.DeleteFolderChildren(txCtx, testDir2); err != nil {
+			t.Errorf("DeleteFolderChildren should succeed with batching even when transaction context present, got: %v", err)
+		}
+
+		if err = store.RollbackTransaction(txCtx); err != nil {
+			t.Errorf("RollbackTransaction failed: %v", err)
+		}
+
+		if count := countEntries(t, store, ctx, testDir2); count != 0 {
+			t.Errorf("Expected all entries to be deleted, found %d (DeleteFolderChildren uses its own transactions)", count)
+		}
+	})
+
+	// Test 3: nested directories are recursively emptied.
+	t.Run("NestedDirectories", func(t *testing.T) {
+		testDir3 := util.FullPath(fmt.Sprintf("/test_batch_3_%d", time.Now().UnixNano()))
+
+		for i := 0; i < 50; i++ {
+			// One file and one subdirectory (with 3 files) per iteration.
+			entries := []*filer.Entry{
+				{
+					FullPath: util.NewFullPath(string(testDir3), fmt.Sprintf("file_%02d.txt", i)),
+					Attr:     filer.Attr{Mode: 0644, Uid: 1000, Gid: 1000, Mtime: time.Now()},
+				},
+				{
+					FullPath: util.NewFullPath(string(testDir3), fmt.Sprintf("dir_%02d", i)),
+					Attr:     filer.Attr{Mode: 0755 | os.ModeDir, Uid: 1000, Gid: 1000, Mtime: time.Now()},
+				},
+			}
+			for j := 0; j < 3; j++ {
+				entries = append(entries, &filer.Entry{
+					FullPath: util.NewFullPath(string(testDir3)+"/"+fmt.Sprintf("dir_%02d", i), fmt.Sprintf("subfile_%02d.txt", j)),
+					Attr:     filer.Attr{Mode: 0644, Uid: 1000, Gid: 1000, Mtime: time.Now()},
+				})
+			}
+			for _, entry := range entries {
+				if err := store.InsertEntry(ctx, entry); err != nil {
+					t.Fatalf("Failed to insert %s: %v", entry.FullPath, err)
+				}
+			}
+		}
+
+		if err := store.DeleteFolderChildren(ctx, testDir3); err != nil {
+			t.Errorf("DeleteFolderChildren should handle nested directories, got: %v", err)
+		}
+		if count := countEntries(t, store, ctx, testDir3); count != 0 {
+			t.Errorf("Expected all nested entries to be deleted, found %d", count)
+		}
+	})
+
+	// Cleanup the top-level directory created for this test.
+	if err := store.DeleteFolderChildren(ctx, testDir); err != nil {
+		t.Errorf("cleanup DeleteFolderChildren(%s) failed: %v", testDir, err)
+	}
+}
diff --git a/weed/server/filer_server.go b/weed/server/filer_server.go
index f395f6d60..79fb90742 100644
--- a/weed/server/filer_server.go
+++ b/weed/server/filer_server.go
@@ -28,6 +28,7 @@ import (
_ "github.com/seaweedfs/seaweedfs/weed/filer/cassandra2"
_ "github.com/seaweedfs/seaweedfs/weed/filer/elastic/v7"
_ "github.com/seaweedfs/seaweedfs/weed/filer/etcd"
+ _ "github.com/seaweedfs/seaweedfs/weed/filer/foundationdb"
_ "github.com/seaweedfs/seaweedfs/weed/filer/hbase"
_ "github.com/seaweedfs/seaweedfs/weed/filer/leveldb"
_ "github.com/seaweedfs/seaweedfs/weed/filer/leveldb2"