diff options
Diffstat (limited to 'seaweedfs-rdma-sidecar/docker/scripts')
| -rwxr-xr-x | seaweedfs-rdma-sidecar/docker/scripts/setup-soft-roce.sh | 183 | ||||
| -rwxr-xr-x | seaweedfs-rdma-sidecar/docker/scripts/test-rdma.sh | 253 | ||||
| -rwxr-xr-x | seaweedfs-rdma-sidecar/docker/scripts/ucx-info.sh | 269 |
3 files changed, 705 insertions, 0 deletions
# ===========================================================================
# File: seaweedfs-rdma-sidecar/docker/scripts/setup-soft-roce.sh
# (new file, mode 100755)
# ===========================================================================
#!/bin/bash

# Setup Soft-RoCE (RXE) for RDMA simulation.
# This script enables RDMA over Ethernet using the RXE kernel module.
#
# Usage: sudo ./setup-soft-roce.sh      (or run inside a privileged container)
#
# Optional environment:
#   RXE_INTERFACE  network interface to bind the RXE link to
#                  (default: first existing of eth0, enp0s3, enp0s8, lo)
#   RXE_DEVICE     name given to the RXE link when created with
#                  `rdma link add` (default: rxe0)

set -e

echo "🔧 Setting up Soft-RoCE (RXE) RDMA simulation..."

# Abort unless the script runs as root; loading kernel modules and creating
# RDMA links both require it.
check_privileges() {
  if [ "$EUID" -ne 0 ]; then
    echo "❌ This script requires root privileges" >&2
    echo "Run with: sudo $0 or inside a privileged container" >&2
    exit 1
  fi
}

# Load the RXE kernel module (rdma_rxe, or rxe_net on older kernels) and
# verify that it is present afterwards. Exits 1 when neither can be loaded.
load_rxe_module() {
  echo "📦 Loading RXE kernel module..."

  if modprobe rdma_rxe 2>/dev/null; then
    echo "✅ rdma_rxe module loaded successfully"
  else
    echo "⚠️ Failed to load rdma_rxe module, trying alternative approach..."

    # Alternative: try loading rxe_net (older kernels)
    if modprobe rxe_net 2>/dev/null; then
      echo "✅ rxe_net module loaded successfully"
    else
      {
        echo "❌ Failed to load RXE modules. Possible causes:"
        echo "  - Kernel doesn't support RXE (needs CONFIG_RDMA_RXE=m)"
        echo "  - Running in unprivileged container"
        echo "  - Missing kernel modules"
        echo ""
        echo "🔧 Workaround: Run container with --privileged flag"
      } >&2
      exit 1
    fi
  fi

  # A loaded module always has a /sys/module/<name> directory; fall back to
  # lsmod so the original check still applies where sysfs is not mounted.
  if [ -d /sys/module/rdma_rxe ] || [ -d /sys/module/rxe_net ] \
    || lsmod | grep -q "rdma_rxe\|rxe_net"; then
    echo "✅ RXE module verification successful"
  else
    echo "❌ RXE module verification failed" >&2
    exit 1
  fi
}

# Create the RXE device via the legacy sysfs parameter.
# Arguments: $1 - network interface name
# Exits 1 when the sysfs interface is missing or the write is rejected.
setup_rxe_manual() {
  local interface="$1"
  local add_param=/sys/module/rdma_rxe/parameters/add

  if [ ! -d /sys/module/rdma_rxe ]; then
    echo "❌ RXE sysfs interface not found" >&2
    exit 1
  fi

  # Group the write so that a failing redirection (parameter file absent or
  # not writable) is silenced too, not only echo's own stderr.
  { echo "$interface" > "$add_param"; } 2>/dev/null || {
    echo "❌ Failed to add RXE device via sysfs" >&2
    exit 1
  }
}

# Select a network interface and create an RXE device on top of it.
# Creation order: `rdma link add` (iproute2), then the legacy rxe_cfg tool,
# then the sysfs parameter. Returns early when a link already exists on the
# interface, so the script can be re-run safely.
setup_rxe_device() {
  echo "🌐 Setting up RXE device over Ethernet interface..."

  local rxe_name="${RXE_DEVICE:-rxe0}"
  local interface=""
  local iface

  if [ -n "${RXE_INTERFACE:-}" ]; then
    # An explicit choice must exist; do not silently fall back.
    if ip link show "$RXE_INTERFACE" >/dev/null 2>&1; then
      interface="$RXE_INTERFACE"
    else
      echo "❌ Requested interface '$RXE_INTERFACE' not found" >&2
    fi
  else
    # Find available network interface (prefer eth0, fallback to others)
    for iface in eth0 enp0s3 enp0s8 lo; do
      if ip link show "$iface" >/dev/null 2>&1; then
        interface="$iface"
        break
      fi
    done
  fi

  if [ -z "$interface" ]; then
    {
      echo "❌ No suitable network interface found"
      echo "Available interfaces:"
      ip link show | grep "^[0-9]" | cut -d':' -f2 | tr -d ' '
    } >&2
    exit 1
  fi

  echo "📡 Using network interface: $interface"

  # Skip creation when an RDMA link is already bound to this netdev.
  # awk compares whole fields, so eth0 does not match eth0.100.
  if command -v rdma >/dev/null 2>&1 \
    && rdma link show 2>/dev/null \
    | awk -v dev="$interface" \
      '{ for (i = 1; i < NF; i++) if ($i == "netdev" && $(i + 1) == dev) found = 1 }
       END { exit !found }'; then
    echo "✅ RXE device already present on $interface"
    return 0
  fi

  echo "🔨 Creating RXE device on $interface..."

  if command -v rdma >/dev/null 2>&1 \
    && rdma link add "$rxe_name" type rxe netdev "$interface" 2>/dev/null; then
    echo "✅ RXE device $rxe_name created on $interface"
  elif command -v rxe_cfg >/dev/null 2>&1; then
    # Legacy rdma-core helper, kept for older userspace.
    rxe_cfg add "$interface" || {
      echo "⚠️ rxe_cfg failed, trying manual approach..."
      setup_rxe_manual "$interface"
    }
  else
    echo "⚠️ rxe_cfg not available, using manual setup..."
    setup_rxe_manual "$interface"
  fi
}

# List the RDMA devices registered under /sys/class/infiniband and print a
# short ibv_devinfo excerpt for each. Exits 1 when none are present.
verify_rdma_devices() {
  echo "🔍 Verifying RDMA devices..."

  if [ ! -d /sys/class/infiniband ]; then
    echo "❌ /sys/class/infiniband directory not found" >&2
    exit 1
  fi

  # Count with a glob instead of parsing `ls` output.
  local devices=0
  local device dev_name
  for device in /sys/class/infiniband/*; do
    if [ -e "$device" ]; then
      devices=$((devices + 1))
    fi
  done

  if [ "$devices" -eq 0 ]; then
    echo "❌ No RDMA devices found in /sys/class/infiniband/" >&2
    exit 1
  fi

  echo "✅ Found $devices RDMA device(s):"
  ls /sys/class/infiniband/

  # Show device details
  for device in /sys/class/infiniband/*; do
    if [ -d "$device" ]; then
      dev_name=$(basename "$device")
      echo "  📋 Device: $dev_name"

      # Try to get device info
      if command -v ibv_devinfo >/dev/null 2>&1; then
        ibv_devinfo -d "$dev_name" | head -10
      fi
    fi
  done
}

# Print libibverbs and UCX device information when the tools are installed.
# Purely informational; missing tools only produce a warning.
test_basic_rdma() {
  echo "🧪 Testing basic RDMA functionality..."

  # Test libibverbs
  if command -v ibv_devinfo >/dev/null 2>&1; then
    echo "📋 RDMA device information:"
    ibv_devinfo | head -20
  else
    echo "⚠️ ibv_devinfo not available"
  fi

  # Test UCX if available
  if command -v ucx_info >/dev/null 2>&1; then
    echo "📋 UCX information:"
    ucx_info -d | head -10
  else
    echo "⚠️ UCX tools not available"
  fi
}

# Main execution
main() {
  echo "🚀 Starting Soft-RoCE RDMA simulation setup..."
  echo "======================================"

  check_privileges
  load_rxe_module
  setup_rxe_device
  verify_rdma_devices
  test_basic_rdma

  echo ""
  echo "🎉 Soft-RoCE setup completed successfully!"
  echo "======================================"
  echo "✅ RDMA simulation is ready for testing"
  echo "💡 You can now run RDMA applications"
  echo ""
  echo "Next steps:"
  echo "  - Test with: /opt/rdma-sim/test-rdma.sh"
  echo "  - Check UCX: /opt/rdma-sim/ucx-info.sh"
  echo "  - Run your RDMA applications"
}

# Execute main function
main "$@"

# ===========================================================================
# File: seaweedfs-rdma-sidecar/docker/scripts/test-rdma.sh
# (new file, mode 100755)
# ===========================================================================
#!/bin/bash

# Test RDMA functionality in simulation environment.
# This script validates that RDMA devices and libraries are working.
#
# Usage:
#   test-rdma.sh               run the full test suite and print a summary
#   test-rdma.sh healthcheck   silent device check (for Docker HEALTHCHECK);
#                              exit 0 when at least one RDMA device exists

set -e

# Colors for output
RED='\033[0;31m'
GREEN='\033[0;32m'
YELLOW='\033[1;33m'
BLUE='\033[0;34m'
NC='\033[0m' # No Color

# Print a colored, icon-prefixed status line.
# Arguments: $1 - one of success|warning|error|info
#            $2 - message text
print_status() {
  local status="$1"
  local message="$2"
  local color icon

  case "$status" in
    "success")
      color="$GREEN"
      icon="✅"
      ;;
    "warning")
      color="$YELLOW"
      icon="⚠️"
      ;;
    "error")
      color="$RED"
      icon="❌"
      ;;
    "info")
      color="$BLUE"
      icon="📋"
      ;;
    *)
      # Unknown status: still show the message rather than dropping it.
      printf '%s\n' "$message"
      return 0
      ;;
  esac

  # %b expands the \033 escapes stored in the color variables.
  printf '%b\n' "${color}${icon} ${message}${NC}"
}

# Print the number of entries under /sys/class/infiniband (0 when the
# directory is missing or empty). Uses a glob instead of parsing `ls`.
count_rdma_devices() {
  local count=0
  local entry
  for entry in /sys/class/infiniband/*; do
    if [ -e "$entry" ]; then
      count=$((count + 1))
    fi
  done
  echo "$count"
}

# Succeed when the `ucx_info -d` output on stdin lists an RC, UD or DC
# transport. Only "Transport:" lines are inspected, so unrelated text that
# merely contains "rc"/"ud"/"dc" (e.g. cuda_copy) does not count.
ucx_has_rdma_transport() {
  grep -Eq 'Transport:[[:space:]]*(rc|ud|dc)(_|[[:space:]]|$)'
}

# Check that at least one RDMA device is registered and list them.
# Returns: 0 when devices exist, 1 otherwise.
test_rdma_devices() {
  print_status "info" "Testing RDMA devices..."

  if [ ! -d /sys/class/infiniband ]; then
    print_status "error" "/sys/class/infiniband directory not found"
    return 1
  fi

  local device_count
  device_count=$(count_rdma_devices)
  if [ "$device_count" -eq 0 ]; then
    print_status "error" "No RDMA devices found"
    return 1
  fi

  print_status "success" "Found $device_count RDMA device(s)"

  # List devices
  local device dev_name
  for device in /sys/class/infiniband/*; do
    if [ -d "$device" ]; then
      dev_name=$(basename "$device")
      print_status "info" "Device: $dev_name"
    fi
  done
  return 0
}

# Check that libibverbs can enumerate devices through ibv_devinfo.
# Returns: 0 when ibv_devinfo prints device information, 1 otherwise.
test_libibverbs() {
  print_status "info" "Testing libibverbs..."

  if ! command -v ibv_devinfo >/dev/null 2>&1; then
    print_status "error" "ibv_devinfo command not found"
    return 1
  fi

  # Assignment is separate from `local` so the command status is not masked;
  # a failure is tolerated here and judged by the (empty) output below.
  local device_info
  device_info=$(ibv_devinfo 2>/dev/null) || true
  if [ -z "$device_info" ]; then
    print_status "error" "ibv_devinfo found no devices"
    return 1
  fi

  print_status "success" "libibverbs working - devices detected"

  # Show basic info
  echo "$device_info" | head -5

  # Test device capabilities
  if echo "$device_info" | grep -q "transport.*InfiniBand\|transport.*Ethernet"; then
    print_status "success" "RDMA transport layer detected"
  else
    print_status "warning" "Transport layer information unclear"
  fi

  return 0
}

# Check that UCX can enumerate devices and whether it exposes RDMA transports.
# Returns: 0 when ucx_info prints device information, 1 otherwise.
test_ucx() {
  print_status "info" "Testing UCX..."

  if ! command -v ucx_info >/dev/null 2>&1; then
    print_status "warning" "UCX tools not available"
    return 1
  fi

  # Test UCX device detection
  local ucx_output
  ucx_output=$(ucx_info -d 2>/dev/null) || true
  if [ -z "$ucx_output" ]; then
    print_status "warning" "UCX not detecting devices"
    return 1
  fi

  print_status "success" "UCX detecting devices"

  # Show UCX device info
  echo "$ucx_output" | head -10

  # Check for RDMA transports
  if echo "$ucx_output" | ucx_has_rdma_transport; then
    print_status "success" "UCX RDMA transports available"
  else
    print_status "warning" "UCX RDMA transports not detected"
  fi

  return 0
}

# Check for the RDMA Connection Manager device node.
# Returns: 0 when /dev/infiniband/rdma_cm exists, 1 otherwise.
test_rdma_cm() {
  print_status "info" "Testing RDMA Connection Manager..."

  if [ -e /dev/infiniband/rdma_cm ]; then
    print_status "success" "RDMA CM device found"
    return 0
  else
    print_status "warning" "RDMA CM device not found"
    return 1
  fi
}

# Report which RDMA test utilities are installed. Nothing is executed,
# because the ping-pong tests need a client/server pair.
test_rdma_operations() {
  print_status "info" "Testing basic RDMA operations..."

  if command -v ibv_rc_pingpong >/dev/null 2>&1; then
    # This would need a client/server setup, so just check if binary exists
    print_status "success" "RDMA test tools available (ibv_rc_pingpong)"
  else
    print_status "warning" "RDMA test tools not available"
  fi

  # Check for other useful RDMA utilities
  local tools_found=0
  local tool
  for tool in ibv_asyncwatch ibv_read_lat ibv_write_lat; do
    if command -v "$tool" >/dev/null 2>&1; then
      tools_found=$((tools_found + 1))
    fi
  done

  if [ "$tools_found" -gt 0 ]; then
    print_status "success" "Found $tools_found additional RDMA test tools"
  else
    print_status "warning" "No additional RDMA test tools found"
  fi
}

# Re-run the key checks quietly and print a PASS/FAIL summary.
# Returns: 0 when RDMA devices and libibverbs both pass, 1 otherwise
#          (UCX is reported but does not affect the result).
generate_summary() {
  echo ""
  print_status "info" "RDMA Simulation Test Summary"
  echo "======================================"

  local devices_ok=0
  local libibverbs_ok=0
  local ucx_ok=0

  if [ "$(count_rdma_devices)" -gt 0 ]; then
    devices_ok=1
  fi

  if command -v ibv_devinfo >/dev/null 2>&1 && ibv_devinfo >/dev/null 2>&1; then
    libibverbs_ok=1
  fi

  if command -v ucx_info >/dev/null 2>&1 && ucx_info -d >/dev/null 2>&1; then
    ucx_ok=1
  fi

  echo "📊 Test Results:"
  # Plain if/else: with `a && b || c`, c would also run if b itself failed.
  if [ "$devices_ok" -eq 1 ]; then
    print_status "success" "RDMA Devices: PASS"
  else
    print_status "error" "RDMA Devices: FAIL"
  fi
  if [ "$libibverbs_ok" -eq 1 ]; then
    print_status "success" "libibverbs: PASS"
  else
    print_status "error" "libibverbs: FAIL"
  fi
  if [ "$ucx_ok" -eq 1 ]; then
    print_status "success" "UCX: PASS"
  else
    print_status "warning" "UCX: FAIL/WARNING"
  fi

  echo ""
  if [ "$devices_ok" -eq 1 ] && [ "$libibverbs_ok" -eq 1 ]; then
    print_status "success" "RDMA simulation environment is ready! 🎉"
    echo ""
    print_status "info" "You can now:"
    echo "  - Run RDMA applications"
    echo "  - Test SeaweedFS RDMA engine with real RDMA"
    echo "  - Use UCX for high-performance transfers"
    return 0
  else
    print_status "error" "RDMA simulation setup needs attention"
    echo ""
    print_status "info" "Troubleshooting:"
    echo "  - Run setup script: sudo /opt/rdma-sim/setup-soft-roce.sh"
    echo "  - Check container privileges (--privileged flag)"
    echo "  - Verify kernel RDMA support"
    return 1
  fi
}

# Main test execution. Individual tests are allowed to fail (`|| true`) so
# that every section runs; the exit status comes from generate_summary.
main() {
  echo "🚀 RDMA Simulation Test Suite"
  echo "======================================"

  # Run tests
  test_rdma_devices || true
  echo ""

  test_libibverbs || true
  echo ""

  test_ucx || true
  echo ""

  test_rdma_cm || true
  echo ""

  test_rdma_operations || true
  echo ""

  # Generate summary
  generate_summary
}

# Health check mode (for Docker healthcheck).
# Handled before any banner is printed so the probe stays silent.
if [ "${1:-}" = "healthcheck" ]; then
  # Quick health check - just verify devices exist
  if [ "$(count_rdma_devices)" -gt 0 ]; then
    exit 0
  else
    exit 1
  fi
fi

echo "🧪 Testing RDMA simulation environment..."

# Execute main function
main "$@"

# ===========================================================================
# File: seaweedfs-rdma-sidecar/docker/scripts/ucx-info.sh
# (new file, mode 100755)
# ===========================================================================
#!/bin/bash

# UCX Information and Testing Script.
# Provides detailed information about UCX configuration and capabilities.
# Every section is best-effort: a missing tool produces a warning and the
# report continues with the next section.

set -e

echo "📋 UCX (Unified Communication X) Information"
echo "============================================="

# Colors for output
GREEN='\033[0;32m'
YELLOW='\033[1;33m'
BLUE='\033[0;34m'
NC='\033[0m' # No Color

# Print a section heading followed by a rule.
print_section() {
  printf '%b\n' "\n${BLUE}📋 $1${NC}"
  echo "----------------------------------------"
}

# Print an informational line in green.
print_info() {
  printf '%b\n' "${GREEN}ℹ️ $1${NC}"
}

# Print a warning line in yellow.
print_warning() {
  printf '%b\n' "${YELLOW}⚠️ $1${NC}"
}

# Succeed when the `ucx_info -d` output on stdin lists an RC, DC or UD
# transport. Only "Transport:" lines are inspected, so names that merely
# contain those letters (e.g. cuda_copy) are not counted as RDMA.
ucx_has_rdma_transport() {
  grep -Eiq 'Transport:[[:space:]]*(rc|dc|ud)(_|[[:space:]]|$)'
}

# Report whether the UCX tools and shared libraries are installed.
# Returns: 1 when ucx_info is not on PATH, 0 otherwise.
check_ucx_installation() {
  print_section "UCX Installation Status"

  if ! command -v ucx_info >/dev/null 2>&1; then
    print_warning "UCX tools not found"
    echo "Install with: apt-get install ucx-utils libucx-dev"
    return 1
  fi

  print_info "UCX tools are installed"

  # Get UCX version (first line of `ucx_info -v`), running the tool once.
  local version
  if version=$(ucx_info -v 2>/dev/null); then
    print_info "Version: ${version%%$'\n'*}"
  fi

  # Check UCX libraries against the linker cache, read once up front.
  local ld_cache
  ld_cache=$(ldconfig -p 2>/dev/null) || ld_cache=""

  local libs_found=0
  local lib
  for lib in libucp.so libucs.so libuct.so; do
    if printf '%s\n' "$ld_cache" | grep -qF -- "$lib"; then
      libs_found=$((libs_found + 1))
    fi
  done

  if [ "$libs_found" -eq 3 ]; then
    print_info "All UCX libraries found (ucp, ucs, uct)"
  else
    print_warning "Some UCX libraries may be missing ($libs_found/3 found)"
  fi
}

# Print the full `ucx_info -d` transport/device listing.
# Returns: 1 when ucx_info is missing or fails.
show_ucx_devices() {
  print_section "UCX Transport Devices"

  if ! command -v ucx_info >/dev/null 2>&1; then
    print_warning "ucx_info command not available"
    return 1
  fi

  echo "Available UCX transports and devices:"
  ucx_info -d 2>/dev/null || {
    print_warning "Failed to get UCX device information"
    return 1
  }
}

# Print the first 20 UCX configuration parameters and the key env variables.
# Returns: 1 when ucx_info is missing or fails.
show_ucx_config() {
  print_section "UCX Configuration"

  if ! command -v ucx_info >/dev/null 2>&1; then
    print_warning "ucx_info command not available"
    return 1
  fi

  echo "UCX configuration parameters:"
  # Capture first: in `ucx_info -c | head -20 || ...` the status tested is
  # head's, so a failing ucx_info would never be reported.
  local config
  if config=$(ucx_info -c 2>/dev/null); then
    printf '%s\n' "$config" | head -20
  else
    print_warning "Failed to get UCX configuration"
    return 1
  fi

  echo ""
  print_info "Key UCX environment variables:"
  echo "  UCX_TLS - Transport layers to use"
  echo "  UCX_NET_DEVICES - Network devices to use"
  echo "  UCX_LOG_LEVEL - Logging level (error, warn, info, debug, trace)"
  echo "  UCX_MEMTYPE_CACHE - Memory type caching (y/n)"
}

# Report which transport families (RDMA, TCP, shared memory) UCX exposes.
# Returns: 1 when ucx_info is missing.
test_ucx_capabilities() {
  print_section "UCX Capability Testing"

  if ! command -v ucx_info >/dev/null 2>&1; then
    print_warning "Cannot test UCX capabilities - ucx_info not available"
    return 1
  fi

  print_info "Testing UCX transport capabilities..."

  # Query the device list once and reuse it for every check below.
  local devices_out
  devices_out=$(ucx_info -d 2>/dev/null) || true

  # Check for RDMA transports
  local ucx_transports
  ucx_transports=$(printf '%s\n' "$devices_out" | grep -i "transport\|tl:" || true)

  if printf '%s\n' "$ucx_transports" | ucx_has_rdma_transport; then
    print_info "✅ RDMA transports detected (RC/DC/UD)"
  else
    print_warning "No RDMA transports detected"
  fi

  if printf '%s\n' "$ucx_transports" | grep -Eiq 'Transport:[[:space:]]*tcp'; then
    print_info "✅ TCP transport available"
  else
    print_warning "TCP transport not detected"
  fi

  # UCX names its shared-memory transports posix and sysv.
  if printf '%s\n' "$ucx_transports" \
    | grep -Eiq 'Transport:[[:space:]]*(shm|posix|sysv)'; then
    print_info "✅ Shared memory transport available"
  else
    print_warning "Shared memory transport not detected"
  fi

  # Memory types
  print_info "Testing memory type support..."
  local memory_info
  memory_info=$(printf '%s\n' "$devices_out" | grep -i "memory\|md:" || true)
  if [ -n "$memory_info" ]; then
    echo "$memory_info" | head -5
  fi
}

# Print recommended UCX environment settings for RDMA use.
show_rdma_settings() {
  print_section "Recommended UCX Settings for RDMA"

  print_info "For optimal RDMA performance with SeaweedFS:"
  echo ""
  echo "Environment Variables:"
  echo "  export UCX_TLS=rc_verbs,ud_verbs,rc_mlx5,dc_mlx5"
  echo "  export UCX_NET_DEVICES=all"
  echo "  export UCX_LOG_LEVEL=info"
  echo "  export UCX_RNDV_SCHEME=put_zcopy"
  echo "  export UCX_RNDV_THRESH=8192"
  echo ""

  print_info "For development/debugging:"
  echo "  export UCX_LOG_LEVEL=debug"
  echo "  export UCX_LOG_FILE=/tmp/ucx.log"
  echo ""

  print_info "For Soft-RoCE (RXE) specifically:"
  echo "  export UCX_TLS=rc_verbs,ud_verbs"
  # Device/port selection is done with UCX_NET_DEVICES (<device>:<port>).
  echo "  export UCX_NET_DEVICES=rxe0:1"
  echo ""
}

# Report which UCX test utilities are installed and how to use them.
test_ucx_basic() {
  print_section "Basic UCX Functionality Test"

  if command -v ucx_hello_world >/dev/null 2>&1; then
    print_info "UCX hello_world test available"
    echo "You can test UCX with:"
    echo "  Server: UCX_TLS=tcp ucx_hello_world -l"
    echo "  Client: UCX_TLS=tcp ucx_hello_world <server_ip>"
  else
    print_warning "UCX hello_world test not available"
  fi

  # Check for other UCX test utilities
  local test_tools=0
  local tool
  for tool in ucx_perftest ucp_hello_world; do
    if command -v "$tool" >/dev/null 2>&1; then
      test_tools=$((test_tools + 1))
      print_info "UCX test tool available: $tool"
    fi
  done

  if [ "$test_tools" -eq 0 ]; then
    print_warning "No UCX test tools found"
    echo "Consider installing: ucx-utils package"
  fi
}

# Print an overall status summary: installation, device detection and
# RDMA transport availability.
generate_summary() {
  print_section "UCX Status Summary"

  local ucx_ok=0
  local devices_ok=0
  local rdma_ok=0
  local devices_out

  # Check UCX availability
  if command -v ucx_info >/dev/null 2>&1; then
    ucx_ok=1

    # Check devices (single ucx_info invocation, output reused below)
    if devices_out=$(ucx_info -d 2>/dev/null); then
      devices_ok=1

      # Check for RDMA
      if printf '%s\n' "$devices_out" | ucx_has_rdma_transport; then
        rdma_ok=1
      fi
    fi
  fi

  echo "📊 UCX Status:"
  # Plain if/else: with `a && b || c`, c would also run if b itself failed.
  if [ "$ucx_ok" -eq 1 ]; then
    print_info "✅ UCX Installation: OK"
  else
    print_warning "❌ UCX Installation: Missing"
  fi
  if [ "$devices_ok" -eq 1 ]; then
    print_info "✅ UCX Devices: Detected"
  else
    print_warning "❌ UCX Devices: Not detected"
  fi
  if [ "$rdma_ok" -eq 1 ]; then
    print_info "✅ RDMA Support: Available"
  else
    print_warning "⚠️ RDMA Support: Limited/Missing"
  fi

  echo ""
  if [ "$ucx_ok" -eq 1 ] && [ "$devices_ok" -eq 1 ]; then
    print_info "🎉 UCX is ready for SeaweedFS RDMA integration!"

    if [ "$rdma_ok" -eq 1 ]; then
      print_info "🚀 Real RDMA acceleration is available"
    else
      print_warning "💡 Only TCP/shared memory transports available"
    fi
  else
    print_warning "🔧 UCX setup needs attention for optimal performance"
  fi
}

# Main execution. Each section may return non-zero when UCX is missing;
# `|| true` keeps `set -e` from aborting the report before the summary.
main() {
  check_ucx_installation || true
  echo ""

  show_ucx_devices || true
  echo ""

  show_ucx_config || true
  echo ""

  test_ucx_capabilities || true
  echo ""

  show_rdma_settings
  echo ""

  test_ucx_basic
  echo ""

  generate_summary

  echo ""
  print_info "For SeaweedFS RDMA engine integration:"
  echo "  1. Use UCX with your Rust engine"
  echo "  2. Configure appropriate transport layers"
  echo "  3. Test with SeaweedFS RDMA sidecar"
  echo "  4. Monitor performance and adjust settings"
}

# Execute main function
main "$@"
