#!/bin/bash
#
# Setup Soft-RoCE (RXE) for RDMA simulation.
#
# Enables RDMA over Ethernet using the RXE kernel module. All work is done by
# the functions defined below and driven from main().

# Abort on the first unhandled command failure.
set -e

echo "๐ง Setting up Soft-RoCE (RXE) RDMA simulation..."
# Abort unless the script is running as root.
# Globals:  EUID (read)
# Outputs:  an error line and a usage hint on stdout when not root
# Returns:  0 when EUID is 0; otherwise exits the script with status 1
check_privileges() {
  # Guard clause: nothing to report when we already are root.
  [ "$EUID" -ne 0 ] || return 0

  echo "โ This script requires root privileges"
  echo "Run with: sudo $0 or inside a privileged container"
  exit 1
}
# Load the RXE kernel module and confirm that it shows up in lsmod.
# Tries rdma_rxe first and falls back to rxe_net (older kernels).
# Outputs:  progress and diagnostics on stdout
# Returns:  0 once a module is loaded and listed by lsmod; exits the script
#           with status 1 when neither module loads or verification fails
load_rxe_module() {
  echo "๐ฆ Loading RXE kernel module..."

  if ! modprobe rdma_rxe 2>/dev/null; then
    echo "โ ๏ธ Failed to load rdma_rxe module, trying alternative approach..."

    # Alternative module name: rxe_net (older kernels).
    if ! modprobe rxe_net 2>/dev/null; then
      echo "โ Failed to load RXE modules. Possible causes:"
      echo " - Kernel doesn't support RXE (needs CONFIG_RDMA_RXE=m)"
      echo " - Running in unprivileged container"
      echo " - Missing kernel modules"
      echo ""
      echo "๐ง Workaround: Run container with --privileged flag"
      exit 1
    fi

    # The literal below spans two lines, exactly as in the original message.
    echo "โ
rxe_net module loaded successfully"
  else
    echo "โ
rdma_rxe module loaded successfully"
  fi

  # Double-check that one of the two modules is really listed as loaded.
  if ! lsmod | grep -q "rdma_rxe\|rxe_net"; then
    echo "โ RXE module verification failed"
    exit 1
  fi

  echo "โ
RXE module verification successful"
}
# Create the Soft-RoCE (RXE) device on the first usable network interface.
#
# Probes eth0, enp0s3, enp0s8 and lo (in that order) with `ip link show` and
# binds RXE to the first one that exists. Device creation is attempted with,
# in order:
#   1. `rdma link add rxe_<iface> type rxe netdev <iface>` (iproute2)
#   2. `rxe_cfg add <iface>` (legacy rdma-core helper; may be absent)
#   3. setup_rxe_manual <iface> (sysfs module parameter)
#
# Outputs:  progress messages on stdout
# Returns:  0 when `rdma link add` succeeds, otherwise the status of the
#           fallback path; exits the script with status 1 when none of the
#           candidate interfaces exists
setup_rxe_device() {
  echo "๐ Setting up RXE device over Ethernet interface..."

  # Find available network interface (prefer eth0, fallback to others).
  local interface="" iface
  for iface in eth0 enp0s3 enp0s8 lo; do
    if ip link show "$iface" >/dev/null 2>&1; then
      interface="$iface"
      break
    fi
  done

  if [ -z "$interface" ]; then
    echo "โ No suitable network interface found"
    echo "Available interfaces:"
    ip link show | grep "^[0-9]" | cut -d':' -f2 | tr -d ' '
    exit 1
  fi

  echo "๐ก Using network interface: $interface"
  echo "๐จ Creating RXE device on $interface..."

  # Preferred path: the iproute2 `rdma` tool. Its stderr is silenced on
  # purpose because a failure here is not fatal; the legacy paths below
  # still get their chance and report their own errors.
  if command -v rdma >/dev/null 2>&1 \
    && rdma link add "rxe_${interface}" type rxe netdev "$interface" 2>/dev/null; then
    return 0
  fi

  # Legacy path: rxe_cfg, then the sysfs fallback.
  if command -v rxe_cfg >/dev/null 2>&1; then
    rxe_cfg add "$interface" || {
      echo "โ ๏ธ rxe_cfg failed, trying manual approach..."
      setup_rxe_manual "$interface"
    }
  else
    echo "โ ๏ธ rxe_cfg not available, using manual setup..."
    setup_rxe_manual "$interface"
  fi
}
# Create an RXE device by writing the interface name to the rdma_rxe module's
# sysfs `add` parameter.
# Arguments: $1 - network interface name to bind RXE to
# Outputs:   an error message on stdout on failure
# Returns:   0 on success; exits the script with status 1 when
#            /sys/module/rdma_rxe is missing or the write fails
setup_rxe_manual() {
  local interface="$1"
  local add_param=/sys/module/rdma_rxe/parameters/add

  if [ ! -d /sys/module/rdma_rxe ]; then
    echo "โ RXE sysfs interface not found"
    exit 1
  fi

  # stderr is redirected BEFORE stdout: bash processes redirections left to
  # right, so with the opposite order a failure to open the sysfs file is
  # reported before stderr has been silenced.
  if ! echo "$interface" 2>/dev/null >"$add_param"; then
    echo "โ Failed to add RXE device via sysfs"
    exit 1
  fi
}
# Verify that at least one RDMA device is registered and describe each one.
# Arguments: $1 - (optional) directory holding one entry per RDMA device;
#                 defaults to /sys/class/infiniband
# Outputs:   device count, device names and, when ibv_devinfo is installed,
#            the first 10 lines of its report per device, on stdout
# Returns:   0 when at least one entry exists; exits the script with status 1
#            when the directory is missing or empty
verify_rdma_devices() {
  local sysfs_dir="${1:-/sys/class/infiniband}"
  local entry dev_name
  local -a devices=()

  echo "๐ Verifying RDMA devices..."

  if [ ! -d "$sysfs_dir" ]; then
    echo "โ $sysfs_dir directory not found"
    exit 1
  fi

  # Collect entries with a glob instead of parsing `ls` output. An unmatched
  # glob stays literal, hence the existence check.
  for entry in "$sysfs_dir"/*; do
    [ -e "$entry" ] || continue
    devices+=("${entry##*/}")
  done

  if [ "${#devices[@]}" -eq 0 ]; then
    echo "โ No RDMA devices found in $sysfs_dir/"
    exit 1
  fi

  echo "โ
Found ${#devices[@]} RDMA device(s):"
  printf '%s\n' "${devices[@]}"

  # Show device details (directories only).
  for dev_name in "${devices[@]}"; do
    [ -d "$sysfs_dir/$dev_name" ] || continue
    echo " ๐ Device: $dev_name"
    if command -v ibv_devinfo >/dev/null 2>&1; then
      ibv_devinfo -d "$dev_name" | head -10
    fi
  done
}
# Print a short RDMA smoke report using whichever diagnostic tools exist.
# Outputs:  the first 20 lines of `ibv_devinfo` and the first 10 lines of
#           `ucx_info -d`, or a warning for each tool that is not installed
# Returns:  0
test_basic_rdma() {
  echo "๐งช Testing basic RDMA functionality..."

  # libibverbs report
  if ! command -v ibv_devinfo &>/dev/null; then
    echo "โ ๏ธ ibv_devinfo not available"
  else
    echo "๐ RDMA device information:"
    ibv_devinfo | head -20
  fi

  # UCX report, when the tool is installed
  if ! command -v ucx_info &>/dev/null; then
    echo "โ ๏ธ UCX tools not available"
  else
    echo "๐ UCX information:"
    ucx_info -d | head -10
  fi
}
# Entry point: run every setup stage in order, then print the summary.
# Outputs:  banner, the output of each stage and the closing summary on stdout
# Returns:  0 when all stages complete
main() {
  local step

  echo "๐ Starting Soft-RoCE RDMA simulation setup..."
  echo "======================================"

  # Stages run strictly in this order.
  for step in \
    check_privileges \
    load_rxe_module \
    setup_rxe_device \
    verify_rdma_devices \
    test_basic_rdma; do
    "$step"
  done

  # One printf call emits the whole closing summary, one argument per line.
  printf '%s\n' \
    "" \
    "๐ Soft-RoCE setup completed successfully!" \
    "======================================" \
    "โ
RDMA simulation is ready for testing" \
    "๐ก You can now run RDMA applications" \
    "" \
    "Next steps:" \
    " - Test with: /opt/rdma-sim/test-rdma.sh" \
    " - Check UCX: /opt/rdma-sim/ucx-info.sh" \
    " - Run your RDMA applications"
}
# Execute main function, forwarding all command-line arguments.
main "$@"