about summary refs log tree commit diff
diff options
context:
space:
mode:
-rw-r--r--  weed/command/mount.go           | 26
-rw-r--r--  weed/command/mount_std.go       |  6
-rw-r--r--  weed/mount/ml_integration.go    | 38
-rw-r--r--  weed/mount/weedfs.go            | 25
-rw-r--r--  weed/mount/weedfs_attr.go       |  6
-rw-r--r--  weed/mount/weedfs_file_io.go    | 13
-rw-r--r--  weed/mount/weedfs_file_read.go  |  5
7 files changed, 119 insertions(+), 0 deletions(-)
diff --git a/weed/command/mount.go b/weed/command/mount.go
index 98f139c6f..ba0639bc1 100644
--- a/weed/command/mount.go
+++ b/weed/command/mount.go
@@ -43,6 +43,13 @@ type MountOptions struct {
rdmaReadOnly *bool
rdmaMaxConcurrent *int
rdmaTimeoutMs *int
+
+ // ML optimization options
+ mlOptimizationEnabled *bool
+ mlPrefetchWorkers *int
+ mlConfidenceThreshold *float64
+ mlMaxPrefetchAhead *int
+ mlBatchSize *int
}
var (
@@ -90,6 +97,13 @@ func init() {
mountOptions.rdmaReadOnly = cmdMount.Flag.Bool("rdma.readOnly", false, "use RDMA for reads only (writes use HTTP)")
mountOptions.rdmaMaxConcurrent = cmdMount.Flag.Int("rdma.maxConcurrent", 64, "max concurrent RDMA operations")
mountOptions.rdmaTimeoutMs = cmdMount.Flag.Int("rdma.timeoutMs", 5000, "RDMA operation timeout in milliseconds")
+
+ // ML optimization flags
+ mountOptions.mlOptimizationEnabled = cmdMount.Flag.Bool("ml.enabled", false, "enable ML-aware optimizations for machine learning workloads")
+ mountOptions.mlPrefetchWorkers = cmdMount.Flag.Int("ml.prefetchWorkers", 8, "number of prefetch worker threads for ML workloads")
+ mountOptions.mlConfidenceThreshold = cmdMount.Flag.Float64("ml.confidenceThreshold", 0.6, "minimum confidence threshold to trigger ML prefetch")
+ mountOptions.mlMaxPrefetchAhead = cmdMount.Flag.Int("ml.maxPrefetchAhead", 8, "maximum number of chunks to prefetch ahead")
+ mountOptions.mlBatchSize = cmdMount.Flag.Int("ml.batchSize", 3, "batch size for ML prefetch operations")
mountCpuProfile = cmdMount.Flag.String("cpuprofile", "", "cpu profile output file")
mountMemProfile = cmdMount.Flag.String("memprofile", "", "memory profile output file")
@@ -124,5 +138,17 @@ var cmdMount = &Command{
-rdma.maxConcurrent=64 Max concurrent RDMA operations
-rdma.timeoutMs=5000 RDMA operation timeout in milliseconds
+ ML Optimization:
+ For machine learning workloads, enable intelligent prefetching and caching:
+ weed mount -filer=localhost:8888 -dir=/mnt/seaweedfs \
+ -ml.enabled=true
+
+ ML Options:
+ -ml.enabled=false Enable ML-aware optimizations
+ -ml.prefetchWorkers=8 Number of concurrent prefetch workers
+ -ml.confidenceThreshold=0.6 Minimum confidence to trigger ML prefetch
+ -ml.maxPrefetchAhead=8 Maximum chunks to prefetch ahead
+ -ml.batchSize=3 Batch size for prefetch operations
+
`,
}
diff --git a/weed/command/mount_std.go b/weed/command/mount_std.go
index 53b09589d..afc00e781 100644
--- a/weed/command/mount_std.go
+++ b/weed/command/mount_std.go
@@ -260,6 +260,12 @@ func RunMount(option *MountOptions, umask os.FileMode) bool {
RdmaReadOnly: *option.rdmaReadOnly,
RdmaMaxConcurrent: *option.rdmaMaxConcurrent,
RdmaTimeoutMs: *option.rdmaTimeoutMs,
+ // ML optimization options
+ MLOptimizationEnabled: *option.mlOptimizationEnabled,
+ MLPrefetchWorkers: *option.mlPrefetchWorkers,
+ MLConfidenceThreshold: *option.mlConfidenceThreshold,
+ MLMaxPrefetchAhead: *option.mlMaxPrefetchAhead,
+ MLBatchSize: *option.mlBatchSize,
})
// create mount root
diff --git a/weed/mount/ml_integration.go b/weed/mount/ml_integration.go
index fefd23cd6..7dc18b00d 100644
--- a/weed/mount/ml_integration.go
+++ b/weed/mount/ml_integration.go
@@ -1,6 +1,8 @@
package mount
import (
+ "time"
+
"github.com/hanwen/go-fuse/v2/fuse"
"github.com/seaweedfs/seaweedfs/weed/glog"
"github.com/seaweedfs/seaweedfs/weed/mount/ml"
@@ -35,6 +37,42 @@ func NewMLIntegrationManager(chunkCache chunk_cache.ChunkCache, lookupFn wdclien
return manager
}
+// NewMLIntegrationManagerWithConfig creates a new ML integration manager with custom configuration
+func NewMLIntegrationManagerWithConfig(
+ chunkCache chunk_cache.ChunkCache,
+ lookupFn wdclient.LookupFileIdFunctionType,
+ prefetchWorkers int,
+ confidenceThreshold float64,
+ maxPrefetchAhead int,
+ batchSize int,
+) *MLIntegrationManager {
+ config := &ml.MLConfig{
+ PrefetchWorkers: prefetchWorkers,
+ PrefetchQueueSize: prefetchWorkers * 4, // 4x workers for queue depth
+ PrefetchTimeout: 30 * time.Second,
+ EnableMLHeuristics: true,
+ SequentialThreshold: 5,
+ ConfidenceThreshold: confidenceThreshold,
+ MaxPrefetchAhead: maxPrefetchAhead,
+ PrefetchBatchSize: batchSize,
+ }
+
+ mlOpt := ml.NewMLOptimization(config, chunkCache, lookupFn)
+
+ // Create FUSE integration
+ fuseInt := ml.NewFUSEMLIntegration(mlOpt)
+
+ manager := &MLIntegrationManager{
+ mlOptimization: mlOpt,
+ fuseIntegration: fuseInt,
+ enabled: true,
+ }
+
+ glog.V(1).Infof("ML integration manager initialized with custom config: workers=%d, confidence=%.2f, prefetchAhead=%d, batchSize=%d",
+ prefetchWorkers, confidenceThreshold, maxPrefetchAhead, batchSize)
+ return manager
+}
+
// EnableMLOptimization enables or disables ML optimization
func (mgr *MLIntegrationManager) EnableMLOptimization(enabled bool) {
mgr.enabled = enabled
diff --git a/weed/mount/weedfs.go b/weed/mount/weedfs.go
index 95864ef00..445136f96 100644
--- a/weed/mount/weedfs.go
+++ b/weed/mount/weedfs.go
@@ -70,6 +70,13 @@ type Option struct {
RdmaReadOnly bool
RdmaMaxConcurrent int
RdmaTimeoutMs int
+
+ // ML optimization options
+ MLOptimizationEnabled bool
+ MLPrefetchWorkers int
+ MLConfidenceThreshold float64
+ MLMaxPrefetchAhead int
+ MLBatchSize int
uniqueCacheDirForRead string
uniqueCacheDirForWrite string
@@ -96,6 +103,7 @@ type WFS struct {
IsOverQuota bool
fhLockTable *util.LockTable[FileHandleId]
rdmaClient *RDMAMountClient
+ mlIntegration *MLIntegrationManager
FilerConf *filer.FilerConf
}
@@ -151,6 +159,9 @@ func NewSeaweedFileSystem(option *Option) *WFS {
if wfs.rdmaClient != nil {
wfs.rdmaClient.Close()
}
+ if wfs.mlIntegration != nil {
+ wfs.mlIntegration.Shutdown()
+ }
})
// Initialize RDMA client if enabled
@@ -169,6 +180,20 @@ func NewSeaweedFileSystem(option *Option) *WFS {
option.RdmaSidecarAddr, option.RdmaMaxConcurrent, option.RdmaTimeoutMs)
}
}
+
+ // Initialize ML optimization if enabled
+ if option.MLOptimizationEnabled {
+ wfs.mlIntegration = NewMLIntegrationManagerWithConfig(
+ wfs.chunkCache,
+ wfs.LookupFn(),
+ option.MLPrefetchWorkers,
+ option.MLConfidenceThreshold,
+ option.MLMaxPrefetchAhead,
+ option.MLBatchSize,
+ )
+ glog.Infof("ML optimization enabled: prefetchWorkers=%d, confidenceThreshold=%.2f, maxPrefetchAhead=%d",
+ option.MLPrefetchWorkers, option.MLConfidenceThreshold, option.MLMaxPrefetchAhead)
+ }
if wfs.option.ConcurrentWriters > 0 {
wfs.concurrentWriters = util.NewLimitedConcurrentExecutor(wfs.option.ConcurrentWriters)
diff --git a/weed/mount/weedfs_attr.go b/weed/mount/weedfs_attr.go
index 0bd5771cd..ccdeb3c9f 100644
--- a/weed/mount/weedfs_attr.go
+++ b/weed/mount/weedfs_attr.go
@@ -22,6 +22,12 @@ func (wfs *WFS) GetAttr(cancel <-chan struct{}, input *fuse.GetAttrIn, out *fuse
_, _, entry, status := wfs.maybeReadEntry(inode)
if status == fuse.OK {
out.AttrValid = 1
+
+ // Apply ML-specific attribute cache optimizations if enabled
+ if wfs.mlIntegration != nil {
+ wfs.mlIntegration.OptimizeAttributes(inode, out)
+ }
+
wfs.setAttrByPbEntry(&out.Attr, inode, entry, true)
return status
} else {
diff --git a/weed/mount/weedfs_file_io.go b/weed/mount/weedfs_file_io.go
index 04fe7f21c..add20caea 100644
--- a/weed/mount/weedfs_file_io.go
+++ b/weed/mount/weedfs_file_io.go
@@ -67,6 +67,14 @@ func (wfs *WFS) Open(cancel <-chan struct{}, in *fuse.OpenIn, out *fuse.OpenOut)
if status == fuse.OK {
out.Fh = uint64(fileHandle.fh)
out.OpenFlags = in.Flags
+
+ // Apply ML optimizations if enabled
+ if wfs.mlIntegration != nil {
+ if path, _, entry, pathStatus := wfs.maybeReadEntry(in.NodeId); pathStatus == fuse.OK {
+ wfs.mlIntegration.OnFileOpen(in.NodeId, entry, string(path), in.Flags, out)
+ }
+ }
+
if wfs.option.IsMacOs {
// remove the direct_io flag, as it is not well-supported on macOS
// https://code.google.com/archive/p/macfuse/wikis/OPTIONS.wiki recommended to avoid the direct_io flag
@@ -106,5 +114,10 @@ func (wfs *WFS) Open(cancel <-chan struct{}, in *fuse.OpenIn, out *fuse.OpenOut)
* @param fi file information
*/
func (wfs *WFS) Release(cancel <-chan struct{}, in *fuse.ReleaseIn) {
+ // Notify ML integration of file close
+ if wfs.mlIntegration != nil {
+ wfs.mlIntegration.OnFileClose(in.NodeId)
+ }
+
wfs.ReleaseHandle(FileHandleId(in.Fh))
}
diff --git a/weed/mount/weedfs_file_read.go b/weed/mount/weedfs_file_read.go
index dc79d3dc7..b1cf2d948 100644
--- a/weed/mount/weedfs_file_read.go
+++ b/weed/mount/weedfs_file_read.go
@@ -62,6 +62,11 @@ func (wfs *WFS) Read(cancel <-chan struct{}, in *fuse.ReadIn, buff []byte) (fuse
glog.Warningf("file handle read %s %d: %v", fh.FullPath(), totalRead, err)
return nil, fuse.EIO
}
+
+ // Notify ML integration of file read for pattern detection
+ if wfs.mlIntegration != nil && totalRead > 0 {
+ wfs.mlIntegration.OnFileRead(in.NodeId, offset, int(totalRead))
+ }
if IsDebugFileReadWrite {
// print(".")