# GPU Configuration
# Docking@HOME - CUDA/CUDPP Settings
# === GPU Selection ===
use_gpu = true # Enable GPU acceleration
gpu_device_id = 0 # GPU device ID to use (-1 = auto-detect best)
use_multiple_gpus = false # Use multiple GPUs if available
gpu_ids = [0] # List of GPU IDs to use (if use_multiple_gpus = true)
# === CUDA Settings ===
cuda_device_name = "auto" # CUDA device name (auto = auto-detect)
cuda_compute_capability = "auto" # Compute capability (auto, 5.0, 6.1, 7.0, 7.5, 8.0, 8.6, 9.0)
cuda_threads_per_block = 256 # Threads per block (64, 128, 256, 512, 1024)
cuda_blocks_per_grid = 128 # Blocks per grid (32, 64, 128, 256)
# === Memory Settings ===
gpu_memory_limit_mb = 0 # GPU memory limit in MB (0 = auto, use available)
host_memory_pinned = true # Use pinned host memory for faster transfers
cache_maps_on_gpu = true # Cache grid maps on GPU memory
# === Performance Tuning ===
# AutoDock-GPU specific settings
energy_eval_per_gpu_call = 1024 # Energy evaluations per GPU kernel call
ls_method = "sw" # Local search method: sw (Solis-Wets), sd (Steepest Descent), fire
population_size = 150 # Population size for genetic algorithm
num_generations = 27000 # Number of generations
# === CUDPP Settings ===
use_cudpp = true # Use CUDPP for GPU primitives
cudpp_sort_algorithm = "radix" # Sort algorithm: radix, merge, quick
cudpp_scan_algorithm = "efficient" # Scan algorithm: efficient, work-efficient
# === Optimization Flags ===
optimize_for_speed = true # Optimize for speed vs accuracy
use_fast_math = true # Use fast math operations (less precise)
use_texture_memory = true # Use texture memory for grid maps
async_execution = true # Asynchronous kernel execution
# === Multi-GPU Load Balancing ===
load_balance_strategy = "dynamic" # static, dynamic, round-robin
tasks_per_gpu_min = 10 # Minimum tasks per GPU
tasks_per_gpu_max = 100 # Maximum tasks per GPU
# === Error Handling ===
retry_on_gpu_error = true # Retry on GPU errors
max_gpu_retries = 3 # Maximum retry attempts
fallback_to_cpu = true # Fallback to CPU on GPU failure
# === Thermal Management ===
enable_thermal_monitoring = true # Monitor GPU temperature
max_gpu_temperature = 85 # Maximum GPU temperature (°C)
throttle_at_temperature = 80 # Start throttling at this temperature (°C)
shutdown_at_temperature = 90 # Emergency shutdown temperature (°C)
check_temperature_interval = 10 # Seconds between temperature checks
# === Power Management ===
gpu_power_limit_watts = 0 # Power limit in watts (0 = default)
enable_power_monitoring = true # Monitor power consumption
# === Debugging ===
verbose_gpu_output = false # Enable verbose GPU output
profile_gpu_kernels = false # Profile GPU kernel execution times
save_gpu_debug_info = false # Save debug information
cuda_error_checking = true # Enable CUDA error checking (slower)
# === Compatibility ===
force_cpu_mode = false # Force CPU mode even if GPU available
gpu_driver_version_min = "450.0" # Minimum GPU driver version
cuda_runtime_version_min = "11.0" # Minimum CUDA runtime version
# === Specific GPU Optimizations ===
# NVIDIA RTX 30xx Series
rtx30xx_optimized = false
# NVIDIA RTX 40xx Series
rtx40xx_optimized = false
# AMD RDNA2/3
amd_rdna_optimized = false
# === Benchmark Settings ===
run_benchmark_on_startup = false # Run benchmark on startup
benchmark_duration_seconds = 30 # Benchmark duration
save_benchmark_results = true # Save benchmark results
# === Advanced CUDA Settings ===
cuda_stream_count = 2 # Number of CUDA streams
cuda_graph_enabled = false # Use CUDA graphs (CUDA 10+)
cooperative_groups = false # Use cooperative groups
unified_memory = false # Use CUDA unified memory