#cuda #gpu #maiden

maidenx

CUDA implementation for Maiden Engine

3 releases

new 0.0.5 Oct 30, 2024
0.0.4 Oct 28, 2024
0.0.3 Oct 27, 2024

#139 in Science

Download history 50/week @ 2024-10-21 308/week @ 2024-10-28

358 downloads per month

MIT/Apache

18KB

MaidenX

A Rust-based CUDA library designed for learning purposes and for building my AI engine, Maiden Engine.

License Crates.io

TODOS

[!NOTE]
This project is structured to resemble the PyTorch framework where possible, to aid in familiarization and learning.

[!WARNING] 🚧 This project is for personal learning and testing purposes, so it may not function properly. 🚧

Getting Started

Prerequisites

  • CUDA Toolkit
  • CMake

How to use

| | Using PyTorch | Using MaidenX |
|---|---|---|
| Creation | `torch.Tensor([[1, 2], [3, 4]])` | `Tensor::new(vec![vec![1.0, 2.0], vec![3.0, 4.0]])` |
| Creation | `torch.Tensor([[1, 2], [3, 4]])` | `Tensor::from_vec(vec![1.0, 2.0, 3.0, 4.0], &[2, 2])` |
| Creation | `nn.Sequential(A, B, C)` | `nn::ModuleBuilder::new().add_layer(A).add_layer(B).add_layer(C)` |

Example

[dependencies]
maidenx = { version = "0.0.5", features = ["full"] }
# CPU only (no extra features)
# maidenx = { version = "0.0.5" }
# enable only the "cuda" feature (the CPU is still the default device)
# maidenx = {version = "0.0.5", features = ["cuda"]}

How to use Tensor:

use maidenx::prelude::*;

fn main() -> Result<(), Box<dyn std::error::Error>> {
    let tensor1 = Tensor::new(vec![
        vec![1.0, 2.0, 3.0],
        vec![4.0, 5.0, 6.0],
    ])?;

    let tensor2 = Tensor::new(vec![
        vec![7.0, 8.0, 9.0],
        vec![10.0, 11.0, 12.0],
    ])?;

    let result = tensor1.add(&tensor2)?;
    
    println!("Shape: {:?}", result.shape());
    println!("Result:\n{}", result);

    Ok(())
}

How to use linear module:

use maidenx::prelude::*;

fn main() -> Result<(), Box<dyn std::error::Error>> {
    let input = Tensor::from_vec(vec![1.0, 2.0, 3.0, 4.0], &[2, 2])?;

    let linear = nn::Linear::new_with_bias(2, 3, true)?;

    let linear_output = linear.forward(&input)?;

    let relu = nn::ReLU::new();
    let relu_output = relu.forward(&linear_output)?;

    let sigmoid = nn::Sigmoid::new();
    let sigmoid_output = sigmoid.forward(&relu_output)?;

    let tanh = nn::Tanh::new();
    let tanh_output = tanh.forward(&sigmoid_output)?;

    println!("Input:\n{}", input);
    println!("Linear output:\n{}", linear_output);
    println!("ReLU output:\n{}", relu_output);
    println!("Sigmoid output:\n{}", sigmoid_output);
    println!("Tanh output:\n{}", tanh_output);

    Ok(())
}

How to select a device (the default is the CPU):

fn main() -> Result<(), Box<dyn std::error::Error>> {
    // Set CUDA if present on startup
    if let Ok(cuda_device) = Device::cuda(0) {
        set_current_device(cuda_device)?;
    }
    // all subsequent operations will use the set device
    ...
    Ok(())
}

How to use DeviceGuard:

use maidenx::prelude::*;

fn main() -> Result<(), Box<dyn std::error::Error>> {
   // CPU
   {
       let _guard = DeviceGuard::new(Device::cpu())?;
       
       let a = Tensor::new(vec![1.0, 2.0, 3.0])?;
       let b = Tensor::new(vec![4.0, 5.0, 6.0])?;
       let c = a.add(&b)?;
       println!("CPU Result: {}", c);
   } // CPU guard drops here

   // CUDA 
   if let Ok(cuda_device) = Device::cuda(0) {
       let _guard = DeviceGuard::new(cuda_device)?;
       
       let x = Tensor::new(vec![1.0, 2.0, 3.0])?;
       let y = Tensor::new(vec![4.0, 5.0, 6.0])?;
       let z = x.add(&y)?;
       println!("CUDA Result: {}", z);
   } // CUDA guard drops here

   Ok(())
}

For more examples, see examples.

Development Setup

Prerequisites

  • Rust
  • CUDA Toolkit
  • CMake
  • clangd

LSP Setup

For IDE support with CUDA files, create a .clangd file in crates/maidenx_cuda_kernels/:

CompileFlags:
  Remove: 
    - "-forward-unknown-to-host-compiler"
    - "-rdc=*"
    - "-Xcompiler*"
    - "--options-file"
    - "--generate-code*"
  Add: 
    - "-xcuda"
    - "-std=c++14"
    - "-I/YOUR/CUDA/PATH/include"    # Update this path
    - "-I../../cuda-headers"
    - "--cuda-gpu-arch=sm_75"
  Compiler: clang

Index:
  Background: Build

Diagnostics:
  UnusedIncludes: None

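Find your CUDA installation path (the commands below print the CUDA root directory; append /include to it for the -I flag above):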

# Linux
which nvcc | sed 's/\/bin\/nvcc//'

# Windows (PowerShell)
(Get-Command nvcc).Path -replace '\\bin\\nvcc.exe',''

Dependencies

~4MB
~79K SLoC