Add Windows Kernel for PyTorch 2.9 + CUDA13 (#5)

Files changed (4) hide show

.gitattributes CHANGED Viewed

@@ -34,3 +34,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text
 *.so filter=lfs diff=lfs merge=lfs -text

 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text
 *.so filter=lfs diff=lfs merge=lfs -text
+build/torch29-cu130-x86_64-windows/rotary/_rotary_a793e44.pyd filter=lfs diff=lfs merge=lfs -text

build/torch29-cu130-x86_64-windows/rotary/__init__.py ADDED Viewed

+from typing import Optional, Tuple
+import torch
+from ._ops import ops
+def apply_rotary(
+    x1: torch.Tensor,
+    x2: torch.Tensor,
+    cos: torch.Tensor,
+    sin: torch.Tensor,
+    out1: torch.Tensor,
+    out2: torch.Tensor,
+    conj: bool,
+) -> None:
+    ops.apply_rotary(x1, x2, cos, sin, out1, out2, conj)
+def apply_rotary_transformers(
+    q: torch.Tensor,
+    k: torch.Tensor,
+    cos: torch.Tensor,
+    sin: torch.Tensor,
+    position_ids: Optional[torch.Tensor] = None,
+    unsqueeze_dim: int = 1,
+) -> Tuple[torch.Tensor, torch.Tensor]:
+    """
+    Rotary kernel implementation wrapper
+    Adapts rotary kernels implementation to match transformers apply_rotary_pos_emb signature
+    """
+    cos = cos.unsqueeze(unsqueeze_dim)
+    sin = sin.unsqueeze(unsqueeze_dim)
+    q_rotated = q.clone()
+    k_rotated = k.clone()
+    # Get half dimension for rotation
+    half_dim = q.shape[-1] // 2
+    q1 = q_rotated[..., :half_dim]
+    q2 = q_rotated[..., half_dim:]
+    k1 = k_rotated[..., :half_dim]
+    k2 = k_rotated[..., half_dim:]
+    if cos.shape[-1] != half_dim:
+        # Trim cos/sin to match half_dim
+        cos = cos[..., :half_dim]
+        sin = sin[..., :half_dim]
+    apply_rotary(q1, q2, cos, sin, q1, q2, False)
+    apply_rotary(k1, k2, cos, sin, k1, k2, False)
+    return q_rotated, k_rotated
+__all__ = ["apply_rotary", "apply_rotary_transformers"]

build/torch29-cu130-x86_64-windows/rotary/_ops.py ADDED Viewed

+import torch
+from . import _rotary_a793e44
+ops = torch.ops._rotary_a793e44
+def add_op_namespace_prefix(op_name: str):
+    """
+    Prefix op by namespace.
+    """
+    return f"_rotary_a793e44::{op_name}"

build/torch29-cu130-x86_64-windows/rotary/_rotary_a793e44.pyd ADDED Viewed

+version https://git-lfs.github.com/spec/v1
+oid sha256:606c6eb81894dc8197f73e0e71a5356f56c61c612e5f77ab5c3d7c351eab8d3a
+size 8007680