Code compile on H100
jainapurva committed Sep 12, 2024
commit ed4ce1c (parent 85d03de)
Showing 2 changed files with 6 additions and 2 deletions.
scripts/hf_eval.py (3 changes: 2 additions & 1 deletion)
@@ -66,6 +66,7 @@ def run_evaluation(repo_id, tasks, limit, device, precision, quantization, spars
         model = autoquant(model.to(device=device))
 
     if quantization != "autoquant" and compile:
+        model = model.to(device)
         model = torch.compile(model, mode="max-autotune", fullgraph=True)
 
     if sparsity == "semi_sparse":
@@ -89,7 +90,7 @@ def all_linear(mod, name):
     with torch.no_grad():
         result = evaluate(
             HFLM(
-                pretrained=model.to(device),
+                pretrained=model,
                 tokenizer=tokenizer,
                 batch_size=batch_size,
                 max_length=max_length),
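
For context on the hf_eval.py change: the model is now moved to the target device before torch.compile is called, and the later HFLM(...) construction receives the already-placed model instead of issuing its own .to(device). Since torch.compile specializes the captured graph on tensor properties, including device, moving the model afterwards can force guard failures and recompilation, and can error under mode="max-autotune" with fullgraph=True; the commit title suggests this is what the H100 run hit. A minimal sketch of the pattern, with a hypothetical toy module (MyModel and the tensor shapes are illustrative, not from the repository):

import torch

# Hypothetical stand-in for the Hugging Face model loaded in hf_eval.py.
class MyModel(torch.nn.Module):
    def __init__(self):
        super().__init__()
        self.linear = torch.nn.Linear(16, 16)

    def forward(self, x):
        return self.linear(x)

device = "cuda" if torch.cuda.is_available() else "cpu"

model = MyModel()
model = model.to(device)  # place the model first, so compilation captures kernels for this device
model = torch.compile(model, mode="max-autotune", fullgraph=True)
out = model(torch.randn(2, 16, device=device))  # first call triggers compilation
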
torchao/kernel/intmm.py (5 changes: 4 additions & 1 deletion)
@@ -69,7 +69,10 @@ def safe_int_mm(input: torch.Tensor, mat2: torch.Tensor) -> torch.Tensor:
         input = (
             input.contiguous()
         )  # (it seems the transpose makes cublas check the above j constraint on i)
-        return out_dtype(torch.ops.aten.mm.default, torch.int32, input, mat2)
+        try:
+            return out_dtype(torch.ops.aten.mm.default, torch.int32, input, mat2)
+        except:
+            return torch.matmul(input.to(torch.float32), mat2.to(torch.float32)).to(torch.int32)
 else:
     def safe_int_mm(input: torch.Tensor, mat2: torch.Tensor) -> torch.Tensor:
         """
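
The intmm.py hunk wraps the fused integer matmul in a try/except so that a backend which rejects the int32-accumulating kernel (the commit title points at H100) falls back to a float32 matmul cast back to int32. A self-contained sketch of that fallback pattern, assuming the same private out_dtype higher-order op that the diff shows intmm.py already using; the function name is ours, and the sketch narrows the diff's bare except to except Exception:

import torch
from torch._higher_order_ops.out_dtype import out_dtype  # private PyTorch API, as used in torchao/kernel/intmm.py

def int_mm_with_fallback(a: torch.Tensor, b: torch.Tensor) -> torch.Tensor:
    """int8 x int8 -> int32 matmul with a float32 fallback (hypothetical helper)."""
    try:
        # Fast path: aten.mm with int32 accumulation, fused into one op.
        return out_dtype(torch.ops.aten.mm.default, torch.int32, a, b)
    except Exception:
        # Fallback mirroring the commit: promote to float32, matmul, cast back.
        # float32 carries 24 bits of integer precision, so results stay exact
        # unless the reduction dimension is very large.
        return torch.matmul(a.to(torch.float32), b.to(torch.float32)).to(torch.int32)

a = torch.randint(-128, 128, (17, 64), dtype=torch.int8)
b = torch.randint(-128, 128, (64, 32), dtype=torch.int8)
c = int_mm_with_fallback(a, b)  # int32 tensor of shape (17, 32)
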
