Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions include/infinicore/nn/linear.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,8 @@ class BaseLinear : public Module {
size_t out_features() const { return out_features_; }
bool has_bias() const { return has_bias_; }
DataType dtype() const { return dtype_; }
// Scaling factor applied to the matmul result: y = alpha * (x @ W^T) + b (default 1.0).
float alpha() const { return alpha_; }
// Set the matmul scaling factor used by the next forward pass.
void set_alpha(float alpha) { alpha_ = alpha; }

// Accessors for parameters
Tensor weight() const { return weight_; }
Expand Down Expand Up @@ -56,6 +58,7 @@ class BaseLinear : public Module {
size_t out_features_;
bool has_bias_;
DataType dtype_;
float alpha_ = 1.0f;
std::shared_ptr<infinicore::quantization::BaseQuantization> quantization_ = std::make_shared<infinicore::quantization::NoneQuantization>(nullptr);
};

Expand Down
4 changes: 2 additions & 2 deletions include/infinicore/ops/linear.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -5,8 +5,8 @@

namespace infinicore::op {

// Computes y = alpha * (input @ weight^T) + bias.
// `alpha` scales only the matmul term, never the bias; defaults to 1.0
// (plain linear transformation).  `bias` may be std::nullopt.
Tensor linear(Tensor input, Tensor weight, std::optional<Tensor> bias, float alpha = 1.0f);

// In-place variant: writes alpha * (input @ weight^T) + bias into `out`.
void linear_(Tensor out, Tensor input, Tensor weight, std::optional<Tensor> bias, float alpha = 1.0f);

} // namespace infinicore::op
13 changes: 11 additions & 2 deletions python/infinicore/nn/functional/linear.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,15 +4,23 @@
__all__ = ["linear"]


def linear(input: Tensor, weight: Tensor, bias=None, *, out=None) -> Tensor:
r"""Applies a linear transformation to the incoming data: y=xA^T+b."""
def linear(
input: Tensor,
weight: Tensor,
bias=None,
*,
alpha: float = 1.0,
out=None,
) -> Tensor:
r"""Applies a linear transformation to the incoming data: y=alpha*xA^T+b."""

if out is None:
return Tensor(
_infinicore.linear(
input._underlying,
weight._underlying,
None if bias is None else bias._underlying,
alpha,
)
)

Expand All @@ -21,5 +29,6 @@ def linear(input: Tensor, weight: Tensor, bias=None, *, out=None) -> Tensor:
input._underlying,
weight._underlying,
None if bias is None else bias._underlying,
alpha,
)
return out
11 changes: 10 additions & 1 deletion python/infinicore/nn/modules/linear.py
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,7 @@ def __init__(
super().__init__()
self.in_features = in_features
self.out_features = out_features
self._alpha = 1.0
self.weight = Parameter(
infinicore.empty([out_features, in_features], **factory_kwargs)
)
Expand All @@ -55,7 +56,15 @@ def __init__(
self.register_parameter("bias", None)

def forward(self, input: Tensor) -> Tensor:
    """Apply the linear transformation y = alpha * (x @ W^T) + b.

    The scaling factor comes from this module's ``alpha`` property
    (default 1.0, i.e. a plain linear layer).
    """
    return F.linear(input, self.weight, self.bias, alpha=self._alpha)

def extra_repr(self) -> str:
    """Return the feature sizes and bias flag shown in the module repr."""
    has_bias = self.bias is not None
    return (
        f"in_features={self.in_features}, "
        f"out_features={self.out_features}, bias={has_bias}"
    )

@property
def alpha(self) -> float:
    """Scaling factor applied to the matmul result in forward() (default 1.0)."""
    return self._alpha

@alpha.setter
def alpha(self, value: float):
    """Set the matmul scaling factor used by forward()."""
    self._alpha = value
2 changes: 1 addition & 1 deletion src/infinicore/nn/linear.cc
Original file line number Diff line number Diff line change
Expand Up @@ -78,7 +78,7 @@ Tensor BaseLinear::compute_linear(Tensor &input) const {
Tensor weight_tensor = static_cast<const Tensor &>(weight_);
std::optional<Tensor> bias_opt = has_bias_ ? std::make_optional<Tensor>(static_cast<const Tensor &>(bias_)) : std::nullopt;

auto output = infinicore::op::linear(input_contiguous->contiguous(), weight_tensor->contiguous(), bias_opt);
auto output = infinicore::op::linear(input_contiguous->contiguous(), weight_tensor->contiguous(), bias_opt, alpha_);
return output;
}
}
Expand Down
9 changes: 5 additions & 4 deletions src/infinicore/ops/linear/linear.cc
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,8 @@ namespace infinicore::op {

Tensor linear(Tensor input,
Tensor weight,
std::optional<Tensor> bias) {
std::optional<Tensor> bias,
float alpha) {

Size ndim = input->ndim();
Size out_features = weight->shape()[0];
Expand All @@ -17,14 +18,15 @@ Tensor linear(Tensor input,
auto out = Tensor::empty(output_shape, input->dtype(), input->device());

// Inplace Calculate
linear_(out, input, weight, bias);
linear_(out, input, weight, bias, alpha);
return out;
}

void linear_(Tensor out,
Tensor input,
Tensor weight,
std::optional<Tensor> bias) {
std::optional<Tensor> bias,
float alpha) {

auto weight_shape = weight->shape();
Size out_features = weight_shape[0];
Expand All @@ -43,7 +45,6 @@ void linear_(Tensor out,
// linear transformation
Tensor out_view = out->view({N, out_features});
// Add bias
float alpha = 1.0f;
float beta = 0.0f;
if (bias.has_value()) {
rearrange_(out_view,
Expand Down
16 changes: 10 additions & 6 deletions src/infinicore/pybind11/ops/linear.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -10,25 +10,27 @@ namespace infinicore::ops {

// Python-facing wrapper for op::linear.
// Converts a Python `bias` argument (None or a Tensor) into
// std::optional<Tensor> and forwards it together with `alpha`,
// which scales only the matmul term: y = alpha * (input @ weight^T) + bias.
Tensor py_linear(Tensor input,
                 Tensor weight,
                 pybind11::object bias,
                 float alpha = 1.0f) {
    std::optional<Tensor> bias_tensor = std::nullopt;
    if (!bias.is_none()) {
        bias_tensor = bias.cast<Tensor>();
    }
    return op::linear(input, weight, bias_tensor, alpha);
}

void py_linear_(Tensor out,
Tensor input,
Tensor weight,
pybind11::object bias) {
pybind11::object bias,
float alpha = 1.0f) {

std::optional<Tensor> bias_tensor = std::nullopt;
if (!bias.is_none()) {
bias_tensor = bias.cast<Tensor>();
}

op::linear_(out, input, weight, bias_tensor);
op::linear_(out, input, weight, bias_tensor, alpha);
}

inline void bind_linear(py::module &m) {
Expand All @@ -38,15 +40,17 @@ inline void bind_linear(py::module &m) {
py::arg("input"),
py::arg("weight"),
py::arg("bias") = py::none(),
R"doc(Applies a linear transformation to the incoming data: y=xA^T+b.)doc");
py::arg("alpha") = 1.0f,
R"doc(Applies a linear transformation to the incoming data: y=alpha*xA^T+b.)doc");

m.def("linear_",
&ops::py_linear_,
py::arg("out"),
py::arg("input"),
py::arg("weight"),
py::arg("bias") = py::none(),
R"doc(In-place, applies a linear transformation to the incoming data: y=xA^T+b.)doc");
py::arg("alpha") = 1.0f,
R"doc(In-place, applies a linear transformation to the incoming data: y=alpha*xA^T+b.)doc");
}

} // namespace infinicore::ops
36 changes: 34 additions & 2 deletions test/infinicore/nn/linear.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,13 @@
((10, 5, 1024), (3072, 1024), (3072,), False),
]

# Alpha test cases: (x_shape, weight_shape, bias_shape, bias, alpha)
_ALPHA_TEST_CASES_DATA = [
((2, 5, 256), (512, 256), (512,), True, 2.5),
((2, 5, 256), (512, 256), (512,), False, 0.5),
((1, 1024), (3072, 1024), (3072,), True, 1.0),
]

# Tolerance configuration
_TOLERANCE_MAP = {
infinicore.float16: {"atol": 0, "rtol": 1e-2},
Expand Down Expand Up @@ -74,6 +81,25 @@ def parse_test_cases():
)
)

# Alpha test cases
for x_shape, weight_shape, bias_shape, has_bias, alpha in _ALPHA_TEST_CASES_DATA:
for dtype in _TENSOR_DTYPES:
tolerance = _TOLERANCE_MAP.get(dtype, {"atol": 0, "rtol": 1e-3})
x_spec = TensorSpec.from_tensor(x_shape, None, dtype, name="x")
weight_spec = TensorSpec.from_tensor(weight_shape, None, dtype, name="weight")
bias_spec = TensorSpec.from_tensor(bias_shape, None, dtype, name="bias")

test_cases.append(
TestCase(
inputs=[x_spec, weight_spec, bias_spec],
kwargs={"has_bias": has_bias, "alpha": alpha},
output_spec=None,
comparison_target=None,
tolerance=tolerance,
description=f"nn.Linear - ALPHA={alpha}",
)
)

return test_cases


Expand Down Expand Up @@ -123,7 +149,7 @@ def __init__(self):
def get_test_cases(self):
return parse_test_cases()

def torch_operator(self, x, weight, bias, has_bias):
def torch_operator(self, x, weight, bias, has_bias, alpha=None):
"""PyTorch nn.Linear implementation"""
out_features, in_features = weight.shape
params_dict = {"l.weight": weight}
Expand All @@ -141,9 +167,13 @@ def torch_operator(self, x, weight, bias, has_bias):

with torch.no_grad():
y = model(x)
if alpha is not None:
# alpha scales only matmul, not bias: alpha * (x @ W^T) + b
y_matmul = torch.nn.functional.linear(x, weight)
y = alpha * y_matmul + (bias if has_bias else 0)
return y

def infinicore_operator(self, x, weight, bias, has_bias):
def infinicore_operator(self, x, weight, bias, has_bias, alpha=None):
"""InfiniCore nn.Linear implementation"""

out_features, in_features = weight.shape
Expand All @@ -158,6 +188,8 @@ def infinicore_operator(self, x, weight, bias, has_bias):
device=weight.device,
dtype=weight.dtype,
)
if alpha is not None:
model.l.alpha = alpha
model.load_state_dict(params_dict)

y = model(x)
Expand Down
54 changes: 54 additions & 0 deletions test/infinicore/ops/linear.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,13 @@
(None, 1, 2048, 5632, True, None, None, None),
]

# Alpha test cases: (bs, n, in_features, out_features, bias, input_strides, weight_strides, out_strides, alpha)
_ALPHA_TEST_CASES_DATA = [
(2, 5, 256, 512, True, None, None, None, 2.5),
(2, 5, 256, 512, False, None, None, None, 0.5),
(1, 10, 256, 512, True, None, None, None, 0.0),
]

# Tolerance configuration
_TOLERANCE_MAP = {
infinicore.float16: {"atol": 0, "rtol": 1e-2},
Expand Down Expand Up @@ -109,6 +116,40 @@ def parse_test_cases():
)
)

# Alpha test cases
for data in _ALPHA_TEST_CASES_DATA:
bs = data[0]
n, in_features, out_features = data[1], data[2], data[3]
bias = data[4]
input_strides = data[5] if len(data) > 5 else None
weight_strides = data[6] if len(data) > 6 else None
out_strides = data[7] if len(data) > 7 else None
alpha = data[8]

if bs is None:
input_shape = (n, in_features)
else:
input_shape = (bs, n, in_features)
weight_shape = (out_features, in_features)
bias_shape = (out_features,) if bias else None

for dtype in _TENSOR_DTYPES:
tolerance = _TOLERANCE_MAP.get(dtype, {"atol": 0, "rtol": 1e-3})
input_spec = TensorSpec.from_tensor(input_shape, input_strides, dtype)
weight_spec = TensorSpec.from_tensor(weight_shape, weight_strides, dtype)
bias_spec = TensorSpec.from_tensor(bias_shape, None, dtype) if bias_shape else None

test_cases.append(
TestCase(
inputs=[input_spec, weight_spec, bias_spec],
kwargs={"alpha": alpha},
output_spec=None,
comparison_target=None,
tolerance=tolerance,
description=f"Linear - ALPHA={alpha}",
)
)

return test_cases


Expand All @@ -123,6 +164,19 @@ def get_test_cases(self):

def torch_operator(self, *args, **kwargs):
    """PyTorch reference for linear: y = alpha * (x @ W^T) + b.

    Accepts (input, weight[, bias]) positionally, plus optional
    ``alpha`` and ``out`` keyword arguments.  ``alpha`` scales only
    the matmul result, never the bias term.
    """
    alpha = kwargs.pop("alpha", 1.0)
    # Pop `out` ourselves: torch.nn.functional.linear has no `out`
    # parameter, so letting it leak through **kwargs raises TypeError.
    out = kwargs.pop("out", None)
    input_tensor = args[0]
    weight = args[1]
    bias = args[2] if len(args) > 2 else None
    if alpha != 1.0:
        # alpha scales only the matmul, not the bias: alpha * (x @ W^T) + b
        result = alpha * torch.nn.functional.linear(input_tensor, weight)
        if bias is not None:
            result = result + bias
    else:
        result = torch.nn.functional.linear(input_tensor, weight, bias)
    if out is not None:
        out.copy_(result)
        return out
    return result

def infinicore_operator(self, *args, **kwargs):
Expand Down
Loading