InfiniTensor · YdrMaster · May 11, 2025 · Apr 21, 2025 · Apr 22, 2025 · Apr 22, 2025
diff --git a/ggml-quants/README.md b/ggml-quants/README.md
@@ -12,4 +12,41 @@
 ![GitHub contributors](https://img.shields.io/github/contributors/InfiniTensor/gguf)
 ![GitHub commit activity](https://img.shields.io/github/commit-activity/m/InfiniTensor/gguf)
 
-`ggml-quants` is a Rust library that implements the quantized data types defined by `ggml` and their corresponding quantization and dequantization algorithms.
+`ggml-quants` 是一个 Rust 库，用于实现 `ggml` 定义的量化数据类型及其对应的量化和反量化算法。
+
+---
+
+## 项目简介
+
+`ggml-quants` 提供了一组高效的量化工具，用于将浮点数据压缩为更小的量化格式（如 `Q4_0`、`Q8_1` 等），并支持将量化数据近似还原为浮点数据。  
+该库的核心功能包括：
+- 支持多种量化格式（如 `Q4_0`、`Q8_0`、`Q8_1` 等）。
+- 提供通用的量化和反量化接口。
+- 使用并行化技术（基于 `rayon`）提升大规模数据处理性能。
+
+---
+
+## 使用示例
+
+```rust
+use ggml_quants::{Quantize, Q8_1};
+
+// 原始浮点数据（一个 `Q8_1` 块对应 32 个元素）
+let data: [f32; 32] = std::array::from_fn(|i| i as f32 * 0.1);
+
+// 量化数据
+let quantized = Q8_1::quantize(&data);
+
+// 反量化数据
+let dequantized = quantized.dequantize();
+```
+
+---
+
+## 应用场景
+
+在 `gguf` 项目中，模型权重通常以浮点数形式存储（如 `f32` 或 `f16`），这会占用大量内存，成为限制性能的主要因素。通过使用 `ggml-quants` 提供的量化工具，可以在可接受的精度损失范围内，将权重从浮点格式压缩为更小的量化格式（如 `Q4_0` 或 `Q8_1`），从而：
+
+- 减少存储空间：降低模型部署对硬件内存的需求。
+- 加快加载速度：量化后的权重文件更小，加载时间显著减少。
+- 提升推理效率：在支持量化计算的硬件（如 GPU 或专用加速器）上，推理速度可以显著提升。
diff --git a/ggml-quants/src/lib.rs b/ggml-quants/src/lib.rs
@@ -123,6 +123,7 @@ pub mod types;
 
 #[cfg(test)]
 #[allow(dead_code)]
+// 测试工具，仅在测试时使用
 pub(crate) mod test_utils {
     use crate::Quantize;
     use std::fmt;

diff --git a/ggml-quants/src/structs/half.rs b/ggml-quants/src/structs/half.rs
@@ -8,6 +8,7 @@ impl_data_block!(bf16 = ty::BF16; bf16::ZERO);
 impl Quantize<f32, _1> for f16 {
     #[inline]
     fn quantize(&[data]: &[f32; _1]) -> Self {
+        // 验证块大小是否正确，需要对常量进行断言
         #[allow(clippy::assertions_on_constants)]
         const {
             assert!(Self::COUNT == _1)
@@ -23,6 +24,7 @@ impl Quantize<f32, _1> for f16 {
 impl Quantize<f32, _1> for bf16 {
     #[inline]
     fn quantize(&[data]: &[f32; _1]) -> Self {
+        // 验证块大小是否正确，需要对常量进行断言
         #[allow(clippy::assertions_on_constants)]
         const {
             assert!(Self::COUNT == _1)

diff --git a/ggml-quants/src/structs/q4_0.rs b/ggml-quants/src/structs/q4_0.rs
@@ -18,6 +18,7 @@ impl_data_block! {
 
 impl Quantize<f32, _32> for Q4_0 {
     fn quantize(data: &[f32; _32]) -> Self {
+        // 验证块大小是否正确，需要对常量进行断言
         #[allow(clippy::assertions_on_constants)]
         const {
             assert!(Self::COUNT == _32)

diff --git a/ggml-quants/src/structs/q4_1.rs b/ggml-quants/src/structs/q4_1.rs
@@ -18,6 +18,7 @@ impl_data_block! {
 
 impl Quantize<f32, _32> for Q4_1 {
     fn quantize(data: &[f32; _32]) -> Self {
+        // 验证块大小是否正确，需要对常量进行断言
         #[allow(clippy::assertions_on_constants)]
         const {
             assert!(Self::COUNT == _32)

diff --git a/ggml-quants/src/structs/q5_0.rs b/ggml-quants/src/structs/q5_0.rs
@@ -20,6 +20,7 @@ impl_data_block! {
 
 impl Quantize<f32, _32> for Q5_0 {
     fn quantize(data: &[f32; _32]) -> Self {
+        // 验证块大小是否正确，需要对常量进行断言
         #[allow(clippy::assertions_on_constants)]
         const {
             assert!(Self::COUNT == _32)

diff --git a/ggml-quants/src/structs/q5_1.rs b/ggml-quants/src/structs/q5_1.rs
@@ -20,6 +20,7 @@ impl_data_block! {
 
 impl Quantize<f32, _32> for Q5_1 {
     fn quantize(data: &[f32; _32]) -> Self {
+        // 验证块大小是否正确，需要对常量进行断言
         #[allow(clippy::assertions_on_constants)]
         const {
             assert!(Self::COUNT == _32)

diff --git a/ggml-quants/src/structs/q8_0.rs b/ggml-quants/src/structs/q8_0.rs
@@ -18,6 +18,7 @@ impl_data_block! {
 
 impl Quantize<f32, _32> for Q8_0 {
     fn quantize(data: &[f32; _32]) -> Self {
+        // 验证块大小是否正确，需要对常量进行断言
         #[allow(clippy::assertions_on_constants)]
         const {
             assert!(Self::COUNT == _32)

diff --git a/ggml-quants/src/structs/q8_1.rs b/ggml-quants/src/structs/q8_1.rs
@@ -22,6 +22,7 @@ impl_data_block! {
 
 impl Quantize<f32, _32> for Q8_1 {
     fn quantize(data: &[f32; _32]) -> Self {
+        // 验证块大小是否正确，需要对常量进行断言
         #[allow(clippy::assertions_on_constants)]
         const {
             assert!(Self::COUNT == _32)

diff --git a/ggml-quants/src/structs/q8_k.rs b/ggml-quants/src/structs/q8_k.rs
@@ -21,6 +21,7 @@ impl_data_block! {
 
 impl Quantize<f32, _256> for Q8K {
     fn quantize(data: &[f32; _256]) -> Self {
+        // 验证块大小是否正确，需要对常量进行断言
         #[allow(clippy::assertions_on_constants)]
         const {
             assert!(Self::COUNT == _256)