테스트

2026-02-01 18:15:59 +09:00
parent b7efaf0542
commit 58decec7f5
2 changed files with 34 additions and 10 deletions
--- a/config/ai/call_llm_model.py
+++ b/config/ai/call_llm_model.py
@@ -1,4 +1,5 @@
-from transformers import AutoTokenizer, AutoModelForCausalLM
+from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig
+from sympy.printing.pytorch import torch

 QWEN_MODEL_PATH = "./models/Qwen3-0.6B"

@@ -25,10 +26,18 @@ def get_qwen_model():
        local_files_only=True
    )

+    # # 4-bit 양자화 설정 gpu
+    # bnb_config = BitsAndBytesConfig(
+    #     load_in_4bit=True,
+    #     bnb_4bit_use_double_quant=True,     # 추가 압축
+    #     bnb_4bit_quant_type="nf4",          # 성능이 좋은 양자화 방식
+    #     bnb_4bit_compute_dtype=torch.bfloat16 # 연산 속도 유지
+    # )
+
    # 모델 로드
-    from sympy.printing.pytorch import torch
    _model = AutoModelForCausalLM.from_pretrained(
        QWEN_MODEL_PATH,
+        # quantization_config=bnb_config, #gpu
        dtype=torch.bfloat16,  # CPU: bfloat16, GPU: float16 권장
        device_map="auto",
        trust_remote_code=True,