Skip to content

Commit

Permalink
cuda : fix LLAMA_CUDA_F16 build (ggerganov#6197)
Browse files Browse the repository at this point in the history
  • Loading branch information
slaren authored and hodlen committed Apr 3, 2024
1 parent 6fb00ab commit 26600e5
Showing 1 changed file with 1 addition and 1 deletion.
2 changes: 1 addition & 1 deletion ggml-cuda.cu
Expand Up @@ -9453,7 +9453,7 @@ static void ggml_cuda_op_dequantize_mul_mat_vec(

// on some GPUs it is faster to convert src1 to half and to use half precision intrinsics
#ifdef GGML_CUDA_F16
- cuda_pool_alloc<half> src1_dfloat_a;
+ ggml_cuda_pool_alloc<half> src1_dfloat_a(ctx.pool());
half * src1_dfloat = nullptr; // dfloat == half

bool src1_convert_f16 =
Expand Down

0 comments on commit 26600e5

Please sign in to comment.