fix compiling errors
hodlen committed Jan 24, 2024
1 parent 06e0774 commit f3172b1
Showing 3 changed files with 22 additions and 9 deletions.
2 changes: 1 addition & 1 deletion common/common.cpp
@@ -963,7 +963,7 @@ std::tuple<struct llama_model *, struct llama_context *> llama_init_from_gpt_par
auto mparams = llama_model_params_from_gpt_params(params);
auto cparams = llama_context_params_from_gpt_params(params);

-llama_model * model = llama_load_model_from_file(params.model.c_str(), mparams, cparams);
+llama_model * model = llama_load_model_from_file_with_context(params.model.c_str(), mparams, &cparams);
if (model == NULL) {
fprintf(stderr, "%s: error: failed to load model '%s'\n", __func__, params.model.c_str());
return std::make_tuple(nullptr, nullptr);
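For orientation, here is a hedged sketch of how the surrounding llama_init_from_gpt_params flow looks after this change; the llama_new_context_with_model call mirrors upstream llama.cpp and is assumed to be unchanged in this fork:

// Context params are now passed by address so the loader can reserve
// KV-cache VRAM for the requested context size while placing tensors.
auto mparams = llama_model_params_from_gpt_params(params);
auto cparams = llama_context_params_from_gpt_params(params);

llama_model * model = llama_load_model_from_file_with_context(params.model.c_str(), mparams, &cparams);
if (model == NULL) {
    return std::make_tuple(nullptr, nullptr);
}

// The same cparams value is still used to create the context afterwards
// (assumed, following upstream llama.cpp).
llama_context * lctx = llama_new_context_with_model(model, cparams);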
23 changes: 16 additions & 7 deletions llama.cpp
@@ -3042,7 +3042,7 @@ static size_t llm_load_gpu_split(llama_model_loader & ml, llama_model & model, b
static void llm_load_sparse_model_tensors(
llama_model_loader & ml,
llama_model & model,
-const llama_context_params & cparams,
+const llama_context_params * cparams,
int main_gpu,
long int vram_budget_bytes,
bool reset_gpu_index,
@@ -3216,7 +3216,9 @@ static void llm_load_sparse_model_tensors(
model.mapping = std::move(ml.mapping);

// Reserve KV cache in VRAM
-llama_reserve_model_kv_cache(&model, &cparams);
+if (cparams != NULL) {
+    llama_reserve_model_kv_cache(&model, cparams);
+}
// Offload FFN segments to GPU if possible
model.ffn_offloaded_bytes = llm_load_gpu_split(ml, model, reset_gpu_index, disable_ffn_split);

@@ -3977,7 +3979,7 @@ static void llm_load_tensors(
model.t_load_us = ggml_time_us() - model.t_start_us;
}

-static bool llama_model_load(const std::string & fname, llama_model & model, const llama_model_params & params, const llama_context_params & cparams) {
+static bool llama_model_load(const std::string & fname, llama_model & model, const llama_model_params & params, const llama_context_params * cparams) {
try {
llama_model_loader ml(fname, params.use_mmap);

@@ -9431,10 +9433,11 @@ int64_t llama_time_us(void) {
return ggml_time_us();
}

-struct llama_model * llama_load_model_from_file(
-    const char * path_model,
-    struct llama_model_params params,
-    struct llama_context_params cparams) {
+struct llama_model * llama_load_model_from_file_with_context(
+    const char * path_model,
+    struct llama_model_params params,
+    struct llama_context_params * cparams
+) {
ggml_time_init();

llama_model * model = new llama_model;
@@ -9464,6 +9467,12 @@ struct llama_model * llama_load_model_from_file(
return model;
}

+struct llama_model * llama_load_model_from_file(
+    const char * path_model,
+    struct llama_model_params params) {
+    return llama_load_model_from_file_with_context(path_model, params, nullptr);
+}

void llama_free_model(struct llama_model * model) {
delete model;
}
6 changes: 5 additions & 1 deletion llama.h
@@ -274,8 +274,12 @@ extern "C" {

LLAMA_API struct llama_model * llama_load_model_from_file(
const char * path_model,
+    struct llama_model_params params);
+
+LLAMA_API struct llama_model * llama_load_model_from_file_with_context(
+    const char * path_model,
     struct llama_model_params params,
-    struct llama_context_params cparams);
+    struct llama_context_params * cparams);

LLAMA_API void llama_free_model(struct llama_model * model);

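Taken together, the header now exposes two loading entry points. A minimal usage sketch, assuming the upstream llama.cpp helpers llama_model_default_params, llama_context_default_params, llama_new_context_with_model, and llama_free are available unchanged in this fork (the model path is hypothetical):

#include "llama.h"

int main(void) {
    llama_model_params mparams = llama_model_default_params();
    llama_context_params cparams = llama_context_default_params();
    cparams.n_ctx = 2048; // the KV cache reserved at load time depends on this

    // New variant: context params are known up front, so KV-cache VRAM
    // can be reserved while the model is loaded.
    llama_model * model = llama_load_model_from_file_with_context("model.gguf", mparams, &cparams);

    // Old-style variant (kept for existing callers): no context params,
    // which internally forwards a nullptr and skips the reservation.
    // llama_model * model = llama_load_model_from_file("model.gguf", mparams);

    if (model == NULL) {
        return 1;
    }

    llama_context * ctx = llama_new_context_with_model(model, cparams);

    llama_free(ctx);
    llama_free_model(model);
    return 0;
}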
