mirror of
https://gitlab.com/niansa/discord_llama.git
synced 2025-03-06 20:48:25 +01:00
Implemented max context age
This commit is contained in:
parent
18b1acb00e
commit
d60a1eebaa
4 changed files with 24 additions and 2 deletions
|
@ -21,4 +21,5 @@ pool_size 2
|
||||||
threads 4
|
threads 4
|
||||||
timeout 120
|
timeout 120
|
||||||
ctx_size 1012
|
ctx_size 1012
|
||||||
|
max_context_age 0
|
||||||
scroll_keep 20
|
scroll_keep 20
|
||||||
|
|
|
@ -50,5 +50,8 @@ timeout 120
|
||||||
# Max. context size
|
# Max. context size
|
||||||
ctx_size 1012
|
ctx_size 1012
|
||||||
|
|
||||||
|
# Max. context age in seconds; 0 to disable
|
||||||
|
max_context_age 0
|
||||||
|
|
||||||
# Percentage of context below prompt to be kept when scrolling. 0 means no context will be kept when scrolling (not recommended!!!)
|
# Percentage of context below prompt to be kept when scrolling. 0 means no context will be kept when scrolling (not recommended!!!)
|
||||||
scroll_keep 20
|
scroll_keep 20
|
||||||
|
|
|
@ -1 +1 @@
|
||||||
Subproject commit 06984bf36c5a8afae2b37b138a4ca0a5868e94e2
|
Subproject commit 3c62cbebaf13ae231614fd3d4b9d4316ac7a8227
|
20
main.cpp
20
main.cpp
|
@ -33,6 +33,7 @@ class Bot {
|
||||||
std::vector<dpp::snowflake> my_messages;
|
std::vector<dpp::snowflake> my_messages;
|
||||||
std::unordered_map<dpp::snowflake, dpp::user> users;
|
std::unordered_map<dpp::snowflake, dpp::user> users;
|
||||||
std::thread::id llm_tid;
|
std::thread::id llm_tid;
|
||||||
|
utils::Timer cleanup_timer;
|
||||||
sqlite::database db;
|
sqlite::database db;
|
||||||
|
|
||||||
std::mutex command_completion_buffer_mutex;
|
std::mutex command_completion_buffer_mutex;
|
||||||
|
@ -82,7 +83,8 @@ public:
|
||||||
threads = 4,
|
threads = 4,
|
||||||
scroll_keep = 20,
|
scroll_keep = 20,
|
||||||
shard_count = 1,
|
shard_count = 1,
|
||||||
shard_id = 0;
|
shard_id = 0,
|
||||||
|
max_context_age = 0;
|
||||||
bool persistance = true,
|
bool persistance = true,
|
||||||
mlock = false,
|
mlock = false,
|
||||||
live_edit = false,
|
live_edit = false,
|
||||||
|
@ -415,6 +417,19 @@ private:
|
||||||
return (unsigned(id.get_creation_time()) % config.shard_count) == config.shard_id;
|
return (unsigned(id.get_creation_time()) % config.shard_count) == config.shard_id;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Runs one cleanup pass over the inference pool and restarts the cleanup timer.
///
/// config.max_context_age is documented in the config file as
/// "Max. context age in seconds; 0 to disable", so a value of 0 skips the
/// pool cleanup entirely. The timer is reset in either case so that the next
/// interval is measured from this call.
void cleanup() {
    // Clean up InferencePool, but only when an age limit is configured.
    // NOTE(review): forwarding 0 would presumably be treated by the pool as a
    // zero-second age limit rather than "disabled" — confirm against the
    // InferencePool implementation.
    if (config.max_context_age != 0) {
        llm_pool.cleanup(config.max_context_age);
    }
    // Reset timer
    cleanup_timer.reset();
}
|
||||||
|
void attempt_cleanup() {
|
||||||
|
// Run cleanup if enough time has passed
|
||||||
|
if (cleanup_timer.get<std::chrono::seconds>() > config.max_context_age / 4) {
|
||||||
|
cleanup();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
std::string create_thread_name(const std::string& model_name, bool instruct_mode) const {
|
std::string create_thread_name(const std::string& model_name, bool instruct_mode) const {
|
||||||
return "Chat with "+model_name+" " // Model name
|
return "Chat with "+model_name+" " // Model name
|
||||||
+(instruct_mode?"":"(Non Instruct mode) ") // Instruct mode
|
+(instruct_mode?"":"(Non Instruct mode) ") // Instruct mode
|
||||||
|
@ -693,6 +708,7 @@ public:
|
||||||
}
|
}
|
||||||
|
|
||||||
void start() {
|
void start() {
|
||||||
|
cleanup(); // Run one cleanup pass (which also resets the cleanup timer) before the bot is started
|
||||||
bot.start(dpp::st_wait);
|
bot.start(dpp::st_wait); // NOTE(review): st_wait presumably makes this call block until the bot shuts down — confirm against the D++ docs
|
||||||
}
|
}
|
||||||
void stop_prepare() {
|
void stop_prepare() {
|
||||||
|
@ -780,6 +796,8 @@ int main(int argc, char **argv) {
|
||||||
cfg.timeout = std::stoi(value);
|
cfg.timeout = std::stoi(value);
|
||||||
} else if (key == "ctx_size") {
|
} else if (key == "ctx_size") {
|
||||||
cfg.ctx_size = std::stoi(value);
|
cfg.ctx_size = std::stoi(value);
|
||||||
|
} else if (key == "max_context_age") {
|
||||||
|
cfg.max_context_age = std::stoi(value);
|
||||||
} else if (key == "mlock") {
|
} else if (key == "mlock") {
|
||||||
cfg.mlock = parse_bool(value);
|
cfg.mlock = parse_bool(value);
|
||||||
} else if (key == "live_edit") {
|
} else if (key == "live_edit") {
|
||||||
|
|
Loading…
Add table
Reference in a new issue