fix(ai-proxy): Fix Cohere breaking when the `model` parameter is sent in the request body; Fix OpenAI token counting for function requests; Fix users sending their own `model` parameter #13000

base: master
New changelog entry:

```diff
@@ -0,0 +1,5 @@
+message: |
+  **AI-proxy-plugin**: Fix a bug where setting OpenAI SDK model parameter "null" caused analytics
+  to not be written to the logging plugin(s).
+scope: Plugin
+type: bugfix
```

> **Review comment:** Use a past tense: "Fixed". (@outsinre please correct me if I'm wrong)
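A hedged sketch of why a JSON `null` model is awkward to detect (this assumes the lua-cjson library that Kong uses for JSON decoding; the request body shown is illustrative):

```lua
-- Sketch: JSON null decodes to the cjson.null sentinel, which is truthy in
-- Lua, so a plain `if body.model then` check treats it as a real model name.
local cjson = require "cjson.safe"

local body = cjson.decode('{"model": null, "prompt": "hi"}')

print(body.model == cjson.null)  --> true
print(type(body.model))          --> "userdata", not "string" or "nil"

if body.model then
  -- this branch runs: the "model" key exists, but its value is unusable,
  -- which is how a null model could slip through downstream handling
end
```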
New changelog entry:

```diff
@@ -0,0 +1,5 @@
+message: |
+  **AI-proxy-plugin**: Fix a bug where certain Azure models would return partial tokens/words
+  when in response-streaming mode.
+scope: Plugin
+type: bugfix
```
New changelog entry:

```diff
@@ -0,0 +1,5 @@
+message: |
+  **AI-proxy-plugin**: Fix a bug where Cohere and Anthropic providers don't read the `model` parameter properly
+  from the caller's request body.
+scope: Plugin
+type: bugfix
```
New changelog entry:

```diff
@@ -0,0 +1,5 @@
+message: |
+  **AI-proxy-plugin**: Fix the bug where using "OpenAI Function" inference requests would log a
+  request error, and then hang until timeout.
+scope: Plugin
+type: bugfix
```
New changelog entry:

```diff
@@ -0,0 +1,5 @@
+message: |
+  **AI-proxy-plugin**: Fix a bug where AI Proxy would still allow callers to specify their own model,
+  ignoring the plugin-configured model name.
+scope: Plugin
+type: bugfix
```
New changelog entry:

```diff
@@ -0,0 +1,5 @@
+message: |
+  **AI-proxy-plugin**: Fix a bug where AI Proxy would not give the plugin's configured
+  model tuning options precedence over those in the user's LLM request.
+scope: Plugin
+type: bugfix
```
Anthropic driver:

```diff
@@ -93,8 +93,8 @@ local transformers_to = {
     return nil, nil, err
   end

-  messages.temperature = request_table.temperature or (model.options and model.options.temperature) or nil
-  messages.max_tokens = request_table.max_tokens or (model.options and model.options.max_tokens) or nil
+  messages.temperature = (model.options and model.options.temperature) or request_table.temperature or nil
+  messages.max_tokens = (model.options and model.options.max_tokens) or request_table.max_tokens or nil
   messages.model = model.name or request_table.model
   messages.stream = request_table.stream or false -- explicitly set this if nil
```

> **Review comment:** The `or nil` can be omitted.
> **Review comment:** …and in the following places.
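A minimal sketch of what the reordering changes (the values are hypothetical): with the old `or` chain the caller's request body won; with the new one the plugin-configured option takes precedence, matching the changelog entry above. As the reviewers note, the trailing `or nil` is redundant, since a failed `or` chain already yields `nil`.

```lua
-- Sketch of the precedence swap, with hypothetical values.
local model = { options = { temperature = 0.2 } }   -- plugin configuration
local request_table = { temperature = 1.0 }         -- caller's request body

-- old order: the caller's value shadows the plugin configuration
local old = request_table.temperature or (model.options and model.options.temperature)

-- new order: the plugin configuration wins when both are set
local new = (model.options and model.options.temperature) or request_table.temperature

print(old, new)  --> 1.0    0.2
```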
```diff
@@ -110,9 +110,8 @@ local transformers_to = {
     return nil, nil, err
   end

-  prompt.temperature = request_table.temperature or (model.options and model.options.temperature) or nil
-  prompt.max_tokens_to_sample = request_table.max_tokens or (model.options and model.options.max_tokens) or nil
-  prompt.model = model.name
+  prompt.temperature = (model.options and model.options.temperature) or request_table.temperature or nil
+  prompt.max_tokens_to_sample = (model.options and model.options.max_tokens) or request_table.max_tokens or nil
+  prompt.model = model.name or request_table.model
   prompt.stream = request_table.stream or false -- explicitly set this if nil
```
```diff
@@ -442,12 +441,7 @@ function _M.post_request(conf)
 end

 function _M.pre_request(conf, body)
-  -- check for user trying to bring own model
-  if body and body.model then
-    return nil, "cannot use own model for this instance"
-  end
-
-  return true, nil
+  return true
 end

 -- returns err or nil
```
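Both provider-level `pre_request` hooks drop their own-model rejection and now simply return `true`; per the handler hunk further down, that validation appears to move into the shared `access` phase, where the configured and requested model names can be compared directly.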
Cohere driver:

```diff
@@ -400,12 +400,7 @@ function _M.post_request(conf)
 end

 function _M.pre_request(conf, body)
-  -- check for user trying to bring own model
-  if body and body.model then
-    return false, "cannot use own model for this instance"
-  end
-
-  return true, nil
+  return true
 end

 function _M.subrequest(body, conf, http_opts, return_res_table)
```
```diff
@@ -467,7 +462,7 @@
 function _M.configure_request(conf)
   local parsed_url

-  if conf.model.options.upstream_url then
+  if conf.model.options and conf.model.options.upstream_url then
     parsed_url = socket_url.parse(conf.model.options.upstream_url)
   else
     parsed_url = socket_url.parse(ai_shared.upstream_url_format[DRIVER_NAME])
```
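A short sketch of the failure mode the added guard avoids (the `conf` value is hypothetical): in Lua, indexing a field on a `nil` sub-table raises an error rather than returning `nil`.

```lua
-- Sketch: why `conf.model.options and ...` matters when options is unset.
local conf = { model = { name = "command-r" } }   -- no options table configured

-- the old check would raise an error instead of falling through
local ok, err = pcall(function()
  return conf.model.options.upstream_url
end)
print(ok, err)  --> false   ...attempt to index a nil value (field 'options')

-- the new check short-circuits safely and falls back to the default
if conf.model.options and conf.model.options.upstream_url then
  print("use the configured upstream_url")
else
  print("fall back to the provider's default URL format")
end
```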
```diff
@@ -476,10 +471,6 @@
       or ai_shared.operation_map[DRIVER_NAME][conf.route_type]
       and ai_shared.operation_map[DRIVER_NAME][conf.route_type].path
       or "/"
-
-    if not parsed_url.path then
-      return false, fmt("operation %s is not supported for cohere provider", conf.route_type)
-    end
   end

   -- if the path is read from a URL capture, ensure that it is valid
```
Streaming token-text helper:

```diff
@@ -34,7 +34,8 @@ local function get_token_text(event_t)
   -- - event_t.choices[1].delta.content
   -- - event_t.choices[1].text
   -- - ""
-  return (first_choice.delta or EMPTY).content or first_choice.text or ""
+  local token_text = (first_choice.delta or EMPTY).content or first_choice.text or ""
+  return (type(token_text) == "string" and token_text) or ""
 end
```
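A hedged sketch of the case the new guard handles (assumes lua-cjson; the streaming event is illustrative): a delta whose `content` is JSON `null` decodes to the truthy `cjson.null` sentinel, survives the `or` chain, and would later break string handling such as token counting. The `type()` check normalizes any non-string value to an empty string.

```lua
local cjson = require "cjson.safe"
local EMPTY = {}

-- Illustrative streaming event: the delta carries a JSON null content field.
local event_t = cjson.decode('{"choices":[{"delta":{"content":null}}]}')
local first_choice = event_t.choices[1]

-- the old return expression lets the null sentinel through
local token_text = (first_choice.delta or EMPTY).content or first_choice.text or ""
print(type(token_text))  --> "userdata" (cjson.null), not a string

-- the new guard collapses any non-string value to ""
token_text = (type(token_text) == "string" and token_text) or ""
print(token_text == "")  --> true
```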
AI Proxy handler `access` phase:

```diff
@@ -334,17 +335,25 @@ function _M:access(conf)
   -- copy from the user request if present
   if (not multipart) and (not conf_m.model.name) and (request_table.model) then
-    conf_m.model.name = request_table.model
+    if request_table.model ~= cjson.null then
+      conf_m.model.name = request_table.model
+    end
   elseif multipart then
     conf_m.model.name = "NOT_SPECIFIED"
   end

+  -- check that the user isn't trying to override the plugin conf model in the request body
+  if request_table and request_table.model and type(request_table.model) == "string" then
+    if request_table.model ~= conf_m.model.name then
+      return bad_request("cannot use own model - must be: " .. conf_m.model.name)
+    end
+  end
+
   -- model is stashed in the copied plugin conf, for consistency in transformation functions
   if not conf_m.model.name then
     return bad_request("model parameter not found in request, nor in gateway configuration")
   end

   -- stash for analytics later
   kong_ctx_plugin.llm_model_requested = conf_m.model.name

   -- check the incoming format is the same as the configured LLM format
```

> **Review comment:** Should we check the case that the model in the request is a blank string?
> **Review comment:** How about checking …
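A hedged sketch of the resulting model-resolution rules, condensed into a standalone helper (this function is hypothetical, not Kong's API; it only mirrors the logic of the hunk above): the caller may omit `model` or repeat the configured name, but may not override it.

```lua
local cjson = require "cjson.safe"

-- Hypothetical helper mirroring the access-phase logic above.
local function resolve_model(configured_name, request_model)
  local name = configured_name

  -- copy from the request only if the plugin left the name unset,
  -- skipping the cjson.null sentinel produced by a JSON null
  if not name and request_model and request_model ~= cjson.null then
    name = request_model
  end

  -- reject an explicit string model that differs from the resolved name
  if request_model and type(request_model) == "string" and request_model ~= name then
    return nil, "cannot use own model - must be: " .. name
  end

  if not name then
    return nil, "model parameter not found in request, nor in gateway configuration"
  end

  return name
end

print(resolve_model("gpt-4o", nil))        --> gpt-4o
print(resolve_model(nil, "gpt-4o"))        --> gpt-4o
print(resolve_model("gpt-4o", "gpt-4o"))   --> gpt-4o
print(resolve_model("gpt-4o", "gpt-3.5"))  --> nil   cannot use own model - must be: gpt-4o
```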
> **Review comment:** Let's rename this file to make it more appropriate.