# response.py
import json
from uuid import uuid4

from open_webui.utils.misc import (
    openai_chat_chunk_message_template,
    openai_chat_completion_message_template,
)

def convert_ollama_tool_call_to_openai(tool_calls: list) -> list:
    openai_tool_calls = []
    for tool_call in tool_calls:
        function = tool_call.get("function", {})
        openai_tool_call = {
            "index": tool_call.get("index", function.get("index", 0)),
            "id": tool_call.get("id", f"call_{str(uuid4())}"),
            "type": "function",
            "function": {
                "name": function.get("name", ""),
                # Ollama emits arguments as a dict; OpenAI expects a JSON string.
                "arguments": json.dumps(function.get("arguments", {})),
            },
        }
        openai_tool_calls.append(openai_tool_call)
    return openai_tool_calls
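
# A sketch of the mapping on a hypothetical input (the generated "id" embeds a
# fresh UUID, so it differs on every call):
#
#   convert_ollama_tool_call_to_openai(
#       [{"function": {"name": "get_weather", "arguments": {"city": "Paris"}}}]
#   )
#   # -> [{"index": 0, "id": "call_<uuid4>", "type": "function",
#   #      "function": {"name": "get_weather",
#   #                   "arguments": '{"city": "Paris"}'}}]
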
def convert_ollama_usage_to_openai(data: dict) -> dict:
    # Ollama reports all durations in nanoseconds.
    return {
        "response_token/s": (
            round(
                data.get("eval_count", 0)
                / (data.get("eval_duration", 0) / 1_000_000_000),
                2,
            )
            if data.get("eval_duration", 0) > 0
            else "N/A"
        ),
        "prompt_token/s": (
            round(
                data.get("prompt_eval_count", 0)
                / (data.get("prompt_eval_duration", 0) / 1_000_000_000),
                2,
            )
            if data.get("prompt_eval_duration", 0) > 0
            else "N/A"
        ),
        "total_duration": data.get("total_duration", 0),
        "load_duration": data.get("load_duration", 0),
        "prompt_eval_count": data.get("prompt_eval_count", 0),
        "prompt_tokens": int(
            data.get("prompt_eval_count", 0)
        ),  # This is the OpenAI-compatible key
        "prompt_eval_duration": data.get("prompt_eval_duration", 0),
        "eval_count": data.get("eval_count", 0),
        "completion_tokens": int(
            data.get("eval_count", 0)
        ),  # This is the OpenAI-compatible key
        "eval_duration": data.get("eval_duration", 0),
        # Human-readable wall-clock estimate derived from total_duration (ns -> s).
        "approximate_total": (lambda s: f"{s // 3600}h{(s % 3600) // 60}m{s % 60}s")(
            (data.get("total_duration", 0) or 0) // 1_000_000_000
        ),
        "total_tokens": int(  # This is the OpenAI-compatible key
            data.get("prompt_eval_count", 0) + data.get("eval_count", 0)
        ),
        "completion_tokens_details": {  # This is the OpenAI-compatible key
            "reasoning_tokens": 0,
            "accepted_prediction_tokens": 0,
            "rejected_prediction_tokens": 0,
        },
    }
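
# Illustrative numbers (not from a real run): eval_count=100 tokens generated
# over eval_duration=2_000_000_000 ns yields "response_token/s": 50.0, and a
# total_duration of 2_000_000_000 ns renders "approximate_total" as "0h0m2s".
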
def convert_response_ollama_to_openai(ollama_response: dict) -> dict:
    model = ollama_response.get("model", "ollama")
    message_content = ollama_response.get("message", {}).get("content", "")
    reasoning_content = ollama_response.get("message", {}).get("thinking", None)
    tool_calls = ollama_response.get("message", {}).get("tool_calls", None)
    openai_tool_calls = None

    if tool_calls:
        openai_tool_calls = convert_ollama_tool_call_to_openai(tool_calls)

    usage = convert_ollama_usage_to_openai(ollama_response)

    response = openai_chat_completion_message_template(
        model, message_content, reasoning_content, openai_tool_calls, usage
    )
    return response
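
# A sketch of the expected shapes (illustrative values;
# openai_chat_completion_message_template, imported above, fills in the
# standard "chat.completion" envelope):
#
#   convert_response_ollama_to_openai({
#       "model": "llama3",
#       "message": {"content": "Hi!"},
#       "prompt_eval_count": 5,
#       "eval_count": 2,
#       "eval_duration": 100_000_000,
#   })
#   # -> a chat.completion-style dict with "choices" and the usage block above
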
async def convert_streaming_response_ollama_to_openai(ollama_streaming_response):
    async for data in ollama_streaming_response.body_iterator:
        data = json.loads(data)

        model = data.get("model", "ollama")
        message_content = data.get("message", {}).get("content", None)
        reasoning_content = data.get("message", {}).get("thinking", None)
        tool_calls = data.get("message", {}).get("tool_calls", None)
        openai_tool_calls = None

        if tool_calls:
            openai_tool_calls = convert_ollama_tool_call_to_openai(tool_calls)

        done = data.get("done", False)

        usage = None
        if done:
            # Usage statistics are only present on the final chunk.
            usage = convert_ollama_usage_to_openai(data)

        data = openai_chat_chunk_message_template(
            model, message_content, reasoning_content, openai_tool_calls, usage
        )

        # Re-emit each chunk as a server-sent event in OpenAI's streaming format.
        line = f"data: {json.dumps(data)}\n\n"
        yield line

    yield "data: [DONE]\n\n"
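
# Typical wiring (a sketch assuming a FastAPI/Starlette handler; the upstream
# response must expose a `body_iterator` that yields one JSON object per line,
# as open-webui's proxied Ollama responses do):
#
#   from fastapi.responses import StreamingResponse
#
#   return StreamingResponse(
#       convert_streaming_response_ollama_to_openai(ollama_streaming_response),
#       media_type="text/event-stream",
#   )
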
def convert_embedding_response_ollama_to_openai(response) -> dict:
    """
    Convert the response from the Ollama embeddings endpoint to the
    OpenAI-compatible format.

    Args:
        response (dict): The response from the Ollama API,
            e.g. {"embedding": [...], "model": "..."}
            or {"embeddings": [{"embedding": [...], "index": 0}, ...], "model": "..."}

    Returns:
        dict: Response adapted to OpenAI's embeddings API format.
            e.g. {
                "object": "list",
                "data": [
                    {"object": "embedding", "embedding": [...], "index": 0},
                    ...
                ],
                "model": "...",
            }
    """
    # Ollama batch-style output
    if isinstance(response, dict) and "embeddings" in response:
        openai_data = []
        for i, emb in enumerate(response["embeddings"]):
            openai_data.append(
                {
                    "object": "embedding",
                    "embedding": emb.get("embedding"),
                    "index": emb.get("index", i),
                }
            )
        return {
            "object": "list",
            "data": openai_data,
            "model": response.get("model"),
        }
    # Ollama single output
    elif isinstance(response, dict) and "embedding" in response:
        return {
            "object": "list",
            "data": [
                {
                    "object": "embedding",
                    "embedding": response["embedding"],
                    "index": 0,
                }
            ],
            "model": response.get("model"),
        }
    # Already OpenAI-compatible?
    elif (
        isinstance(response, dict)
        and "data" in response
        and isinstance(response["data"], list)
    ):
        return response
    # Fallback: return as is if unrecognized
    return response
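
# Example of the batch-style branch (hypothetical two-dimensional vectors):
#
#   convert_embedding_response_ollama_to_openai(
#       {"embeddings": [{"embedding": [0.1, 0.2], "index": 0}],
#        "model": "nomic-embed-text"}
#   )
#   # -> {"object": "list",
#   #     "data": [{"object": "embedding", "embedding": [0.1, 0.2], "index": 0}],
#   #     "model": "nomic-embed-text"}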