# OpenAI-compatible layer schemas; referenced from server.yaml via $ref.
# This file is a schema fragment: it only carries `definitions` and is not
# meant to be served as a standalone API description.
---
swagger: "2.0"
info:
  title: OpenAPI fragments — OpenAI-compatible schemas
  version: "0.0"

definitions:
  # --- Completions /v1/completions ---

  # POST /v1/completions/prompt
  # Request body: chat-style messages (plus optional tools / thinking flag).
  OpenAICompletionsPromptRequest:
    type: object
    additionalProperties: true
    required:
      - model
      - messages
    properties:
      model:
        type: string
        description: Model ID
      messages:
        description: OpenAI 形状消息数组;含 system/user/assistant/tool
        type: array
        items:
          type: object
          additionalProperties: true
      tools:
        description: 可选;注入 chat template 的工具 JSON schema 数组;省略时不注入
        type: array
        items:
          type: object
          additionalProperties: true
      enable_thinking:
        description: 可选;是否启用 Qwen3 chat template 的 thinking 模式;省略或 false 时不插入 thinking 块
        type: boolean
      # Deprecated flag (per its description); callers should send `tools`.
      enable_tool_calling:
        description: 已废弃;请传 tools 数组
        type: boolean

  # Response body for POST /v1/completions/prompt.
  OpenAICompletionsPromptResponse:
    type: object
    required:
      - prompt_used
    properties:
      prompt_used:
        type: string
        description: 套用 chat template 后、与 POST /v1/completions 的 prompt 字段同义的完整模型输入

  # POST /v1/completions/prompt-incremental
  OpenAICompletionsPromptIncrementalRequest:
    type: object
    required:
      - model
      - tool_content
    properties:
      model:
        type: string
        description: Model ID
      tool_content:
        type: string
        description: tool 消息的 content(mock 返回值)
      tool_name:
        type: string
        description: 可选;tool 消息的 name 字段
      enable_thinking:
        description: 可选;与生成时一致;控制 generation scaffold 是否含 thinking 块
        type: boolean

  # Response body for POST /v1/completions/prompt-incremental.
  OpenAICompletionsPromptIncrementalResponse:
    type: object
    required:
      - incremental_suffix
    properties:
      incremental_suffix:
        type: string
        description: wire 追加 tool response 及下一轮 generation scaffold 的字面量后缀

  # Completion request; presumably the body of POST /v1/completions
  # (the `prompt` description points callers at /v1/completions/prompt first).
  OpenAICompletionsRequest:
    type: object
    additionalProperties: true
    required:
      - model
      - prompt
    properties:
      model:
        type: string
        description: Model ID
      prompt:
        description: 已确定的完整模型输入(需 chat template 时请先 POST /v1/completions/prompt)
        type: string
      best_of:
        type: integer
        minimum: 0
        maximum: 20
      echo:
        type: boolean
      frequency_penalty:
        type: number
        minimum: -2
        maximum: 2
      logit_bias:
        type: object
        additionalProperties:
          type: number
      logprobs:
        type: integer
        minimum: 0
        maximum: 5
      max_tokens:
        description: 可选;正整数(> 0),且与 prompt 合计不超过续写接口的上下文 token 上限;省略则用满剩余额度
        type: integer
        minimum: 1
      n:
        type: integer
        minimum: 1
        maximum: 128
      presence_penalty:
        type: number
        minimum: -2
        maximum: 2
      seed:
        type: integer
        format: int64
      # Accepts either a single string or an array of up to 4 strings.
      stop:
        description: 停止序列,最多 4 个
        type:
          - string
          - array
        items:
          type: string
        maxItems: 4
      stream:
        type: boolean
        description: 本服务忽略;响应恒为 SSE
      stream_options:
        type: object
        properties:
          include_usage:
            type: boolean
      suffix:
        type: string
      temperature:
        type: number
        minimum: 0
        maximum: 2
      top_p:
        type: number
        minimum: 0
        maximum: 1
      user:
        type: string

  # Token accounting attached to a completion response.
  OpenAICompletionUsage:
    type: object
    properties:
      prompt_tokens:
        type: integer
      completion_tokens:
        type: integer
      total_tokens:
        type: integer
      completion_tokens_details:
        type: object
        additionalProperties: true
      prompt_tokens_details:
        type: object
        additionalProperties: true

  # One generated choice; `logprobs` and `finish_reason` may be null.
  OpenAICompletionChoice:
    type: object
    properties:
      text:
        type: string
      index:
        type: integer
      logprobs:
        description: 无 logprobs 时为 null;否则含 text_offset、token_logprobs、tokens、top_logprobs
        type: object
        x-nullable: true
      finish_reason:
        type: string
        enum:
          - stop
          - length
          - content_filter
        x-nullable: true

  # bpe_strings items match server.yaml#/definitions/TokenWithOffset
  # (reused across files).
  InfoRadarCompletionPayload:
    type: object
    properties:
      bpe_strings:
        type: array
        items:
          $ref: "server.yaml#/definitions/TokenWithOffset"

  # Completion response envelope.
  OpenAICompletionsResponse:
    type: object
    required:
      - id
      - object
      - created
      - model
      - choices
    properties:
      id:
        type: string
      object:
        type: string
        enum:
          - text_completion
      created:
        type: integer
        description: Unix 时间戳(秒)
      model:
        type: string
      choices:
        type: array
        items:
          $ref: "#/definitions/OpenAICompletionChoice"
      system_fingerprint:
        type: string
      usage:
        $ref: "#/definitions/OpenAICompletionUsage"
      # NOTE(review): under JSON Reference semantics, keys placed next to
      # $ref may be ignored, so some tools could drop this description —
      # confirm against the generator in use before relying on it.
      info_radar:
        $ref: "#/definitions/InfoRadarCompletionPayload"
        description: 续写 token 级概率与 top-N,便于与信息密度可视化对齐