当前位置: 首页 > news >正文

OpenAI API JSON 格式指南与json_repair错误修复

核心参数是 response_format={"type": "json_object"}。其他支持 JSON 输出的模型也可以这样使用,下面我们以 OpenAI 模型为例。

指定OpenAI API返回JSON格式

基本JSON格式响应示例

import openai

# Create the API client; replace the placeholder with a real key.
client = openai.OpenAI(api_key="your-api-key")

# response_format={"type": "json_object"} is the parameter this example
# demonstrates: it asks the model to return a JSON object as the message
# content.
response = client.chat.completions.create(
    model="gpt-4-turbo",
    response_format={"type": "json_object"},
    messages=[
        {"role": "system", "content": "你是一个返回JSON格式的助手。"},
        {"role": "user", "content": "返回包含用户名、年龄和爱好的JSON"},
    ],
)

# The JSON text is carried in the first choice's message content.
print(response.choices[0].message.content)
# 输出示例:
# {
#   "name": "John Doe",
#   "age": 30,
#   "hobbies": ["reading", "hiking", "photography"]
# }

更复杂的结构化数据请求

# Request a larger structured payload (a list of users) in JSON mode.
# Relies on the `client` object created in the basic example.
response = client.chat.completions.create(
    model="gpt-4-turbo",
    response_format={"type": "json_object"},
    messages=[
        {"role": "system", "content": "你是一个返回JSON格式的助手。"},
        {"role": "user", "content": "生成5个用户的数据,包括姓名、电子邮件和订阅状态"},
    ],
)

print(response.choices[0].message.content)
# 输出示例:
# {
#   "users": [
#     {"id": 1, "name": "Alice Smith", "email": "alice@example.com", "subscribed": true},
#     {"id": 2, "name": "Bob Johnson", "email": "bob@example.com", "subscribed": false},
#     {"id": 3, "name": "Carol Williams", "email": "carol@example.com", "subscribed": true},
#     {"id": 4, "name": "David Brown", "email": "david@example.com", "subscribed": true},
#     {"id": 5, "name": "Eve Davis", "email": "eve@example.com", "subscribed": false}
#   ]
# }

使用函数调用确保JSON响应

# Use a tool (function) definition instead of response_format: the JSON
# schema under "parameters" describes the arguments object, and tool_choice
# names the analyze_sentiment function explicitly.
# Relies on the `client` object created in the basic example.
response = client.chat.completions.create(
    model="gpt-4-turbo",
    messages=[
        {"role": "system", "content": "你是一个帮助用户的助手。"},
        {"role": "user", "content": "分析以下文本的情感:'我今天非常开心,但天气不太好'"},
    ],
    tools=[
        {
            "type": "function",
            "function": {
                "name": "analyze_sentiment",
                "description": "分析文本的情感",
                "parameters": {
                    "type": "object",
                    "properties": {
                        "text": {"type": "string", "description": "要分析的文本"},
                        "sentiment": {
                            "type": "string",
                            "enum": ["positive", "negative", "neutral", "mixed"],
                        },
                        "confidence": {"type": "number", "description": "情感分析的置信度"},
                        "details": {
                            "type": "object",
                            "properties": {
                                "positive_aspects": {
                                    "type": "array",
                                    "items": {"type": "string"},
                                },
                                "negative_aspects": {
                                    "type": "array",
                                    "items": {"type": "string"},
                                },
                            },
                        },
                    },
                    "required": ["sentiment", "confidence"],
                },
            },
        }
    ],
    tool_choice={"type": "function", "function": {"name": "analyze_sentiment"}},
)

# The structured result is the JSON string in the tool call's arguments,
# not the message content.
print(response.choices[0].message.tool_calls[0].function.arguments)
# 输出示例:
# {
#   "text": "我今天非常开心,但天气不太好",
#   "sentiment": "mixed",
#   "confidence": 0.85,
#   "details": {
#     "positive_aspects": ["今天非常开心"],
#     "negative_aspects": ["天气不太好"]
#   }
# }

处理特定场景的JSON返回格式

示例1: 中文内容的JSON格式


# JSON mode with Chinese content: ask for sentences with translations.
# Relies on the `client` object created in the basic example.
response = client.chat.completions.create(
    model="gpt-4-turbo",
    response_format={"type": "json_object"},
    messages=[
        {"role": "system", "content": "你是一个返回JSON格式的助手。"},
        {"role": "user", "content": "返回一个包含中文句子及其英文翻译的JSON数组"},
    ],
)

print(response.choices[0].message.content)
# 输出示例:
# {
#   "translations": [
#     {"chinese": "你好世界", "english": "Hello world"},
#     {"chinese": "很高兴认识你", "english": "Nice to meet you"},
#     {"chinese": "我爱学习编程", "english": "I love learning programming"}
#   ]
# }

示例2: 嵌套JSON结构

# JSON mode with a nested structure: a company with departments and staff.
# Relies on the `client` object created in the basic example.
response = client.chat.completions.create(
    model="gpt-4-turbo",
    response_format={"type": "json_object"},
    messages=[
        {"role": "system", "content": "你是一个返回JSON格式的助手。"},
        {"role": "user", "content": "返回一个公司结构的JSON,包含部门和员工"},
    ],
)

print(response.choices[0].message.content)
# 输出示例:
# {
#   "company": {
#     "name": "Tech Solutions Inc.",
#     "founded": 2010,
#     "departments": [
#       {
#         "name": "Engineering",
#         "head": "Zhang Wei",
#         "employees": [
#           {"id": 101, "name": "李明", "position": "Senior Developer"},
#           {"id": 102, "name": "王芳", "position": "QA Engineer"}
#         ]
#       },
#       {
#         "name": "Marketing",
#         "head": "Sarah Johnson",
#         "employees": [
#           {"id": 201, "name": "刘青", "position": "Marketing Specialist"},
#           {"id": 202, "name": "陈晓", "position": "Content Writer"}
#         ]
#       }
#     ]
#   }
# }

示例3: 强制模型遵循特定JSON模式


def get_structured_data(query, schema):
    """Ask the model for JSON that follows the given JSON schema.

    The schema is serialized into the system prompt and the request is made
    in JSON mode via the module-level ``client``.

    Args:
        query: The user request sent as the user message.
        schema: A JSON-serializable schema describing the expected output.

    Returns:
        The message content of the first choice (a JSON string).
    """
    # Imported locally because this listing appears before the article's
    # top-level ``import json``.
    import json

    schema_text = json.dumps(schema, ensure_ascii=False, indent=2)
    # Built line by line so the fenced schema keeps its own indentation
    # instead of inheriting the source code's indentation.
    system_prompt = (
        "你必须严格按照以下JSON模式返回数据:\n"
        f"```\n{schema_text}\n```\n"
        "\n"
        "不要添加任何额外的字段,也不要省略任何必需的字段。\n"
        "不要在返回的JSON外包含任何其他文本、解释或注释。"
    )

    response = client.chat.completions.create(
        model="gpt-4-turbo",
        response_format={"type": "json_object"},
        messages=[
            {"role": "system", "content": system_prompt},
            {"role": "user", "content": query},
        ],
    )
    return response.choices[0].message.content


# Define a specific data schema
# JSON schema for the expected payload: an object with a required
# "products" array whose items must carry all five listed fields.
product_schema = {
    "type": "object",
    "properties": {
        "products": {
            "type": "array",
            "items": {
                "type": "object",
                "properties": {
                    "id": {"type": "string"},
                    "name": {"type": "string"},
                    "price": {"type": "number"},
                    "category": {"type": "string"},
                    "inStock": {"type": "boolean"},
                },
                "required": ["id", "name", "price", "category", "inStock"],
            },
        }
    },
    "required": ["products"],
}

result = get_structured_data("生成3个电子产品的详细信息", product_schema)
print(result)
# 输出示例:
# {
#   "products": [
#     {
#       "id": "EP001",
#       "name": "超薄笔记本电脑",
#       "price": 5999.99,
#       "category": "电脑",
#       "inStock": true
#     },
#     {
#       "id": "EP002",
#       "name": "智能手机",
#       "price": 3999.99,
#       "category": "手机",
#       "inStock": true
#     },
#     {
#       "id": "EP003",
#       "name": "无线耳机",
#       "price": 999.99,
#       "category": "音频设备",
#       "inStock": false
#     }
#   ]
# }

使用json_repair修复JSON错误示例

当OpenAI API返回的JSON格式有问题时,可以使用json_repair库修复这些错误。可以看到大部分简单的错误示例是可以直接修复的,有些语义难度大的确实比较难修复。以下是常见的JSON错误及其修复示例:

from json_repair import repair_json, loads
import json
  • 示例1: 修复单引号替代双引号的问题
bad_json1 = "{'name': 'John', 'age': 30, 'city': 'New York'}"
fixed_json1 = repair_json(bad_json1)
print("修复单引号:")
print(f"修复前: {bad_json1}")
print(f"修复后: {fixed_json1}")
print()
  • 示例2: 修复缺少引号的键
bad_json2 = "{name: 'John', age: 30, city: 'New York'}"
fixed_json2 = repair_json(bad_json2)
print("修复缺少引号的键:")
print(f"修复前: {bad_json2}")
print(f"修复后: {fixed_json2}")
print()
  • 示例3: 修复逗号问题
bad_json3 = '{"name": "John", "age": 30, "city": "New York",}'  # 结尾多余的逗号
fixed_json3 = repair_json(bad_json3)
print("修复多余的逗号:")
print(f"修复前: {bad_json3}")
print(f"修复后: {fixed_json3}")
print()
  • 示例4: 修复缺少大括号的问题
bad_json4 = '"name": "John", "age": 30, "city": "New York"'
fixed_json4 = repair_json(bad_json4)
print("修复缺少括号:")
print(f"修复前: {bad_json4}")
print(f"修复后: {fixed_json4}")
print()


注意:示例4的修复直接失败了,json_repair 没有补全缺失的大括号。

  • 示例5: 修复非标准的布尔值或空值
bad_json5 = '{"name": "John", "active": True, "data": None}'
fixed_json5 = repair_json(bad_json5)
print("修复非标准的布尔值或空值:")
print(f"修复前: {bad_json5}")
print(f"修复后: {fixed_json5}")
print()
  • 示例6: 修复嵌套结构中的错误
bad_json6 = '{"user": {"name": "John", "contacts": {"email": "john@example.com", phone: "123-456-7890"}}}'
fixed_json6 = repair_json(bad_json6)
print("修复嵌套结构中的错误:")
print(f"修复前: {bad_json6}")
print(f"修复后: {fixed_json6}")
print()
  • 示例7: 修复数组中的错误
bad_json7 = '{"items": [1, 2, 3,, 4, 5]}'  # 数组中有多余的逗号
fixed_json7 = repair_json(bad_json7)
print("修复数组中的错误:")
print(f"修复前: {bad_json7}")
print(f"修复后: {fixed_json7}")
print()
  • 示例8: 修复不匹配的括号
bad_json8 = '{"name": "John", "items": [1, 2, 3}'  # 方括号没有闭合
fixed_json8 = repair_json(bad_json8)
print("修复不匹配的括号:")
print(f"修复前: {bad_json8}")
print(f"修复后: {fixed_json8}")
print()
  • 示例9: 修复中文等非ASCII字符的问题
bad_json9 = "{'name': '张三', 'city': '北京'}"
fixed_json9 = repair_json(bad_json9, ensure_ascii=False)
print("修复包含中文的JSON并保留中文字符:")
print(f"修复前: {bad_json9}")
print(f"修复后: {fixed_json9}")
print()
  • 示例10: 直接获取Python对象而不是JSON字符串
bad_json10 = "{'name': 'John', 'age': 30, 'skills': ['Python', 'JavaScript']}"
fixed_obj10 = loads(bad_json10)  # 等同于 repair_json(bad_json10, return_objects=True)
print("直接获取Python对象:")
print(f"修复前: {bad_json10}")
print(f"修复后(Python对象): {fixed_obj10}")
print(f"对象类型: {type(fixed_obj10)}")
print()
  • 示例11: 处理严重破损的JSON
# Input mixing free-form natural-language text with JSON-like fragments.
severely_broken_json = "{这不是有效的JSON,name: 'John', age: missing_value}"
try:
    fixed_severely_broken = repair_json(severely_broken_json)
    print("修复严重破损的JSON:")
    print(f"修复前: {severely_broken_json}")
    print(f"修复后: {fixed_severely_broken}")
except Exception as e:
    # Best-effort demo: report the failure instead of aborting the script.
    print(f"修复失败: {e}")
print()


这个示例其实修复失败了:开头混入了一句不属于JSON的自然语言文本,对解析的干扰很大,修复难度也比较高。

  • 示例12: 处理包含注释的JSON (JSON标准不支持注释)
# Each field sits on its own line so that the `//` line comment ends at the
# line break instead of swallowing the "age" and "city" fields.
json_with_comments = """
{
  "name": "John", // 这是用户名
  "age": 30, /* 这是年龄 */
  "city": "New York"
}
"""
fixed_json_comments = repair_json(json_with_comments)
print("修复包含注释的JSON:")
print(f"修复前: {json_with_comments}")
print(f"修复后: {fixed_json_comments}")

还有一个常见场景:模型返回的内容经常被包在 Markdown 代码块里,即以 ```json 开头、以 ``` 结尾。

比如下面:


markdown_json = """```json
{"name": "张三","age": 30,"skills": ['Python', 'JavaScript', 'React'],"contact": {email: "zhangsan@example.com",phone: "123-456-7890"}
}
```"""

或者

broken_json = """{"products": [{"id": 1, "name": "笔记本电脑", "price": 5999.99},{"id": 2, "name": "智能手机", "price": 3999.99,},{"id": 3, name: "无线耳机", "price": 999.99}],"total_items": 3,"in_stock": True
}"""

我们可以用下面一个函数来去除前缀和后缀,然后再去修复

def repair_json_output(content: str) -> str:
    """Repair and normalize JSON output.

    Surrounding whitespace is stripped first. Content that starts with
    ``{`` or ``[``, or that contains a ```` ```json ```` / ```` ```ts ````
    fence marker, is treated as JSON: a leading fence marker and a trailing
    ```` ``` ```` are removed, the remainder is parsed with
    ``json_repair.loads`` and re-serialized with ``json.dumps``.

    Args:
        content (str): String content that may contain JSON.

    Returns:
        str: Repaired JSON string, or the stripped content if it does not
            look like JSON. If repair raises, a warning is logged and the
            fence-stripped content is returned.
    """
    content = content.strip()
    looks_like_json = (
        content.startswith(("{", "["))
        or "```json" in content
        or "```ts" in content
    )
    if not looks_like_json:
        return content

    # Imported locally: the surrounding listing only does
    # `from json_repair import repair_json, loads` and defines no logger,
    # so the bare names `json_repair` and `logger` are otherwise unbound.
    import logging

    import json_repair

    try:
        # Drop a leading ```json / ```ts fence marker and a trailing ```.
        # removeprefix/removesuffix are no-ops when the marker is absent.
        content = content.removeprefix("```json")
        content = content.removeprefix("```ts")
        content = content.removesuffix("```")

        # Try to repair and parse JSON, then emit canonical JSON text.
        repaired_content = json_repair.loads(content)
        return json.dumps(repaired_content, ensure_ascii=False)
    except Exception as e:
        # Deliberate best-effort: never let a repair failure propagate.
        logging.getLogger(__name__).warning("JSON repair failed: %s", e)
    return content
http://www.xdnf.cn/news/379279.html

相关文章:

  • 深入理解卷积神经网络的输入层:数据的起点与预处理核心
  • [Pandas]数据处理
  • MySQL 从入门到精通(六):视图全面详解 —— 虚拟表的灵活运用
  • PyTorch量化感知训练技术:模型压缩与高精度边缘部署实践
  • TDengine 在智能制造中的核心价值
  • 工控新宠| 触想Z系列工控机C款发布,方寸机身,智控万千
  • OSPF综合实验实验报告
  • 深度学习篇---MediaPipe 及其人体姿态估计模型详解
  • 广东省省考备考(第七天5.10)—言语:片段阅读(每日一练)
  • Vue插槽(Slots)详解
  • SkyReels-V2 视频生成
  • Cadence 高速系统设计流程及工具使用三
  • 加速pip下载:永久解决网络慢问题
  • 数据集-目标检测系列- 冥想 检测数据集 close_eye>> DataBall
  • AI实战笔记(1)AI 的 6 大核心方向 + 学习阶段路径
  • Linxu实验五——NFS服务器
  • WordPress插件targetsms存在远程命令执行漏洞(CVE-2025-3776)
  • 20250510-查看 Anaconda 配置的镜像源
  • redis未授权访问
  • [架构之美]从零开始整合Spring Boot与Maven(十五)
  • AUTODL Chatglm2 langchain 部署大模型聊天助手
  • C语言初阶秘籍6
  • 二分法和牛顿迭代法解方程实根,详解
  • 第十九节:图像梯度与边缘检测- Laplacian 算子
  • 「OC」源码学习——cache_t的原理探究
  • C32-编程案例用函数封装获取两个数的较大数
  • IPFS与去中心化存储:重塑数字世界的基石
  • nuscenes_devkit工具
  • Windows:Powershell的使用
  • 进阶二:基于HC-SR04和LCD1602的超声波测距