从2gis API我得到以下JSON字符串。
{
"api_version": "1.3",
"response_code": "200",
"id": "3237490513229753",
"lon": "38.969916127827",
"lat": "45.069889625267",
"page_url": null,
"name": "ATB",
"firm_group": {
"id": "3237499103085728",
"count": "1"
},
"city_name": "Krasnodar",
"city_id": "3237585002430511",
"address": "Turgeneva, 172/1",
"create_time": "2008-07-22 10:02:04 07",
"modification_time": "2013-08-09 20:04:36 07",
"see_also": [
{
"id": "3237491513434577",
"lon": 38.973110606808,
"lat": 45.029031222211,
"name": "Advance",
"hash": "5698hn745A8IJ1H86177uvgn94521J3464he26763737242Cf6e654G62J0I7878e",
"ads": {
"sponsored_article": {
"title": "Center "ADVANCE"",
"text": "Business.English."
},
"warning": null
}
}
]
}
但Python不承认它:
json.loads(firm_str)
期待,分隔符:第1行第3646行(字符3645)
看起来像引号中的问题:“标题”:“中心”ADVANCE“”
如何在Python中自动修复它?
发布于 2019-01-03 14:25:01
唯一真实和明确的解决方案是2gis来修复他们的API。
与此同时,可以修复字符串中编码错误的JSON转义双引号。如果每个键值对后跟换行符(因为它似乎来自发布的数据),则以下函数将执行以下操作:
def fixjson(badjson):
s = badjson
idx = 0
while True:
try:
start = s.index( '": "', idx) + 4
end1 = s.index( '",\n',idx)
end2 = s.index( '"\n', idx)
if end1 < end2:
end = end1
else:
end = end2
content = s[start:end]
content = content.replace('"', '\\"')
s = s[:start] + content + s[end:]
idx = start + len(content) + 6
except:
return s
请注意,有些假设:
该函数试图转义属于键值对的值字符串内的双引号字符。
假设要转义的文本在序列之后开始
": "
并在序列之前结束
",\n
要么
"\n
将发布的JSON传递给函数会产生此返回值
{
"api_version": "1.3",
"response_code": "200",
"id": "3237490513229753",
"lon": "38.969916127827",
"lat": "45.069889625267",
"page_url": null,
"name": "ATB",
"firm_group": {
"id": "3237499103085728",
"count": "1"
},
"city_name": "Krasnodar",
"city_id": "3237585002430511",
"address": "Turgeneva, 172/1",
"create_time": "2008-07-22 10:02:04 07",
"modification_time": "2013-08-09 20:04:36 07",
"see_also": [
{
"id": "3237491513434577",
"lon": 38.973110606808,
"lat": 45.029031222211,
"name": "Advance",
"hash": "5698hn745A8IJ1H86177uvgn94521J3464he26763737242Cf6e654G62J0I7878e",
"ads": {
"sponsored_article": {
"title": "Center \"ADVANCE\"",
"text": "Business.English."
},
"warning": null
}
}
]
}
请记住,如果您的需求不能完全满足,您可以轻松自定义功能。
发布于 2019-01-03 15:58:22
上面的想法很好,但我有问题。我的json Sting只包含一个额外的双引号。所以,我修复了上面给出的代码。
jsonStr是
{
"api_version": "1.3",
"response_code": "200",
"id": "3237490513229753",
"lon": "38.969916127827",
"lat": "45.069889625267",
"page_url": null,
"name": "ATB",
"firm_group": {
"id": "3237499103085728",
"count": "1"
},
"city_name": "Krasnodar",
"city_id": "3237585002430511",
"address": "Turgeneva, 172/1",
"create_time": "2008-07-22 10:02:04 07",
"modification_time": "2013-08-09 20:04:36 07",
"see_also": [
{
"id": "3237491513434577",
"lon": 38.973110606808,
"lat": 45.029031222211,
"name": "Advance",
"hash": "5698hn745A8IJ1H86177uvgn94521J3464he26763737242Cf6e654G62J0I7878e",
"ads": {
"sponsored_article": {
"title": "Center "ADVANCE",
"text": "Business.English."
},
"warning": null
}
}
]
}
修复方法如下:
import json, re
def fixJSON(jsonStr):
# Substitue all the backslash from JSON string.
jsonStr = re.sub(r'\\', '', jsonStr)
try:
return json.loads(jsonStr)
except ValueError:
while True:
# Search json string specifically for '"'
b = re.search(r'[\w|"]\s?(")\s?[\w|"]', jsonStr)
# If we don't find any the we come out of loop
if not b:
break
# Get the location of \"
s, e = b.span(1)
c = jsonStr[s:e]
# Replace \" with \'
c = c.replace('"',"'")
jsonStr = jsonStr[:s] + c + jsonStr[e:]
return json.loads(jsonStr)
此代码也适用于问题陈述中提到的JSON字符串
或者你也可以这样做:
def fixJSON(jsonStr):
# First remove the " from where it is supposed to be.
jsonStr = re.sub(r'\\', '', jsonStr)
jsonStr = re.sub(r'{"', '{`', jsonStr)
jsonStr = re.sub(r'"}', '`}', jsonStr)
jsonStr = re.sub(r'":"', '`:`', jsonStr)
jsonStr = re.sub(r'":', '`:', jsonStr)
jsonStr = re.sub(r'","', '`,`', jsonStr)
jsonStr = re.sub(r'",', '`,', jsonStr)
jsonStr = re.sub(r',"', ',`', jsonStr)
jsonStr = re.sub(r'\["', '\[`', jsonStr)
jsonStr = re.sub(r'"\]', '`\]', jsonStr)
# Remove all the unwanted " and replace with ' '
jsonStr = re.sub(r'"',' ', jsonStr)
# Put back all the " where it supposed to be.
jsonStr = re.sub(r'\`','\"', jsonStr)
return json.loads(jsonStr)
https://stackoverflow.com/questions/-100004124
复制相似问题