|
"""Build a labelled question/answer CSV from a recommendation log.

Pipeline (run via ``main()`` when the file is executed as a script):

1. Read the log file; each kept line is the *body* of a JSON object (the
   enclosing braces are added before parsing).
2. Read the reference CSV with ``question`` and ``expected`` columns, where
   ``expected`` holds ``/``-separated answers or the marker ``'空'``.
3. For every answer of every log record, emit ``[question, answer, label]``
   with ``label`` 1 when the answer is among the expected ones, else 0.
4. Drop duplicate rows (first occurrence wins), print the row count and write
   the result to ``data.csv``.
"""
import json
import pandas as pd
import numpy as np  # not used in this section; kept because other code may rely on it


LOG_PATH = r"D:/code/score_test/data/nlt_info_20230926.log"
EXPECTED_CSV_PATH = 'D:/code/score_test/data/20230829评分系统材料.csv'
OUTPUT_CSV_PATH = 'data.csv'

# Value of the 'expected' column that stands for "no expected answer".
NO_EXPECTED_MARKER = '空'


def parse_log_lines(lines):
    """Parse the log lines that describe a recommendation into dicts.

    A line is kept only when it contains the text ``"recommend`` but not
    ``"recommendlist``. Kept lines are wrapped in ``{`` ... ``}`` and decoded
    with ``json.loads``.

    Args:
        lines: Iterable of log lines without their trailing newline.

    Returns:
        List of decoded JSON objects, in input order.

    Raises:
        json.JSONDecodeError: If a kept line is not a valid JSON object body.
    """
    records = []
    for line in lines:
        # '"recommendlist' also contains '"recommend', so both tests are needed.
        if '"recommend' not in line or '"recommendlist' in line:
            continue
        records.append(json.loads('{' + line + '}'))
    return records


def build_expected_map(rows):
    """Map each question to its list of expected answers.

    Args:
        rows: Iterable of ``(question, expected)`` pairs. ``expected`` is a
            ``/``-separated string, the marker ``'空'``, or a missing value
            (NaN/None, which is what pandas yields for a blank CSV cell).

    Returns:
        Dict of question -> list of expected answers. The marker and missing
        values both map to an empty list. When a question occurs more than
        once, the last occurrence wins.
    """
    expected_by_question = {}
    for question, expected in rows:
        if pd.isna(expected) or expected == NO_EXPECTED_MARKER:
            expected_by_question[question] = []
        else:
            expected_by_question[question] = expected.split('/')
    return expected_by_question


def label_answers(records, expected_by_question):
    """Label every answer of every record and drop duplicate rows.

    Args:
        records: Iterable of dicts with a ``'question'`` key and an
            ``'answers'`` key; only the first element of each answer entry
            is used.
        expected_by_question: Mapping produced by ``build_expected_map``.

    Returns:
        List of ``[question, answer, label]`` rows in first-seen order, where
        ``label`` is 1 if the answer is in the question's expected list and
        0 otherwise.

    Raises:
        KeyError: If a record with at least one answer has a question that is
            absent from ``expected_by_question``.
    """
    rows = []
    # Set-based de-duplication keeps this linear instead of rescanning the
    # output list for every row.
    # NOTE(review): requires answer values to be hashable (presumably
    # strings) -- confirm against the log format.
    seen = set()
    for record in records:
        question = record['question']
        for entry in record['answers']:
            answer = entry[0]
            label = 1 if answer in expected_by_question[question] else 0
            key = (question, answer, label)
            if key in seen:
                continue
            seen.add(key)
            rows.append([question, answer, label])
    return rows


def main():
    """Run the full pipeline on the hard-coded input and output paths."""
    # 'ANSI' resolves to the Windows-only 'mbcs' codec (system ANSI code page).
    with open(LOG_PATH, "r", encoding='ANSI') as f:
        log_lines = f.read().split('\n')
    records = parse_log_lines(log_lines)

    reference = pd.read_csv(EXPECTED_CSV_PATH)
    expected_by_question = build_expected_map(
        zip(reference['question'], reference['expected'])
    )

    rows = label_answers(records, expected_by_question)
    print(len(rows))
    labelled = pd.DataFrame(rows, columns=['question', 'answer', 'label'])
    labelled.to_csv(OUTPUT_CSV_PATH, encoding="utf_8_sig")


if __name__ == "__main__":
    main()
|