# develop / network


			
import json

import numpy as np
import pandas as pd


# Load the raw log. Each line holds the inside of a JSON object without the
# surrounding braces, so the braces are added back before parsing.
# NOTE(review): the 'ANSI' codec name appears to be a Windows-only alias
# (for 'mbcs') — confirm before running this on another platform.
with open(r"D:/code/score_test/data/nlt_info_20230926.log",
          "r",
          encoding='ANSI') as f:
    data = f.read().split('\n')

# Keep only the lines that contain a key starting with `"recommend` but do
# not contain `"recommendlist`; every other line (including blank ones) is
# skipped. A kept line that is not valid JSON still raises from json.loads.
json_list = []
for line in data:
    if '"recommend' not in line or '"recommendlist' in line:
        continue
    json_list.append(json.loads('{' + line + '}'))

# One row per parsed log record.
data1 = pd.DataFrame(json_list)

# Reference sheet; the code below reads its 'question' and 'expected' columns.
data2 = pd.read_csv('D:/code/score_test/data/20230829评分系统材料.csv')

# Map each question in the reference sheet to its list of expected answers.
# The 'expected' cell holds answers separated by '/'; the literal '空' marks a
# question that has no expected answer. If a question appears more than once,
# the last row wins.
questionDict = {}
for question, expected in zip(data2['question'], data2['expected']):
    # pandas reads a blank CSV cell as NaN (a float), which has no .split();
    # treat a missing cell the same way as the explicit '空' marker.
    if pd.isna(expected) or expected == '空':
        questionDict[question] = []
    else:
        questionDict[question] = expected.split('/')


# Build one [question, answer, label] row for every answer entry in the log:
# label is 1 when the answer is among the question's expected answers in
# questionDict, otherwise 0. A question missing from questionDict raises
# KeyError as soon as one of its answers is checked.
dataw = []
for row in data1.index:
    asked = data1.loc[row, 'question']
    candidates = data1.loc[row, 'answers']
    # Assumes 'answers' is a sequence of indexable entries — TODO confirm.
    # Only the first element of each entry is used as the answer.
    for candidate in candidates:
        answer = candidate[0]
        label = 1 if answer in questionDict[asked] else 0
        dataw.append([asked, answer, label])

# Drop duplicate rows, keeping the first occurrence and the original order,
# then report the count and export the result.
# A set of row tuples gives constant-time membership checks instead of
# rescanning the whole output list for every row.
# Assumes every field of a row is hashable (e.g. str/int) — TODO confirm.
seen = set()
datas = []
for row in dataw:
    key = tuple(row)
    if key in seen:
        continue
    seen.add(key)
    datas.append(row)
print(len(datas))

# utf_8_sig writes a BOM at the start of the file; the DataFrame index is
# written as the first column (to_csv default).
dd = pd.DataFrame(datas, columns=['question', 'answer', 'label'])
dd.to_csv('data.csv', encoding="utf_8_sig")