You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

57 lines
1.5 KiB

  1. import json
  2. import pandas as pd
  3. import numpy as np
  4. with open(r"D:/code/score_test/data/nlt_info_20230926.log",
  5. "r",
  6. encoding='ANSI') as f:
  7. data = f.read()
  8. data = data.split('\n')
  9. count = 0
  10. C = 0
  11. all = 0
  12. json_list = []
  13. for i in range(len(data)):
  14. all += 1
  15. temp = '{' + data[i] + '}'
  16. d = temp.find('"recommend')
  17. e = temp.find('"recommendlist')
  18. if d < 0:
  19. continue
  20. if d > 0 and e > 0:
  21. continue
  22. t = json.loads(temp)
  23. json_list.append(t)
  24. data1 = pd.DataFrame(json_list)
  25. data2 = pd.read_csv('D:/code/score_test/data/20230829评分系统材料.csv')
  26. questionDict = {}
  27. for i in range(data2.shape[0]):
  28. temp = data2.loc[i, 'question']
  29. temp_answer = data2.loc[i, 'expected']
  30. if not temp_answer == '空':
  31. expected_list = temp_answer.split('/')
  32. else:
  33. expected_list = []
  34. questionDict[temp] = expected_list
  35. dataw = []
  36. for i in range(data1.shape[0]):
  37. temp = data1.loc[i, 'question']
  38. temp_answer = data1.loc[i, 'answers']
  39. for j in range(len(temp_answer)):
  40. if temp_answer[j][0] in questionDict[temp]:
  41. dataw.append([temp, temp_answer[j][0], 1])
  42. else:
  43. dataw.append([temp, temp_answer[j][0], 0])
  44. datas = []
  45. for i in dataw:
  46. if i not in datas:
  47. datas.append(i)
  48. print(len(datas))
  49. dd = pd.DataFrame(datas, columns=['question', 'answer', 'label'])
  50. dd.to_csv('data.csv', encoding="utf_8_sig")