選択できるのは25トピックまでです。 トピックは、先頭が英数字で、英数字とダッシュ('-')を使用した35文字以内のものにしてください。

98 行
2.7 KiB

  1. import torch
  2. import dashscope
  3. from http import HTTPStatus
  4. from dashscope import TextEmbedding
  5. from torch import nn
  6. import torch.optim as optim
  7. import pandas as pd
  8. import numpy as np
  9. dashscope.api_key = 'sk-44ccc9ab5e754eddb545cade12b632cf'
  10. cache = {}
  11. answerCache = []
  12. def getem(question):
  13. global cache
  14. if question in cache.keys():
  15. return cache[question]
  16. resp = TextEmbedding.call(model=TextEmbedding.Models.text_embedding_v1,
  17. input=question,
  18. text_type='query')
  19. if resp.status_code == HTTPStatus.OK:
  20. cache[question] = resp['output']['embeddings'][0]['embedding']
  21. return resp['output']['embeddings'][0]['embedding']
  22. # 创建模型实例
  23. model = ConvNet()
  24. # 定义损失函数和优化器
  25. criterion = nn.MSELoss()
  26. optimizer = optim.Adam(model.parameters(), lr=0.001)
  27. train = []
  28. test = []
  29. dataall = pd.read_csv('data.csv')
  30. dataall = dataall.iloc[:, 1:4]
  31. dataall = dataall.sample(frac=1)
  32. train = dataall.iloc[0:300]
  33. train = train.reset_index(drop=True)
  34. test = dataall.iloc[300:]
  35. test = test.reset_index(drop=True)
  36. nlossLast = 0
  37. for i in range(10):
  38. nloss = 0
  39. for k in range(len(train)):
  40. va = getem(train.iloc[k]['question'])
  41. vb = getem(train.iloc[k]['answer'])
  42. if train.iloc[k]['answer'] not in answerCache:
  43. answerCache.append(train.iloc[k]['answer'])
  44. va = torch.Tensor(va).reshape([1, 1, 1536])
  45. vb = torch.Tensor(vb).reshape([1, 1, 1536])
  46. output = model(va, vb)
  47. # 计算损失
  48. if train.iloc[k]['label'] == 1:
  49. loss = criterion(output,
  50. torch.tensor(1).float())
  51. else:
  52. loss = criterion(output,
  53. torch.tensor(0).float())
  54. # 反向传播并更新权重
  55. optimizer.zero_grad()
  56. loss.backward()
  57. optimizer.step()
  58. nloss += loss
  59. if k % 50 == 0:
  60. print(i, k, 'done')
  61. print('one loop done', nloss/len(train))
  62. p = 0
  63. for i in range(len(test)):
  64. va = getem(test.iloc[i]['question'])
  65. Scores = np.zeros(len(answerCache))
  66. for j in range(len(answerCache)):
  67. vb = getem(answerCache[j])
  68. vaa = torch.Tensor(va).reshape([1, 1, len(va)])
  69. vbb = torch.Tensor(vb).reshape([1, 1, len(vb)])
  70. output = model(vaa, vbb)
  71. Scores[j] = output
  72. for k in range(2):
  73. if test.iloc[i]['label'] == 1:
  74. vc = test.iloc[i]['answer']
  75. else:
  76. vc = ''
  77. tt = Scores.argmax()
  78. if Scores[tt] > 0.5:
  79. vb = answerCache[tt]
  80. Scores[tt] = -1
  81. else:
  82. vb = ''
  83. if vb == vc:
  84. p += 1
  85. break
  86. print(p/len(test))