Du kan inte välja fler än 25 ämnen. Ämnen måste starta med en bokstav eller siffra, kan innehålla bindestreck ('-') och vara max 35 tecken långa.

98 rader
2.7 KiB

  1. import torch
  2. import dashscope
  3. from http import HTTPStatus
  4. from dashscope import TextEmbedding
  5. from torch import nn
  6. import torch.optim as optim
  7. import pandas as pd
  8. import numpy as np
  9. dashscope.api_key = 'sk-44ccc9ab5e754eddb545cade12b632cf'
  10. cache = {}
  11. answerCache = []
  12. def getem(question):
  13. global cache
  14. if question in cache.keys():
  15. return cache[question]
  16. resp = TextEmbedding.call(model=TextEmbedding.Models.text_embedding_v1,
  17. input=question,
  18. text_type='query')
  19. if resp.status_code == HTTPStatus.OK:
  20. cache[question] = resp['output']['embeddings'][0]['embedding']
  21. return resp['output']['embeddings'][0]['embedding']
  22. # 创建模型实例
  23. model = ConvNet()
  24. # 定义损失函数和优化器
  25. criterion = nn.MSELoss()
  26. optimizer = optim.Adam(model.parameters(), lr=0.001)
  27. train = []
  28. test = []
  29. dataall = pd.read_csv('data.csv')
  30. dataall = dataall.iloc[:, 1:4]
  31. dataall = dataall.sample(frac=1)
  32. train = dataall.iloc[0:300]
  33. train = train.reset_index(drop=True)
  34. test = dataall.iloc[300:]
  35. test = test.reset_index(drop=True)
  36. nlossLast = 0
  37. for i in range(10):
  38. nloss = 0
  39. for k in range(len(train)):
  40. va = getem(train.iloc[k]['question'])
  41. vb = getem(train.iloc[k]['answer'])
  42. if train.iloc[k]['answer'] not in answerCache:
  43. answerCache.append(train.iloc[k]['answer'])
  44. va = torch.Tensor(va).reshape([1, 1, 1536])
  45. vb = torch.Tensor(vb).reshape([1, 1, 1536])
  46. output = model(va, vb)
  47. # 计算损失
  48. if train.iloc[k]['label'] == 1:
  49. loss = criterion(output,
  50. torch.tensor(1).float())
  51. else:
  52. loss = criterion(output,
  53. torch.tensor(0).float())
  54. # 反向传播并更新权重
  55. optimizer.zero_grad()
  56. loss.backward()
  57. optimizer.step()
  58. nloss += loss
  59. if k % 50 == 0:
  60. print(i, k, 'done')
  61. print('one loop done', nloss/len(train))
  62. p = 0
  63. for i in range(len(test)):
  64. va = getem(test.iloc[i]['question'])
  65. Scores = np.zeros(len(answerCache))
  66. for j in range(len(answerCache)):
  67. vb = getem(answerCache[j])
  68. vaa = torch.Tensor(va).reshape([1, 1, len(va)])
  69. vbb = torch.Tensor(vb).reshape([1, 1, len(vb)])
  70. output = model(vaa, vbb)
  71. Scores[j] = output
  72. for k in range(2):
  73. if test.iloc[i]['label'] == 1:
  74. vc = test.iloc[i]['answer']
  75. else:
  76. vc = ''
  77. tt = Scores.argmax()
  78. if Scores[tt] > 0.5:
  79. vb = answerCache[tt]
  80. Scores[tt] = -1
  81. else:
  82. vb = ''
  83. if vb == vc:
  84. p += 1
  85. break
  86. print(p/len(test))