|
- import torch
- import dashscope
- from http import HTTPStatus
- from dashscope import TextEmbedding
- from torch import nn
- import torch.optim as optim
- import pandas as pd
- import numpy as np
import os

# The API key is read from the environment so the secret never lives in the
# source file. A key that was previously committed here should be treated as
# exposed and rotated.
dashscope.api_key = os.environ['DASHSCOPE_API_KEY']
# Memo of text -> embedding, filled by getem() so each text is embedded once.
cache = {}
# Distinct answers collected during training; evaluation ranks these candidates.
answerCache = []
-
-
def getem(question):
    """Return the embedding vector for ``question``, memoised in ``cache``.

    The module-level ``cache`` dict is consulted first; on a miss the
    ``text_embedding_v1`` model is called with ``text_type='query'`` and the
    result is stored under ``question`` before being returned.

    Args:
        question: Text to embed; also used as the cache key.

    Returns:
        The ``embedding`` entry of the first element of the response's
        ``output.embeddings`` list.

    Raises:
        RuntimeError: If the service answers with a status other than HTTP
            200. Previously this case fell through and returned ``None``,
            which only failed later when the value was turned into a tensor.
    """
    try:
        return cache[question]
    except KeyError:
        pass  # not embedded yet: fall through to the API call

    resp = TextEmbedding.call(model=TextEmbedding.Models.text_embedding_v1,
                              input=question,
                              text_type='query')
    if resp.status_code != HTTPStatus.OK:
        raise RuntimeError(
            'embedding request failed with status %s: %s'
            % (resp.status_code, getattr(resp, 'message', '')))

    embedding = resp['output']['embeddings'][0]['embedding']
    cache[question] = embedding
    return embedding
-
-
-
-
# Create the model instance; it is later called with a pair of embedding tensors.
# NOTE(review): ConvNet is not defined in the visible part of this file.
model = ConvNet()

# Define the loss function (mean squared error) and the Adam optimizer.
criterion = nn.MSELoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

# Load the CSV, keep the columns at positions 1 through 3, and shuffle the rows.
dataall = pd.read_csv('data.csv').iloc[:, 1:4].sample(frac=1)

# The first 300 shuffled rows form the training set; the remainder is held
# out for testing. Both get a fresh 0-based index.
train = dataall.iloc[:300].reset_index(drop=True)
test = dataall.iloc[300:].reset_index(drop=True)
nlossLast = 0  # NOTE(review): never read in the visible code; confirm before removing
# Train for 10 passes over the training rows, one optimizer step per row.
for i in range(10):
    # Plain float running total. Adding the loss tensors themselves would keep
    # autograd history attached to the accumulator for the whole pass.
    nloss = 0.0
    rows = zip(train['question'], train['answer'], train['label'])
    for k, (question, answer, label) in enumerate(rows):
        va = getem(question)
        vb = getem(answer)
        # Remember every distinct training answer as an evaluation candidate.
        if answer not in answerCache:
            answerCache.append(answer)
        # Shape (1, 1, embedding_length), sized from the vector itself so this
        # agrees with how the evaluation code builds its inputs.
        va = torch.Tensor(va).reshape([1, 1, len(va)])
        vb = torch.Tensor(vb).reshape([1, 1, len(vb)])
        output = model(va, vb)
        # Compute the loss: the target is 1 for rows labelled 1, otherwise 0.
        # It is built with the output's shape so MSELoss never has to broadcast.
        target = torch.full_like(output, 1.0 if label == 1 else 0.0)
        loss = criterion(output, target)
        # Back-propagate and update the weights.
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        nloss += loss.item()
        if k % 50 == 0:
            print(i, k, 'done')
    # max(..., 1) avoids a ZeroDivisionError when the training set is empty.
    print('one loop done', nloss / max(len(train), 1))
-
-
# Evaluate on the held-out rows. A row counts as a hit when the expected
# answer (the row's answer if its label is 1, otherwise the empty string)
# matches one of the model's two best picks among the cached answers.
model.eval()  # inference behaviour for layers such as dropout, if ConvNet has any

# The candidate answers are the same for every question, so their input
# tensors are built once instead of once per (question, answer) pair.
answer_tensors = []
for candidate in answerCache:
    candidate_emb = getem(candidate)
    answer_tensors.append(
        torch.Tensor(candidate_emb).reshape([1, 1, len(candidate_emb)]))

p = 0
with torch.no_grad():  # no gradients are needed while scoring
    for question, answer, label in zip(test['question'], test['answer'],
                                       test['label']):
        va = getem(question)
        vaa = torch.Tensor(va).reshape([1, 1, len(va)])
        # .item() yields a plain Python float for the NumPy score array.
        Scores = np.array([model(vaa, vbb).item() for vbb in answer_tensors])

        expected = answer if label == 1 else ''
        for _ in range(2):
            tt = Scores.argmax()
            if Scores[tt] > 0.5:
                predicted = answerCache[tt]
                # Knock out this candidate so the second pass picks the runner-up.
                Scores[tt] = -1
            else:
                # No candidate is confident enough: predict "no answer".
                predicted = ''
            if predicted == expected:
                p += 1
                break

# Fraction of test rows answered correctly; NaN when there are no test rows
# (for example when the CSV has 300 rows or fewer) instead of dividing by zero.
print(p / len(test) if len(test) else float('nan'))
|