| @@ -0,0 +1,160 @@ | |||
| # Byte-compiled / optimized / DLL files | |||
| __pycache__/ | |||
| *.py[cod] | |||
| *$py.class | |||
| # C extensions | |||
| *.so | |||
| # Distribution / packaging | |||
| .Python | |||
| build/ | |||
| develop-eggs/ | |||
| dist/ | |||
| downloads/ | |||
| eggs/ | |||
| .eggs/ | |||
| lib/ | |||
| lib64/ | |||
| parts/ | |||
| sdist/ | |||
| var/ | |||
| wheels/ | |||
| share/python-wheels/ | |||
| *.egg-info/ | |||
| .installed.cfg | |||
| *.egg | |||
| MANIFEST | |||
| # PyInstaller | |||
| # Usually these files are written by a python script from a template | |||
| # before PyInstaller builds the exe, so as to inject date/other infos into it. | |||
| *.manifest | |||
| *.spec | |||
| # Installer logs | |||
| pip-log.txt | |||
| pip-delete-this-directory.txt | |||
| # Unit test / coverage reports | |||
| htmlcov/ | |||
| .tox/ | |||
| .nox/ | |||
| .coverage | |||
| .coverage.* | |||
| .cache | |||
| nosetests.xml | |||
| coverage.xml | |||
| *.cover | |||
| *.py,cover | |||
| .hypothesis/ | |||
| .pytest_cache/ | |||
| cover/ | |||
| # Translations | |||
| *.mo | |||
| *.pot | |||
| # Django stuff: | |||
| *.log | |||
| local_settings.py | |||
| db.sqlite3 | |||
| db.sqlite3-journal | |||
| # Flask stuff: | |||
| instance/ | |||
| .webassets-cache | |||
| # Scrapy stuff: | |||
| .scrapy | |||
| # Sphinx documentation | |||
| docs/_build/ | |||
| # PyBuilder | |||
| .pybuilder/ | |||
| target/ | |||
| # Jupyter Notebook | |||
| .ipynb_checkpoints | |||
| # IPython | |||
| profile_default/ | |||
| ipython_config.py | |||
| # pyenv | |||
| # For a library or package, you might want to ignore these files since the code is | |||
| # intended to run in multiple environments; otherwise, check them in: | |||
| # .python-version | |||
| # pipenv | |||
| # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. | |||
| # However, in case of collaboration, if having platform-specific dependencies or dependencies | |||
| # having no cross-platform support, pipenv may install dependencies that don't work, or not | |||
| # install all needed dependencies. | |||
| #Pipfile.lock | |||
| # poetry | |||
| # Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control. | |||
| # This is especially recommended for binary packages to ensure reproducibility, and is more | |||
| # commonly ignored for libraries. | |||
| # https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control | |||
| #poetry.lock | |||
| # pdm | |||
| # Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control. | |||
| #pdm.lock | |||
| # pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it | |||
| # in version control. | |||
| # https://pdm.fming.dev/#use-with-ide | |||
| .pdm.toml | |||
| # PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm | |||
| __pypackages__/ | |||
| # Celery stuff | |||
| celerybeat-schedule | |||
| celerybeat.pid | |||
| # SageMath parsed files | |||
| *.sage.py | |||
| # Environments | |||
| .env | |||
| .venv | |||
| env/ | |||
| venv/ | |||
| ENV/ | |||
| env.bak/ | |||
| venv.bak/ | |||
| # Spyder project settings | |||
| .spyderproject | |||
| .spyproject | |||
| # Rope project settings | |||
| .ropeproject | |||
| # mkdocs documentation | |||
| /site | |||
| # mypy | |||
| .mypy_cache/ | |||
| .dmypy.json | |||
| dmypy.json | |||
| # Pyre type checker | |||
| .pyre/ | |||
| # pytype static type analyzer | |||
| .pytype/ | |||
| # Cython debug symbols | |||
| cython_debug/ | |||
| # PyCharm | |||
| # JetBrains specific template is maintained in a separate JetBrains.gitignore that can | |||
| # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore | |||
| # and can be added to the global gitignore or merged into this file. For a more nuclear | |||
| # option (not recommended) you can uncomment the following to ignore the entire idea folder. | |||
| #.idea/ | |||
| @@ -0,0 +1,17 @@ | |||
| # coding=utf-8 | |||
| from embeddings.embedding import Embedding | |||
| import dashscope | |||
| from http import HTTPStatus | |||
| from dashscope import TextEmbedding | |||
class Dashscopeembedding(Embedding):
    """Embedding backend using Alibaba DashScope text-embedding-v1."""

    def __init__(self, **param):
        # DashScope is configured through a module-level api key;
        # the last instance constructed wins.
        dashscope.api_key = param['api_key']

    def getem(self, question):
        """Return the embedding vector (list of floats) for *question*.

        Raises:
            RuntimeError: on a non-OK API response. The original silently
            returned None here, which crashed later in np.dot; an explicit
            error makes the failure visible at its source.
        """
        resp = TextEmbedding.call(model=TextEmbedding.Models.text_embedding_v1,
                                  input=question,
                                  text_type='query')
        if resp.status_code == HTTPStatus.OK:
            return resp['output']['embeddings'][0]['embedding']
        raise RuntimeError('DashScope embedding failed: status=%s'
                           % resp.status_code)
| @@ -0,0 +1,12 @@ | |||
| # coding=utf-8 | |||
| from abc import ABC, abstractmethod | |||
class Embedding(ABC):
    """Abstract interface for embedding providers.

    Concrete providers are configured from keyword parameters and turn a
    question string into an embedding vector.
    """

    @abstractmethod
    def __init__(self, **param):
        """Configure the provider from config keyword arguments."""
        ...

    @abstractmethod
    def getem(self, question):
        """Return the embedding vector for *question*."""
        ...
| @@ -0,0 +1,22 @@ | |||
| # coding=utf-8 | |||
| from embeddings.embedding import Embedding | |||
| import requests | |||
| import json | |||
class Liandongembedding(Embedding):
    """Embedding backend calling a Liandong HTTP embedding endpoint."""

    def __init__(self, **param):
        self.url = param['api_url']

    def getem(self, question):
        """POST *question* to the endpoint and return its embedding vector.

        The literal '<calculated when request is sent>' Content-Length
        header (a Postman copy artifact) has been dropped: requests
        computes the real length itself, and sending the placeholder
        string is invalid HTTP.
        """
        headers = {
            'Content-Type': 'application/json'
        }
        data = {
            "text": question
        }
        # timeout so a dead endpoint cannot hang the caller forever
        result = requests.post(self.url,
                               headers=headers,
                               data=json.dumps(data),
                               timeout=30)
        return json.loads(result.content)['data']
| @@ -0,0 +1,13 @@ | |||
| # coding=utf-8 | |||
| import zhipuai | |||
| from embeddings.embedding import Embedding | |||
class Zhipuembedding(Embedding):
    """Embedding backend backed by the ZhipuAI text_embedding model."""

    def __init__(self, **param):
        # ZhipuAI is configured through a module-level api key.
        zhipuai.api_key = param['api_key']

    def getem(self, question):
        """Return the embedding vector for *question*."""
        reply = zhipuai.model_api.invoke(model="text_embedding",
                                         prompt=question)
        return reply['data']['embedding']
| @@ -0,0 +1,222 @@ | |||
| # coding=utf-8 | |||
| import numpy as np | |||
| import configparser | |||
| from datetime import datetime | |||
| import json | |||
| from tools.logger import Logger | |||
| LTHRESHOLD = 0.4 | |||
| MTHRESHOLD = 0.6 | |||
| HTHRESHOLD = 0.8 | |||
| DB = 'xy-cloud1' | |||
| INDEX = 'dm_q_and_a,dm_questions' | |||
| cache = {} | |||
class Knowledge():
    """Question-recommendation engine.

    Wires together a data source, an embedding backend and an LLM — all
    chosen by name from settings.ini — and recommends the stored
    questions whose embeddings are closest to a user question, optionally
    asking the LLM to pick the best four from a longer candidate list.
    """

    def __init__(self, model):
        """Load settings.ini and instantiate the source/embedding/LLM plugins.

        Args:
            model: prefix of the LLM section in settings.ini, e.g. 'spark'
                selects section [sparkllm] and class llms.sparkllm.Sparkllm.
        """
        self.model = model + 'llm'
        self.config = configparser.ConfigParser()
        self.config.read("settings.ini", encoding="utf-8")
        # Data source plugin, e.g. source=sql -> sources/sqlsource.py,
        # class Sqlsource.
        self.sourcename = self.config.get('config', 'source') + 'source'
        source_config = dict(self.config.items(self.sourcename))
        source_config['index'] = INDEX
        source_config['db'] = DB
        self.sources = __import__('sources.%s' % self.sourcename,
                                  fromlist=['sources'])
        # getattr instead of eval: same class lookup, no arbitrary-code risk.
        self.source = getattr(self.sources,
                              self.sourcename.capitalize())(**source_config)
        self.logger = Logger(self.config.get("config", "logger_path"))
        # Embedding plugin, e.g. embedding=dashscope ->
        # embeddings/dashscopeembedding.py, class Dashscopeembedding.
        self.emname = self.config.get('config', 'embedding') + 'embedding'
        embedding_config = dict(self.config.items(self.emname))
        self.ems = __import__('embeddings.%s' % self.emname,
                              fromlist=['embeddings'])
        self.embedding = getattr(self.ems,
                                 self.emname.capitalize())(**embedding_config)
        # LLM plugin, e.g. llms/sparkllm.py, class Sparkllm.
        self.llmname = self.model
        llm_config = dict(self.config.items(self.llmname))
        self.llms = __import__('llms.%s' % self.llmname,
                               fromlist=['llms'])
        self.llm = getattr(self.llms,
                           self.llmname.capitalize())(**llm_config)

    def combine(self, result, question, accurate, tenantId):
        """Wrap a recommendation list into the response envelope.

        An empty-string *result* means "nothing found": the 'result' key
        is omitted entirely rather than set to an empty value.
        """
        Result = {'code': 200, 'target': 1}
        data = {}
        if not result == '':
            data['result'] = result
        data['question'] = question
        data['accurate'] = accurate
        data['llm'] = self.config.get('config', 'model')
        data['tenantId'] = tenantId
        Result['data'] = data
        return Result

    def log_header(self, method):
        """Build the JSON-ish prefix shared by all log records.

        NOTE(review): the key is spelled "tentant_id" (sic). Kept as-is
        because downstream log parsers may depend on the exact spelling —
        confirm before fixing.
        """
        timestamp = datetime.strftime(datetime.now(), '%Y-%m-%dT%H:%M:%S:%f')
        timestamp = '"timestamp":"' + timestamp + '",'
        tenant = '"tentant_id":' + str(self.tenant_id) + ','
        em = '"embedding":"' + self.emname + '",'
        model = '"model":"' + self.llmname + '",'
        source = '"source":"' + self.sourcename + '",'
        questionstr = '"question":"' + self.question + '","answers":['
        logs = ',"method":' + method + ','
        logs = timestamp + tenant + em + model + source + logs + questionstr
        return logs

    def emsearch(self):
        """Rank stored questions by cosine similarity to self.question.

        Returns:
            [em_list, score_em_max]: up to six entries scoring above
            LTHRESHOLD, best first, and the best score (-2 when no data).
        """
        data_all = self.source.getdata(tenant_id=self.tenant_id)
        logs = self.log_header('"emlist"')
        for i in data_all:
            item = '"' + i['name'] + '",' + i['id']
            logs = logs + '[' + item + '],'
        logs = logs[0:-1] if logs[-1] == ',' else logs
        self.logger.info(logs + ']')
        em_list = []
        score_em_max = -2
        if len(data_all) > 0:
            all_em_score = np.zeros(len(data_all))
            v1 = self.embedding.getem(self.question)
            for i in range(len(data_all)):
                # Embeddings of stored questions are cached by name in the
                # module-level cache, saving one remote call per repeat.
                if data_all[i]['name'] in cache.keys():
                    v2 = cache[data_all[i]['name']]
                else:
                    v2 = self.embedding.getem(data_all[i]['name'])
                    cache[data_all[i]['name']] = v2
                # Cosine similarity between question and stored entry.
                numerator = np.dot(v1, v2)
                denominator = (np.linalg.norm(v1) * np.linalg.norm(v2))
                all_em_score[i] = numerator / denominator
            logs = self.log_header('"emsearch"')
            # Take up to 6 best scores above LTHRESHOLD; consumed slots
            # are overwritten with -2 so argmax never picks them again.
            for i in range(6):
                t = np.argmax(all_em_score)
                if all_em_score[t] < LTHRESHOLD:
                    break
                if i == 0:
                    score_em_max = all_em_score[t]
                if all_em_score[t] <= -1:
                    break
                em_list.append(data_all[t])
                item = '"' + data_all[t]['name'] + '",'
                item = item + data_all[t]['id']
                item = item + ',%.3f' % all_em_score[t]
                logs = logs + '[' + item + '],'
                all_em_score[t] = -2
            logs = logs[0:-1] if logs[-1] == ',' else logs
            self.logger.info(logs + ']')
        return [em_list, score_em_max]

    def recommend(self, tenant_id=None, question=None):
        """Recommend up to four stored questions close to the user question.

        Args:
            tenant_id, question: optional, backward-compatible. When given
                they are stored on self; otherwise the caller must have set
                self.tenant_id and self.question beforehand. (mark.py calls
                recommend(tenant_id, question), which crashed against the
                original zero-argument signature.)

        Strategy: above HTHRESHOLD return the top-4 directly (accurate=1);
        with few candidates or usellm=0 return what we have; otherwise ask
        the LLM to pick the four closest candidates.
        """
        if tenant_id is not None:
            self.tenant_id = tenant_id
        if question is not None:
            self.question = question
        logs = self.log_header('"searchbegin"')
        self.logger.info(logs + ']')
        [result_list, score_em_max] = self.emsearch()
        if score_em_max > HTHRESHOLD:
            # Confident match: highlight the best hit and answer directly.
            result_list[0]['highlight'] = 1
            logs = self.log_header('"recommend"')
            for i in result_list:
                logs = logs + '["' + i['name'] + '",' + i['id'] + '],'
            logs = logs[0:-1] if logs[-1] == ',' else logs
            self.logger.info(logs + ']')
            return self.combine(result_list[0:4],
                                self.question,
                                1,
                                self.tenant_id)
        if score_em_max > MTHRESHOLD:
            # Medium confidence: highlight but fall through to the list /
            # LLM re-ranking path below.
            result_list[0]['highlight'] = 1
            logs = self.log_header('"recommendlist"')
            for i in result_list:
                logs = logs + '["' + i['name'] + '",' + i['id'] + '],'
            logs = logs[0:-1] if logs[-1] == ',' else logs
            self.logger.info(logs + ']')
        if len(result_list) == 0:
            # Nothing above threshold: empty recommendation.
            logs = self.log_header('"recommend"')
            self.logger.info(logs + ']')
            return self.combine("",
                                self.question,
                                0,
                                self.tenant_id)
        if len(result_list) <= 4:
            # Four or fewer candidates: no need to re-rank.
            logs = self.log_header('"recommend"')
            for i in result_list:
                logs = logs + '["' + i['name'] + '",' + i['id'] + '],'
            logs = logs[0:-1] if logs[-1] == ',' else logs
            self.logger.info(logs + ']')
            return self.combine(result_list,
                                self.question,
                                0,
                                self.tenant_id)
        if self.config.get('config', 'usellm') == "0":
            # LLM re-ranking disabled: return the top four by similarity.
            logs = self.log_header('"recommend"')
            for i in result_list:
                logs = logs + '["' + i['name'] + '",' + i['id'] + '],'
            logs = logs[0:-1] if logs[-1] == ',' else logs
            self.logger.info(logs + ']')
            return self.combine(result_list[0:4],
                                self.question,
                                0,
                                self.tenant_id)
        # Ask the LLM (prompt in Chinese) to pick the four candidates
        # whose intent is closest to the question, as JSON.
        L = ''
        for i in result_list:
            L = L + '"' + i['name'] + '",'
        L = L[0:-1]
        Q1 = "请从列表{"
        Q2 = "}选出与'"
        Q3 = "'意图最接近的四句话,将结果以{question1:,question2:,question3:,question4:}输出。"
        Q4 = '输出为JSON格式。答案只能来自于列表。不要返回代码。不要输出JSON之外的东西'
        Q = Q1 + L + Q2 + self.question + Q3 + Q4
        logs = self.log_header('"llmin"')
        self.logger.info(logs + '"' + Q + '"]')
        answer, tokens = self.llm.link(Q)
        logs = self.log_header('"llmout"')
        tokens = str(tokens).replace("'", '"')
        logs = logs[0:-1] + repr(answer) + ',"tokens":' + str(tokens)
        self.logger.info(logs)
        # Extract the outermost {...} and normalize quoting so the LLM's
        # loosely-formatted reply parses as JSON.
        begin = answer.find('{')
        end = answer.rfind('}')
        answer = answer[begin:end+1]
        answer = answer.replace('\\n', '')
        answer = answer.replace('\\"', '"')
        logs = self.log_header('"llmsearch"')
        answer = answer.replace("'", '"')
        try:
            data = json.loads(answer)
            result = []
            for key in data:
                for i in result_list:
                    if data[key] == i['name']:
                        if i not in result:
                            result.append(i)
        except Exception:
            # LLM reply not parseable: fall back to the similarity top-4.
            result = result_list[0:4]
            err_logs = self.log_header('"llmsearch"')
            err_logs = err_logs + '"Can not trans to JSON.]"'
            self.logger.error(err_logs)
        for i in result:
            logs = logs + '["' + i['name'] + '",' + i['id'] + '],'
        # Pad to four entries from the remaining candidates, then trim.
        while len(result) < 4:
            for i in result_list:
                if i not in result:
                    result.append(i)
                    break
        if len(result) > 4:
            result = result[0:4]
        logs = logs[0:-1] if logs[-1] == ',' else logs
        self.logger.info(logs + ']')
        logs = self.log_header('"recommend"')
        for i in result:
            logs = logs + '["' + i['name'] + '",' + i['id'] + '],'
        logs = logs[0:-1] if logs[-1] == ',' else logs
        self.logger.info(logs + ']')
        return self.combine(result,
                            self.question,
                            0,
                            self.tenant_id)
| @@ -0,0 +1,44 @@ | |||
| # coding=utf-8 | |||
| from llms.llm import Llm | |||
| import json | |||
| import requests | |||
class Baidullm(Llm):
    """LLM backend for the Baidu ERNIE (wenxinworkshop) HTTP API."""

    def __init__(self, **param):
        self.access_url = param['access_url']
        self.api_url = param['api_url']
        self.api_key = param['api_key']
        self.api_secret = param['api_secret']

    def get_access_token(self):
        """Fetch an OAuth access token for the chat endpoint.

        Returns None when the token service replies without an
        "access_token" field.
        """
        url = self.access_url + self.api_key
        url = url + '&client_secret=' + self.api_secret
        payload = json.dumps("")
        headers = {
            'Content-Type': 'application/json',
            'Accept': 'application/json'
        }
        # timeout so a stalled token service cannot hang the caller
        response = requests.post(url, headers=headers, data=payload,
                                 timeout=30)
        return response.json().get("access_token")

    def link(self, question):
        """Send *question* as a single user message.

        Returns:
            (answer, tokens): the model's text reply and total token usage.
        """
        url = self.api_url + self.get_access_token()
        payload = json.dumps({
            "messages": [
                {
                    "role": "user",
                    "content": question
                }
            ]
        })
        headers = {
            'Content-Type': 'application/json'
        }
        # timeout so a hung chat endpoint cannot block forever
        response = requests.post(url, headers=headers, data=payload,
                                 timeout=60)
        data = json.loads(response.text)
        answer = data['result']
        tokens = data['usage']['total_tokens']
        return answer, tokens
| @@ -0,0 +1,29 @@ | |||
| # coding=utf-8 | |||
| from llms.llm import Llm | |||
| import requests | |||
| import json | |||
class Liandongllm(Llm):
    """LLM backend calling the Liandong questionAnswer HTTP endpoint."""

    def __init__(self, **param):
        self.api_url = param['api_url']

    def link(self, question):
        """POST *question*; return (answer_text, 0).

        The API reports no token usage, so the tokens element is always 0.

        Fixes: Content-Type was the invalid MIME type 'json' (now
        'application/json', matching Liandongembedding), and the literal
        Postman Content-Length placeholder header has been dropped —
        requests computes the real length itself.
        """
        headers = {
            'Content-Type': 'application/json'
        }
        data = {
            "category": "bj_unicom",
            "messages": [
                {
                    "role": "user",
                    "content": question
                }
            ]
        }
        # timeout so a dead endpoint cannot hang the caller forever
        result = requests.post(self.api_url,
                               headers=headers,
                               data=json.dumps(data),
                               timeout=60)
        result_json = json.loads(result.text)
        return result_json['data'][0]['text'], 0
| @@ -0,0 +1,12 @@ | |||
| # coding=utf-8 | |||
| from abc import ABC, abstractmethod | |||
class Llm(ABC):
    """Abstract interface for LLM backends.

    Concrete backends are configured from keyword parameters and answer a
    single question per link() call.
    """

    @abstractmethod
    def __init__(self, **param):
        """Configure the backend from config keyword arguments."""
        ...

    @abstractmethod
    def link(self, question):
        """Answer *question*; return an (answer, tokens) pair."""
        ...
| @@ -0,0 +1,154 @@ | |||
| # coding=utf-8 | |||
| import _thread as thread | |||
| import base64 | |||
| import hashlib | |||
| import hmac | |||
| import json | |||
| from urllib.parse import urlparse | |||
| import ssl | |||
| from datetime import datetime | |||
| from time import mktime | |||
| from urllib.parse import urlencode | |||
| from wsgiref.handlers import format_date_time | |||
| import websocket | |||
| from llms.llm import Llm | |||
| S = '' | |||
| tokens = '' | |||
class Ws_Param(object):
    """Builds the signed websocket URL for the iFlytek Spark API."""

    def __init__(self, APPID, APIKey, APISecret, gpt_url):
        self.APPID = APPID
        self.APIKey = APIKey
        self.APISecret = APISecret
        parsed = urlparse(gpt_url)
        self.host = parsed.netloc
        self.path = parsed.path
        self.gpt_url = gpt_url

    def create_url(self):
        """Return gpt_url with authorization/date/host query parameters.

        The signature is an HMAC-SHA256 over the host header, an RFC-1123
        date and the request line, base64-encoded and wrapped into the
        Spark authorization string (itself base64-encoded).
        """
        rfc_date = format_date_time(mktime(datetime.now().timetuple()))
        request_line = "GET " + self.path + " HTTP/1.1"
        to_sign = "\n".join(["host: " + self.host,
                             "date: " + rfc_date,
                             request_line])
        digest = hmac.new(self.APISecret.encode('utf-8'),
                          to_sign.encode('utf-8'),
                          digestmod=hashlib.sha256).digest()
        signature_b64 = base64.b64encode(digest).decode('utf-8')
        auth_origin = (f'api_key="{self.APIKey}", '
                       'algorithm="hmac-sha256", '
                       'headers="host date request-line", '
                       f'signature="{signature_b64}"')
        auth_b64 = base64.b64encode(
            auth_origin.encode('utf-8')).decode('utf-8')
        query = {
            "authorization": auth_b64,
            "date": rfc_date,
            "host": self.host
        }
        return self.gpt_url + '?' + urlencode(query)
def on_error(ws, error):
    """Websocket error callback: report the error and carry on."""
    print("### error:", error)
def on_close(ws):
    """Websocket close callback: announce the closed connection."""
    print("### closed ###")
def on_open(ws):
    """Websocket open callback: send the request from a worker thread."""
    thread.start_new_thread(run, (ws,))
def run(ws, *args):
    """Serialize the chat request and send it over the websocket.

    Reads the appid/question/domain attributes stuffed onto the ws
    object by Sparkllm.link before run_forever started.
    """
    payload = gen_params(appid=ws.appid,
                         question=ws.question,
                         domain=ws.domain)
    ws.send(json.dumps(payload))
def on_message(ws, message):
    """Websocket message callback: accumulate the streamed answer.

    Appends each content chunk to the module-global S and records total
    token usage in the module-global tokens; closes the socket on an
    error code or when the final chunk arrives (status == 2).
    """
    global S
    global tokens
    data = json.loads(message)
    code = data['header']['code']
    if code != 0:
        print(f'request fail: {code}, {data}')
        ws.close()
        return
    choices = data["payload"]["choices"]
    content = choices["text"][0]["content"]
    S = S + content
    if 'usage' in data['payload'].keys():
        tokens = data['payload']['usage']['text']['total_tokens']
    if choices["status"] == 2:
        ws.close()
def gen_params(appid, question, domain):
    """Build the Spark chat request payload for a single user message."""
    header = {"app_id": appid, "uid": "1234"}
    chat_params = {
        "domain": domain,
        "random_threshold": 0.5,
        "max_tokens": 2048,
        "auditing": "default",
    }
    message = {"text": [{"role": "user", "content": question}]}
    return {
        "header": header,
        "parameter": {"chat": chat_params},
        "payload": {"message": message},
    }
class Sparkllm(Llm):
    """LLM backend for iFlytek Spark over a signed websocket.

    NOTE(review): the streamed answer accumulates in the module globals
    S/tokens, so concurrent link() calls would interleave their chunks —
    presumably single-threaded use; confirm before sharing across threads.
    """

    def __init__(self, **param):
        self.api_url = param['api_url']
        self.app_id = param['app_id']
        self.api_key = param['api_key']
        self.api_secret = param['api_secret']
        self.domain = param['domain']

    def link(self, question):
        """Run one blocking chat round-trip; return (answer, tokens)."""
        global tokens
        global S
        ws_param = Ws_Param(self.app_id,
                            self.api_key,
                            self.api_secret,
                            self.api_url)
        websocket.enableTrace(False)
        ws = websocket.WebSocketApp(ws_param.create_url(),
                                    on_message=on_message,
                                    on_error=on_error,
                                    on_close=on_close,
                                    on_open=on_open)
        # The module-level callbacks read these attributes off the ws object.
        ws.appid = self.app_id
        ws.question = question
        ws.domain = self.domain
        ws.run_forever(sslopt={"cert_reqs": ssl.CERT_NONE})
        # Copy then reset the globals so the next call starts clean.
        answer, used = S, tokens
        S = ''
        tokens = ''
        return answer, used
| @@ -0,0 +1,17 @@ | |||
| # coding=utf-8 | |||
| import zhipuai | |||
| from llms.llm import Llm | |||
class Zhipullm(Llm):
    """LLM backend for the ZhipuAI chatglm_std model."""

    def __init__(self, **param):
        # ZhipuAI is configured through a module-level api key.
        zhipuai.api_key = param['api_key']

    def link(self, question):
        """Ask chatglm_std one user question; return (answer, tokens)."""
        reply = zhipuai.model_api.invoke(
            model="chatglm_std",
            prompt=[{"role": "user", "content": question}],
            temperature=0.9)
        payload = reply['data']
        answer = payload['choices'][0]['content']
        used = payload['usage']['total_tokens']
        return answer, used
| @@ -0,0 +1,63 @@ | |||
| # -*- coding: utf-8 -*- | |||
| import redis | |||
| import json | |||
| import copy | |||
| import configparser | |||
| from knowledge import Knowledge | |||
| from sources import essource | |||
# Cross-review worker: pops rated Q&A records from redis, replays each
# question through every *other* LLM backend, and pushes the new answers
# back onto 'es_question_marked_log' for later marking / ES indexing.
redis_conn = redis.Redis(
    host='192.168.10.244',
    port=6381,
    db=0,
    decode_responses=True)
config = configparser.ConfigParser()
config.read("settings.ini", encoding="utf-8")
llmname = ['baidu', 'spark', 'zhipu']
llmdict = {}
kldict = {}
for i in llmname:
    llm_config = dict(config.items(i + 'llm'))
    llms = __import__('llms.%s' % i + 'llm',
                      fromlist=['llms'])
    # getattr instead of eval: same class lookup, no code execution.
    llmdict[i] = getattr(llms, (i + 'llm').capitalize())(**llm_config)
    kldict[i] = Knowledge(i)
con = dict(config.items('essource'))
con['index'] = 'a'
es = essource.Essource(**con)
while True:
    a = redis_conn.lpop("es_question_mark_log")
    if a is not None:
        jo = json.loads(a)
        for k in llmname:
            if k == jo['llm']:
                # The record's own LLM: disliked answers go back for
                # re-marking; liked ones are indexed into Elasticsearch.
                if jo['like'] < 0:
                    redis_conn.rpush('es_question_marked_log', a)
                else:
                    es.upload(a)
            else:
                b = copy.deepcopy(jo)
                if b['type'] == 0:
                    # Free-form question: ask the other LLM directly.
                    b['answer'] = llmdict[k].link(b['question'])[0]
                else:
                    # Knowledge-base question: collect recommended names.
                    temp = kldict[k].recommend(b['tenant_id'],
                                               b['question'])
                    if len(temp['data']) > 0:
                        answers = []
                        for tempanswer in temp['data']['result']:
                            answers.append(tempanswer['name'])
                        b['answer'] = answers
                    else:
                        continue
                b['llm'] = k
                b['like'] = -1
                print(b)
                # json.dumps (not str): str(b) produced a Python repr with
                # single quotes, which json.loads on the consumer side
                # cannot parse.
                redis_conn.rpush('es_question_marked_log',
                                 json.dumps(b, ensure_ascii=False))
    else:
        print("none")
        break
| @@ -0,0 +1,9 @@ | |||
| dashscope==1.5.0 | |||
| elasticsearch==8.9.0 | |||
| Flask==2.2.2 | |||
| numpy==1.23.5 | |||
| PyMySQL==1.0.2 | |||
| Requests==2.31.0 | |||
| websocket_client==0.58.0 | |||
| zhipuai==1.0.7 | |||
werkzeug==2.2.2
redis
kafka-python
| @@ -0,0 +1,33 @@ | |||
; WARNING: live API keys, secrets and database passwords are committed
; below. Move them to environment variables or a secrets store and
; rotate the exposed credentials.
[config]
| source=sql | |||
| model=spark | |||
| embedding=dashscope | |||
| logger_path=/usr/share/knowledge/ | |||
| usellm=1 | |||
| [dashscopeembedding] | |||
| api_key=sk-44ccc9ab5e754eddb545cade12b632cf | |||
| [zhipuembedding] | |||
| api_key=e1c759ec620a9045360d7a90d697b88f.pjn1S2MqSS8lNmzj | |||
| [liandongembedding] | |||
| api_url=http://125.34.89.79:8000/index/embeddings | |||
| [essource] | |||
| api_url=http://8.140.53.237:9200 | |||
| [sqlsource] | |||
| api_url=192.168.10.244:3308 | |||
| api_id=root | |||
| api_password=Digimeta@123 | |||
| [sparkllm] | |||
| api_url=ws://spark-api.xf-yun.com/v2.1/chat | |||
| domain=generalv2 | |||
| app_id=948cf4b6 | |||
| api_key=54f6e81f40a31d66d976496de895a7a4 | |||
| api_secret=ZDYyMjNmMTlkYTE0YWRmOWUwZTYxNjYz | |||
| [baidullm] | |||
| access_url=https://aip.baidubce.com/oauth/2.0/token?grant_type=client_credentials&client_id= | |||
| api_key=TUuGVkpzzotFAhIIGIa0OCUO | |||
| api_secret=fv6LTCRcYhtxYb4Frs55jttOYICenCQG | |||
| api_url=https://aip.baidubce.com/rpc/2.0/ai_custom/v1/wenxinworkshop/chat/eb-instant?access_token= | |||
| [liandongllm] | |||
| api_url=http://125.34.89.79:8000/search/questionAnswer | |||
| [zhipullm] | |||
| api_key=e1c759ec620a9045360d7a90d697b88f.pjn1S2MqSS8lNmzj | |||
| @@ -0,0 +1,45 @@ | |||
| # coding=utf-8 | |||
| from sources.source import Source | |||
| from elasticsearch import Elasticsearch | |||
| from datetime import datetime | |||
class Essource(Source):
    """Question/answer source backed by Elasticsearch.

    param: api_url (ES endpoint), index (comma-separated index names).
    """

    def __init__(self, **param):
        self.es = Elasticsearch(param['api_url'])
        self.index = param['index'].split(',')

    def getdata(self, **param):
        """Return the tenant's active Q&A documents as a list of dicts.

        Filters on tenant_id, del_flag == 0 and status == '0'; searches
        across all configured indices (search accepts a list).
        """
        result_list = []
        query = {
            "bool": {
                "must": [
                    {
                        "term": {"tenant_id": param['tenant_id']}
                    },
                    {
                        "term": {"del_flag": 0}
                    },
                    {
                        "term": {"status": '0'}
                    }
                ]
            }
        }
        result = self.es.search(index=self.index,
                                query=query,
                                size=10000)
        for items in result['hits']['hits']:
            temp = {"name": items['_source']['name'],
                    "answer": items['_source']['answer'],
                    "id": items['_id'],
                    "model_type": items['_source']['model_type'],
                    "knowledge_lib": items['_source']['knowledge_lib']}
            result_list.append(temp)
        return result_list

    def upload(self, msg):
        """Index *msg* with a timestamp-based id.

        Fix: es.create targets exactly one index, but self.index is the
        split list — use the first configured index instead of passing
        the whole list.
        """
        timestamp = datetime.strftime(datetime.now(), '%Y%m%d%H%M%S%f')
        self.es.create(index=self.index[0],
                       id=timestamp,
                       document=msg)
| @@ -0,0 +1,12 @@ | |||
| # coding=utf-8 | |||
| from abc import ABC, abstractmethod | |||
class Source(ABC):
    """Abstract interface for question/answer data sources."""

    @abstractmethod
    def __init__(self, **param):
        """Configure the source from config keyword arguments."""
        ...

    @abstractmethod
    def getdata(self, **param):
        """Return the matching Q&A entries for the given filters."""
        ...
| @@ -0,0 +1,41 @@ | |||
| # coding=utf-8 | |||
| from sources.source import Source | |||
| import pymysql | |||
class Sqlsource(Source):
    """Question/answer source backed by MySQL via pymysql.

    param: api_url ('host:port'), api_id, api_password, db,
    index ('qa_table,knowledge_table').
    """

    def __init__(self, **param):
        self.index = param['index'].split(',')
        host, port = param['api_url'].split(':')
        self.conn = pymysql.connect(host=host,
                                    port=int(port),
                                    user=param['api_id'],
                                    password=param['api_password'],
                                    db=param['db'],
                                    charset='utf8')
        self.cursor = self.conn.cursor()

    def getdata(self, **param):
        """Return the tenant's active Q&A rows as a list of dicts.

        Table names come from trusted config (settings.ini); the
        tenant_id value is passed as a bound query parameter instead of
        being interpolated into the SQL string (prevents SQL injection).
        """
        result_list = []
        sql = 'SELECT a.name,a.answer,a.id,a.type,b.name FROM '
        sql = sql + '%s as a ' % self.index[0]
        sql = sql + 'LEFT JOIN %s as b ON a.knowledge_id=b.id ' % self.index[1]
        sql = sql + 'WHERE a.del_flag=0 and a.status=0'
        sql = sql + ' and a.tenant_id=%s'
        self.cursor.execute(sql, (param['tenant_id'],))
        results = self.cursor.fetchall()
        for items in results:
            temp = {"name": items[0],
                    "answer": items[1],
                    "id": str(items[2]),
                    "model_type": items[3],
                    "knowledge_lib": items[4]}
            result_list.append(temp)
        return result_list

    def getten(self, devId):
        """Return the t_id rows owning the given device id.

        devId is bound as a query parameter, not formatted into the
        string (the original was injectable through device_id).
        """
        sql = 'SELECT t_id FROM %s WHERE device_id=%%s' % self.index[0]
        self.cursor.execute(sql, (devId,))
        results = self.cursor.fetchall()
        return results
| @@ -0,0 +1,28 @@ | |||
| import json | |||
| from kafka import KafkaProducer, KafkaConsumer | |||
| from kafka.errors import kafka_errors | |||
| import traceback | |||
class Kafka(object):
    """Thin wrapper around one Kafka consumer/producer pair.

    NOTE(review): upload() stringifies msg and the producer's
    value_serializer json-dumps it again, so values arrive
    double-encoded — presumably the consumer expects that; confirm
    before changing the wire format.
    """

    def __init__(self):
        self.kafkaCon = KafkaConsumer('es_question_mark_log',
                                      bootstrap_servers='8.140.53.237:9092',
                                      api_version=(0, 11, 5),
                                      group_id='test_kl')
        self.kafkaPro = KafkaProducer(
            bootstrap_servers=['8.140.53.237:9092'],
            api_version=(0, 11, 5),
            key_serializer=lambda k: json.dumps(k).encode(),
            value_serializer=lambda v: json.dumps(v).encode())

    def upload(self, key, msg):
        """Publish msg under key, blocking up to 10s for the broker ack."""
        future = self.kafkaPro.send("es_question_mark_log",
                                    key=key,
                                    value=str(msg))
        try:
            future.get(timeout=10)
        except kafka_errors:
            # Best-effort send; matches the original behaviour — the
            # formatted traceback is neither printed nor logged.
            traceback.format_exc()

    def download(self):
        """Return the shared consumer for 'es_question_mark_log'."""
        return self.kafkaCon
| @@ -0,0 +1,17 @@ | |||
| # coding=utf-8 | |||
class Logger():
    """Minimal file logger writing one line per message.

    nlt_info.log / nlt_error.log are appended to under logger_path,
    which is expected to end with a path separator (e.g. '/var/log/').
    """

    def __init__(self, logger_path):
        self.info_path = logger_path + 'nlt_info.log'
        self.error_path = logger_path + 'nlt_error.log'

    def info(self, msg):
        """Append msg plus a newline to the info log."""
        # Context manager closes the handle even if write raises (the
        # original leaked it); explicit utf-8 avoids locale-dependent
        # encoding of the Chinese log content.
        with open(self.info_path, 'a', encoding='utf-8') as f:
            f.write(msg)
            f.write('\n')

    def error(self, msg):
        """Append msg plus a newline to the error log."""
        with open(self.error_path, 'a', encoding='utf-8') as f:
            f.write(msg)
            f.write('\n')
| @@ -0,0 +1,18 @@ | |||
| # coding=utf-8 | |||
| import threading | |||
# Custom thread subclass that captures the target function's return value
class MyThread(threading.Thread):
    """Thread that remembers the target function's return value.

    threading.Thread discards the target's result; this subclass stores
    it so callers can fetch it after the thread finishes.
    """

    def __init__(self, func, args=()):
        super(MyThread, self).__init__()
        self.func = func
        self.args = args
        # Sentinel result if the thread never ran or raised.
        self.result = [[], -1]

    def run(self):
        # Bug fix: the original called self.func() and silently dropped
        # self.args; positional arguments are now forwarded.
        self.result = self.func(*self.args)

    def get_result(self):
        """Block until the thread finishes, then return its result."""
        self.join()
        return self.result