From 0dc4b0633cf746b742c36571d93fc1ee9ef31164 Mon Sep 17 00:00:00 2001
From: luisgulo
Date: Fri, 1 Dec 2023 20:25:51 +0100
Subject: [PATCH] Uploading files

---
 .gitignore          |  1 +
 Dockerfile          | 19 ++++++++++++++
 README.md           |  0
 genera_imagen.sh    |  2 ++
 ia.sh               | 62 +++++++++++++++++++++++++++++++++++++++++++++
 llama_cpu_server.py | 46 +++++++++++++++++++++++++++++++++
 6 files changed, 130 insertions(+)
 create mode 100644 .gitignore
 create mode 100644 Dockerfile
 create mode 100644 README.md
 create mode 100755 genera_imagen.sh
 create mode 100755 ia.sh
 create mode 100644 llama_cpu_server.py

diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..8036fc8
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1 @@
+llama-2-7b-chat.Q2_K.gguf
diff --git a/Dockerfile b/Dockerfile
new file mode 100644
index 0000000..6685641
--- /dev/null
+++ b/Dockerfile
@@ -0,0 +1,19 @@
+# Use an official Python runtime as a parent image
+FROM python
+
+# Set the working directory in the container
+WORKDIR /app
+
+# Copy the server script into the container at /app
+COPY ./llama_cpu_server.py /app/llama_cpu_server.py
+#COPY ./llama-2-7b-chat.Q2_K.gguf /app/llama-2-7b-chat.Q2_K.gguf
+
+# Install the Python packages the server needs
+RUN pip install llama-cpp-python
+RUN pip install Flask
+
+# Expose port 5000 to the world outside this container
+EXPOSE 5000
+
+# Run llama_cpu_server.py when the container launches
+CMD ["python", "llama_cpu_server.py"]
diff --git a/README.md b/README.md
new file mode 100644
index 0000000..e69de29
diff --git a/genera_imagen.sh b/genera_imagen.sh
new file mode 100755
index 0000000..df1a994
--- /dev/null
+++ b/genera_imagen.sh
@@ -0,0 +1,2 @@
+#!/bin/bash
+docker build -t llama2 .
diff --git a/ia.sh b/ia.sh
new file mode 100755
index 0000000..9b01e0c
--- /dev/null
+++ b/ia.sh
@@ -0,0 +1,62 @@
+#!/bin/bash
+# Set the API values for the local ChatGPT-style server
+SCLGPT_URL="http://127.0.0.1:5000/llama"
+MAX_TOKENS=100
+SALUDO_IA="Eres un asistente genial" # system prompt ("You are a helpful assistant")
+
+# Use the first argument as MAX_TOKENS when it is greater than 100, otherwise keep the default of 100
+if [ "$1" == "" ] ; then
+  MAX_TOKENS=100
+else
+  if [ $1 -gt 100 ] ; then
+    MAX_TOKENS=$1
+  else
+    MAX_TOKENS=100
+  fi
+fi
+
+# Docker functions for the IA container
+function StarIA() {
+  echo "Iniciando IA ..."
+  StopIA
+  #docker run -d --name sclgpt -ti -p 5000:5000 llama2 2>/dev/null 1>/dev/null
+  docker run -d --name sclgpt -ti -p 5000:5000 -v $(pwd)/llama-2-7b-chat.Q2_K.gguf:/app/llama-2-7b-chat.Q2_K.gguf llama2
+}
+
+function StopIA() {
+  docker stop sclgpt 2>/dev/null
+  docker rm sclgpt 2>/dev/null
+}
+
+function TimeIA() {
+  echo -e "\nProcesado en: $(docker logs sclgpt |grep 'total time'|tail -1|awk '{print $5" milisegundos"}')"
+}
+
+# ----- Main ----------------------------------------
+StarIA
+clear
+echo
+echo "----------------------------------------"
+echo " Conversar con SoloConLinux-GPT [SCL-GPT]"
+echo "----------------------------------------"
+echo
+
+# Main program loop
+while true; do
+  # Ask the user for a question
+  echo -n "[$USER] escribe tu pregunta (o 'salir'): "
+  read SCL_PREGUNTA
+
+  # If the user types "salir", leave the program
+  if [ "$SCL_PREGUNTA" == "salir" ]; then
+    break
+  fi
+  # Use "curl" in silent mode to send the question to the local API and fetch the chatbot's answer
+  echo -e "\n ... SCL-GPT pensando ...\n"
+  # Extract the chatbot's answer from the JSON response returned by the API
+  # Print the chatbot's answer on the console
+  echo -e "[SCL-GPT]:\n$(curl -s $SCLGPT_URL -H "Content-Type: application/json" -d "{\"system_message\": \"$SALUDO_IA\", \"user_message\": \"$SCL_PREGUNTA\", \"max_tokens\": $MAX_TOKENS}" | grep '"text"' | sed 's/`//g' | awk -F 'SYS>>' '{print $3}' | awk -F 'INST]' '{print $2}')"
+  TimeIA
+  echo -e "\n----------------------------------------------------------------------------------"
+done
+StopIA
+
diff --git a/llama_cpu_server.py b/llama_cpu_server.py
new file mode 100644
index 0000000..3a64a4f
--- /dev/null
+++ b/llama_cpu_server.py
@@ -0,0 +1,46 @@
+from flask import Flask, request, jsonify
+from llama_cpp import Llama
+
+# Create a Flask object
+app = Flask("Llama server")
+model = None
+
+
+@app.route('/llama', methods=['POST'])
+def generate_response():
+    global model
+
+    try:
+        data = request.get_json()
+
+        # Check if the required fields are present in the JSON data
+        if 'system_message' in data and 'user_message' in data and 'max_tokens' in data:
+            system_message = data['system_message']
+            user_message = data['user_message']
+            max_tokens = int(data['max_tokens'])
+
+            # Prompt creation (Llama 2 chat format)
+            prompt = f"""[INST] <<SYS>>
+            {system_message}
+            <</SYS>>
+            {user_message} [/INST]"""
+
+            # Create the model if it was not previously created
+            if model is None:
+                model_path = "./llama-2-7b-chat.Q2_K.gguf"
+                model = Llama(model_path=model_path)
+
+            # Run the model
+            output = model(prompt, max_tokens=max_tokens, echo=True)
+
+            return jsonify(output)
+
+        else:
+            return jsonify({"error": "Missing required parameters"}), 400
+
+    except Exception as e:
+        return jsonify({"Error": str(e)}), 500
+
+if __name__ == '__main__':
+    app.run(host='0.0.0.0', port=5000, debug=True)
+
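
Note: to smoke-test the /llama endpoint that llama_cpu_server.py exposes, independently of ia.sh, a request along these lines should work once the sclgpt container is running with port 5000 published (the message values below are only examples):

    curl -s http://127.0.0.1:5000/llama \
      -H "Content-Type: application/json" \
      -d '{"system_message": "You are a helpful assistant", "user_message": "Hello", "max_tokens": 100}'

The reply is the completion object from llama-cpp-python serialized as JSON; ia.sh picks the "text" field out of it with grep/awk before printing the answer.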