Uploading files

luisgulo 2023-12-01 20:25:51 +01:00
commit 0dc4b0633c
6 changed files with 130 additions and 0 deletions

1
.gitignore vendored Normal file

@@ -0,0 +1 @@
llama-2-7b-chat.Q2_K.gguf
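The ignored file is the quantized Llama 2 model: too large to commit, it is mounted into the container at runtime by ia.sh below. A minimal sketch of fetching it, assuming the usual TheBloke GGUF mirror on Hugging Face (the URL is an assumption, not referenced anywhere in this commit):

# Download the quantized model next to the scripts
# (URL is an assumption; any GGUF chat model saved under this
#  filename would be picked up the same way)
wget https://huggingface.co/TheBloke/Llama-2-7B-Chat-GGUF/resolve/main/llama-2-7b-chat.Q2_K.gguf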

19
Dockerfile Normal file

@@ -0,0 +1,19 @@
# Use an official Python runtime as a parent image
FROM python
# Set the working directory in the container
WORKDIR /app
# Copy the server script into the container at /app
COPY ./llama_cpu_server.py /app/llama_cpu_server.py
# The model is not baked into the image; it is mounted at runtime (see ia.sh)
#COPY ./llama-2-7b-chat.Q2_K.gguf /app/llama-2-7b-chat.Q2_K.gguf
# Install the packages the server needs
RUN pip install llama-cpp-python Flask
# Expose port 5000 to the world outside this container
EXPOSE 5000
# Run the server when the container launches
CMD ["python", "llama_cpu_server.py"]
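For reference, a hedged sketch of the two manual steps that the scripts below automate: building the image (genera_imagen.sh) and starting the server with the model mounted from the host (ia.sh):

# Build the image, then start the container with the GGUF model
# mounted into /app, where the server expects it
docker build -t llama2 .
docker run -d --name sclgpt -ti -p 5000:5000 \
  -v "$(pwd)/llama-2-7b-chat.Q2_K.gguf:/app/llama-2-7b-chat.Q2_K.gguf" \
  llama2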

0
README.md Normal file

2
genera_imagen.sh Executable file

@@ -0,0 +1,2 @@
#!/bin/bash
docker build -t llama2 .

62
ia.sh Executable file

@@ -0,0 +1,62 @@
#!/bin/bash
# Set the values for the local ChatGPT-style API
SCLGPT_URL="http://127.0.0.1:5000/llama"
MAX_TOKENS=100
SALUDO_IA="Eres un asistente genial" # "You are a helpful assistant"
# An optional first argument raises the token limit (minimum 100)
if [ -n "$1" ] && [ "$1" -gt 100 ] ; then
  MAX_TOKENS=$1
fi
# Docker helper functions for the IA container
function StartIA() {
  echo "Iniciando IA ..."
  StopIA
  #docker run -d --name sclgpt -ti -p 5000:5000 llama2 2>/dev/null 1>/dev/null
  docker run -d --name sclgpt -ti -p 5000:5000 -v "$(pwd)/llama-2-7b-chat.Q2_K.gguf:/app/llama-2-7b-chat.Q2_K.gguf" llama2
}
function StopIA() {
  docker stop sclgpt 2>/dev/null
  docker rm sclgpt 2>/dev/null
}
function TimeIA() {
  echo -e "\nProcesado en: $(docker logs sclgpt | grep 'total time' | tail -1 | awk '{print $5" milisegundos"}')"
}
# ----- Main ---------------------------------------
StartIA
clear
echo
echo "----------------------------------------"
echo " Conversar con SoloConLinux-GPT [SCL-GPT]"
echo "----------------------------------------"
echo
# Main program loop
while true; do
  # Ask the user for a question
  echo -n "[$USER] escribe tu pregunta (o 'salir'): "
  read -r SCL_PREGUNTA
  # If the user types "salir", exit the program
  if [ "$SCL_PREGUNTA" == "salir" ]; then
    break
  fi
  # Send the question to the ChatGPT-style API with curl in silent mode,
  # extract the chatbot's answer from the JSON response and print it
  echo -e "\n ... SCL-GPT pensando ...\n"
  echo -e "[SCL-GPT]:\n$(curl -s $SCLGPT_URL -H "Content-Type: application/json" -d "{\"system_message\": \"$SALUDO_IA\", \"user_message\": \"$SCL_PREGUNTA\", \"max_tokens\": $MAX_TOKENS}" | grep '"text"' | sed 's/`//g' | awk -F 'SYS>>' '{print $3}' | awk -F 'INST]' '{print $2}')"
  TimeIA
  echo -e "\n----------------------------------------------------------------------------------"
done
StopIA
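The long curl one-liner above hides the API contract behind sed/awk post-processing. A standalone sketch of the same request, showing the raw JSON the server expects (the question text is illustrative):

# Query the /llama endpoint directly; the response is the raw
# llama-cpp-python completion object serialized as JSON
curl -s http://127.0.0.1:5000/llama \
  -H "Content-Type: application/json" \
  -d '{"system_message": "Eres un asistente genial", "user_message": "¿Qué es Linux?", "max_tokens": 100}'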

46
llama_cpu_server.py Normal file

@@ -0,0 +1,46 @@
from flask import Flask, request, jsonify
from llama_cpp import Llama

# Create a Flask object
app = Flask("Llama server")
model = None


@app.route('/llama', methods=['POST'])
def generate_response():
    global model
    try:
        data = request.get_json()
        # Check if the required fields are present in the JSON data
        if 'system_message' in data and 'user_message' in data and 'max_tokens' in data:
            system_message = data['system_message']
            user_message = data['user_message']
            max_tokens = int(data['max_tokens'])
            # Prompt creation using the Llama 2 chat template
            prompt = f"""<s>[INST] <<SYS>>
{system_message}
<</SYS>>
{user_message} [/INST]"""
            # Create the model if it was not previously created
            if model is None:
                model_path = "./llama-2-7b-chat.Q2_K.gguf"
                model = Llama(model_path=model_path)
            # Run the model
            output = model(prompt, max_tokens=max_tokens, echo=True)
            return jsonify(output)
        else:
            return jsonify({"error": "Missing required parameters"}), 400
    except Exception as e:
        return jsonify({"error": str(e)}), 500


if __name__ == '__main__':
    app.run(host='0.0.0.0', port=5000, debug=True)
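For quick debugging the server can also run directly on the host, outside Docker, assuming the GGUF model sits in the current working directory (the same layout the Dockerfile expects under /app):

# Install the two dependencies and start the Flask server on port 5000
pip install llama-cpp-python Flask
python llama_cpu_server.py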