Subiendo ficheros

2023-12-01 20:25:51 +01:00 · 2023-12-01 20:25:51 +01:00 · 0dc4b0633c
commit 0dc4b0633c
6 changed files with 130 additions and 0 deletions
--- a/.gitignore
+++ b/.gitignore
@ -0,0 +1 @@
 llama-2-7b-chat.Q2_K.gguf
--- a/19
+++ b/19
@ -0,0 +1,19 @@
 # Base image: the official Python image (no tag is pinned here)
 FROM python
 # Set the working directory in the container
 WORKDIR /app
 # Copy only the server script into the image at /app
 COPY ./llama_cpu_server.py /app/llama_cpu_server.py
 # The model file is not copied into the image; ia.sh bind-mounts it at run time
 #COPY ./llama-2-7b-chat.Q2_K.gguf /app/llama-2-7b-chat.Q2_K.gguf
 # Install the packages the server imports (installed directly; no requirements.txt is used)
 RUN pip install llama-cpp-python
 RUN pip install Flask
 # Declare that the container listens on port 5000 (publishing is done with -p at run time)
 EXPOSE 5000
 # Run llama_cpu_server.py when the container launches
 CMD ["python", "llama_cpu_server.py"]
--- a/README.md
+++ b/README.md
--- a/genera_imagen.sh
+++ b/genera_imagen.sh
@ -0,0 +1,2 @@
 #!/bin/bash
 # Build the Docker image tagged "llama2" using the current directory as build context.
 docker build -t llama2 .
--- a/ia.sh
+++ b/ia.sh
@ -0,0 +1,62 @@
 #!/bin/bash
 # Settings for the local LLaMA HTTP API
 SCLGPT_URL="http://127.0.0.1:5000/llama"
 SALUDO_IA="Eres un asistente genial" #You are a helpful assistant
 #######################################
 # Resolve the token limit sent with every request.
 # Arguments: $1 - requested limit (optional)
 # Outputs:   the requested limit when it is a decimal integer greater
 #            than 100; otherwise the default of 100
 #######################################
 function ResolveMaxTokens() {
  local requested=${1:-}
  # Validate before comparing: a non-numeric or multi-word argument would
  # otherwise make the numeric test itself fail with an error message.
  # The digit cap keeps the value inside bash's integer range, and 10# stops
  # a leading zero from being read as octal.
  if [[ "$requested" =~ ^[0-9]{1,9}$ ]] && (( 10#$requested > 100 )); then
    echo "$(( 10#$requested ))"
  else
    echo 100
  fi
 }
 # The first script argument may raise the limit above the default of 100
 MAX_TOKENS=$(ResolveMaxTokens "${1:-}")
 # Funciones docker IA
 #######################################
 # Start a fresh detached "sclgpt" container from the llama2 image.
 # Publishes container port 5000 on host port 5000 and bind-mounts the model
 # file from the current directory to /app inside the container.
 # Outputs: a progress message, followed by whatever docker prints
 # Returns: the exit status of `docker run`
 #######################################
 function StarIA() {
  echo "Iniciando IA ..."
  # Remove any container left over from a previous run so the name is free
  StopIA
  # Variant without the bind mount (model baked into the image), output silenced:
  #docker run -d --name sclgpt -ti -p 5000:5000 llama2 2>/dev/null 1>/dev/null
  # The mount spec is quoted so a working directory containing spaces is
  # passed to docker as a single argument.
  docker run -d --name sclgpt -ti -p 5000:5000 \
    -v "$(pwd)/llama-2-7b-chat.Q2_K.gguf:/app/llama-2-7b-chat.Q2_K.gguf" llama2
 }
 # Stop and then remove the "sclgpt" container. Docker's error output is
 # discarded, so a missing container produces no message. The function's
 # status is that of the final `docker rm`.
 function StopIA() {
  local action
  for action in stop rm; do
    docker "$action" sclgpt 2>/dev/null
  done
 }
 # Print the 5th whitespace-separated field of the last "total time" line in
 # the sclgpt container log, labelled as milliseconds. When no such line
 # exists only the label prefix is printed.
 function TimeIA() {
  local elapsed
  elapsed=$(
    docker logs sclgpt \
      | grep 'total time' \
      | tail -1 \
      | awk '{print $5" milisegundos"}'
  )
  echo -e "\nProcesado en: ${elapsed}"
 }
 # ----- Main / Principal ---------------------------
 #######################################
 # Escape text so it can be embedded inside a JSON string literal.
 # Arguments: $1 - raw text
 # Outputs:   the text with backslashes, double quotes and tabs escaped
 #######################################
 function JsonEscape() {
  local text=$1
  # Backslashes first, so the escapes added below are not escaped again
  text=${text//\\/\\\\}
  text=${text//\"/\\\"}
  text=${text//$'\t'/\\t}
  printf '%s' "$text"
 }
 StarIA
 clear
 echo
 echo "----------------------------------------"
 echo " Conversar con SoloConLinux-GPT [SCL-GPT]"
 echo "----------------------------------------"
 echo
 # Main loop: one question/answer exchange per iteration
 while true; do
  # Ask the user for a question
  echo -n "[$USER] escribe tu pregunta (o 'salir'): "
  # -r keeps backslashes literal. A failed read with nothing captured means
  # end of input: leave the loop instead of sending empty questions forever.
  if ! read -r SCL_PREGUNTA && [ -z "$SCL_PREGUNTA" ]; then
    echo
    break
  fi
  # Typing "salir" ends the conversation
  if [ "$SCL_PREGUNTA" == "salir" ]; then
    break
  fi
  echo -e "\n  ... SCL-GPT pensando ...\n"
  # Build the request body from escaped values so quotes or backslashes in
  # the question cannot break (or inject into) the JSON document.
  SCL_PAYLOAD=$(printf '{"system_message": "%s", "user_message": "%s", "max_tokens": %s}' \
    "$(JsonEscape "$SALUDO_IA")" "$(JsonEscape "$SCL_PREGUNTA")" "$MAX_TOKENS")
  # Post the question silently with curl, keep the line holding "text", drop
  # backticks, then cut away the echoed prompt (everything through the second
  # "SYS>>" and the following "INST]") and print what remains.
  echo -e "[SCL-GPT]:\n$(curl -s "$SCLGPT_URL" -H "Content-Type: application/json" -d "$SCL_PAYLOAD" | grep '"text"' | sed 's/`//g' | awk -F 'SYS>>' '{print $3}' | awk -F 'INST]' '{print $2}')"
  TimeIA
  echo -e "\n----------------------------------------------------------------------------------"
 done
 StopIA
--- a/llama_cpu_server.py
+++ b/llama_cpu_server.py
@ -0,0 +1,46 @@
 from flask import Flask, request, jsonify
 from llama_cpp import Llama
 # Create the Flask application object
 app = Flask("Llama server")
 # Model instance shared across requests; it stays None until
 # generate_response() creates it on the first request that passes validation.
 model = None
@app.route('/llama', methods=['POST'])
 def generate_response():
    global model
    try:
        data = request.get_json()
        # Check if the required fields are present in the JSON data
        if 'system_message' in data and 'user_message' in data and 'max_tokens' in data:
            system_message = data['system_message']
            user_message = data['user_message']
            max_tokens = int(data['max_tokens'])
            # Prompt creation
            prompt = f"""<s>[INST] <<SYS>>
            {system_message}
            <</SYS>>
            {user_message} [/INST]"""
            # Create the model if it was not previously created
            if model is None:
                model_path = "./llama-2-7b-chat.Q2_K.gguf"
                model = Llama(model_path=model_path)
            # Run the model
            output = model(prompt, max_tokens=max_tokens, echo=True)
            return jsonify(output)
        else:
            return jsonify({"error": "Missing required parameters"}), 400
    except Exception as e:
        return jsonify({"Error": str(e)}), 500
 # Start Flask's built-in server on all interfaces, port 5000, but only when
 # this file is executed as a script.
 # NOTE(review): debug=True combined with host='0.0.0.0' makes the interactive
 # debugger reachable from the network - confirm this is meant for local use only.
 if __name__ == '__main__':
    app.run(host='0.0.0.0', port=5000, debug=True)