# Fetch the llama.cpp sources, build them, and launch the bundled server.
#
# Every step is a separate command chained with `&&`, so a failure in any
# step (clone, cd, build) stops the sequence instead of running later steps
# in the wrong directory or against a missing binary.
#
# NOTE(review): the model path is relative to the freshly cloned checkout;
# the commands here do not download or create it, so it presumably has to be
# placed at models/7B/ggml-model.gguf before the server can start -- confirm.
# Per the llama.cpp server docs, -m is the model file and -c the context size.
git clone https://github.com/ggerganov/llama.cpp &&
  cd llama.cpp &&
  make &&
  ./server -m models/7B/ggml-model.gguf -c 2048