copilot-meeting/ssug-meeting-summarizer/main.py
Michael Bobbitt 42c5f87a1d init
2025-11-10 22:34:17 -05:00

131 lines
4.9 KiB
Python

# makes a request to a local ollama server and streams the response
import requests
import sys
import time
import json
import argparse
# Network location of the Ollama server that every request is sent to.
OLLAMA_SERVER = "10.48.9.106"
OLLAMA_PORT = 11434

# Instructions sent ahead of the conversation in every chat request.
SYSTEM_PROMPTS = [
    (
        "Always answer in a CLI-friendly, plain text format. "
        "Use Markdown code blocks for code, and ensure code is easy to copy and paste. "
        "Do not use colors or special formatting that breaks copy-paste. "
        "Only use plain Markdown for code."
    ),
]

# Earlier exchanges of the running conversation. stream_ollama_response
# appends one dict per exchange with the keys "response_time", "prompt",
# "thought" and "response", and replays them on the next request.
HIST_CONTEXT = []
def ping_server(timeout=5.0):
    """Check whether the Ollama server answers HTTP requests.

    Sends a GET request to the server's root URL and prints the outcome.

    Args:
        timeout: Seconds to wait for the server before giving up. Without a
            timeout, requests waits indefinitely on a host that never
            answers, which would hang the CLI at startup.

    Returns:
        True if the server replied with HTTP 200; False for any other status
        code or when the request failed (connection error, timeout, ...).
    """
    url = f"http://{OLLAMA_SERVER}:{OLLAMA_PORT}"
    try:
        response = requests.get(url, timeout=timeout)
    except requests.exceptions.RequestException as e:
        # Covers connection failures and timeouts alike.
        print(f"Error connecting to Ollama server: {e}")
        return False

    if response.status_code == 200:
        print("Ollama server is reachable.")
        return True
    print(f"Failed to reach Ollama server: {response.status_code}")
    return False
def _build_chat_messages(prompt):
    """Assemble the chat messages: system prompts, past exchanges, new prompt."""
    # Chat messages need an explicit role; a bare {"system": ...} dict is not
    # a valid message object for the /api/chat endpoint.
    messages = [{"role": "system", "content": text} for text in SYSTEM_PROMPTS]
    for hist in HIST_CONTEXT:
        # Exchanges that never produced an answer are not replayed.
        if hist["prompt"] and hist["response"]:
            messages.append({"role": "user", "content": hist["prompt"]})
            # The recorded reasoning is replayed as its own assistant
            # message ahead of the answer.
            if hist["thought"]:
                messages.append({"role": "assistant", "content": hist["thought"]})
            messages.append({"role": "assistant", "content": hist["response"]})
    messages.append({"role": "user", "content": prompt})
    return messages


def stream_ollama_response(model, prompt, output_format="cli", connect_timeout=10.0):
    """Send a prompt to the Ollama chat endpoint and stream the reply to stdout.

    The request carries the system prompts, every earlier exchange stored in
    HIST_CONTEXT and the new prompt. 'thinking' tokens and 'content' tokens
    are written to stdout as they arrive. Afterwards the exchange is appended
    to HIST_CONTEXT so that later calls include it.

    Args:
        model: Name of the model, passed through in the request body.
        prompt: The user's message for this turn.
        output_format: Currently unused; kept so existing callers that pass
            it keep working.
        connect_timeout: Seconds to wait for the connection to be
            established. Reading is deliberately left unbounded because a
            model may take a long time before emitting its first token.

    Returns:
        The concatenated 'content' text of the reply. An empty string is
        returned when the server answers with a non-200 status or when the
        request fails before any content was received; in those cases
        nothing is added to HIST_CONTEXT.
    """
    url = f"http://{OLLAMA_SERVER}:{OLLAMA_PORT}/api/chat"
    headers = {
        "Content-Type": "application/json",
    }
    payload = {
        "model": model,
        "messages": _build_chat_messages(prompt),
        "stream": True,
    }

    # Collect pieces in lists and join once instead of repeated `+=`.
    thought_parts = []
    content_parts = []
    try:
        # The context manager releases the connection even when the loop
        # exits early or an exception is raised.
        with requests.post(
            url,
            headers=headers,
            json=payload,
            stream=True,
            timeout=(connect_timeout, None),
        ) as response:
            if response.status_code != 200:
                print(f"Error: {response.status_code} - {response.text}")
                return ""

            print("Thinking (plain text, code blocks are copy-paste ready):")
            in_thinking = True
            for raw_line in response.iter_lines():
                if not raw_line:
                    continue
                decoded_line = raw_line.decode("utf-8")
                # Tolerate server-sent-event framing ("data: ..." / "[DONE]").
                if decoded_line.startswith("data: "):
                    decoded_line = decoded_line[6:]
                if decoded_line == "[DONE]":
                    break
                try:
                    chunk = json.loads(decoded_line)
                except json.JSONDecodeError:
                    # Skip lines that are not JSON rather than abort the stream.
                    continue
                if not isinstance(chunk, dict):
                    continue
                if chunk.get("error"):
                    # The server reports mid-stream failures as an error object.
                    print(f"\nError: {chunk['error']}")
                    break

                # Only look at the message carried by *this* chunk so that a
                # chunk without one never re-emits earlier text.
                message = chunk.get("message")
                if not isinstance(message, dict):
                    continue

                # Stream 'thinking' tokens live.
                thinking = message.get("thinking")
                if thinking:
                    sys.stdout.write(thinking)
                    sys.stdout.flush()
                    thought_parts.append(thinking)

                # When content starts, print a header once, then stream it live.
                content = message.get("content")
                if content:
                    if in_thinking:
                        print("\n\nResponse (copy-paste code blocks as needed):")
                        in_thinking = False
                    sys.stdout.write(content)
                    sys.stdout.flush()
                    content_parts.append(content)
    except requests.exceptions.RequestException as e:
        print(f"\nError communicating with Ollama server: {e}")
        if not content_parts:
            return ""
        # Otherwise fall through and keep the partial reply already shown.

    print()  # newline after completion

    response_text = "".join(content_parts)
    # Add the exchange to the history context used by later requests.
    HIST_CONTEXT.append({
        "response_time": time.strftime("%Y-%m-%d %H:%M:%S"),
        "prompt": prompt,
        "thought": "".join(thought_parts),
        "response": response_text,
    })
    return response_text
def main():
    """Run the interactive chat session and save it to a transcript file.

    Parses the model name from the command line, verifies the server is
    reachable, then reads prompts from stdin until the user quits. Each
    prompt and its reply are written to a timestamped text file in the
    current directory.
    """
    cli = argparse.ArgumentParser(description="Stream response from local Ollama server.")
    cli.add_argument("model", type=str, help="The model to use (e.g., 'llama2', 'gpt4o').")
    options = cli.parse_args()

    # Guards against an explicitly empty model name.
    if not options.model:
        print("Model is required.")
        sys.exit(1)
    if not ping_server():
        sys.exit(1)

    started_at = time.strftime("%Y%m%d-%H%M%S")
    transcript_path = f"conversation_{started_at}.txt"
    print(f"\nEnter your prompt below. Type 'q', 'quit', 'done', or 'bye' to end the conversation.\nAll responses will be saved to {transcript_path}.\n")

    exit_words = ("q", "quit", "done", "bye")
    with open(transcript_path, "w", encoding="utf-8") as transcript:
        transcript.write(f"Conversation started at {started_at}\nModel: {options.model}\n\n")
        while True:
            try:
                user_text = input("You: ").strip()
            except (EOFError, KeyboardInterrupt):
                # Ctrl-D / Ctrl-C at the prompt ends the session quietly.
                print("\nConversation ended.")
                break

            if user_text.lower() in exit_words:
                print("Conversation ended.")
                break
            if user_text:
                transcript.write(f"You: {user_text}\n")
                reply = stream_ollama_response(options.model, user_text, output_format="cli")
                transcript.write(f"AI: {reply}\n\n")


if __name__ == "__main__":
    main()