Changes
diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..7ee7b97
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,6 @@
+__pycache__/
+*.py[cod]
+*.class
+.venv/
+venv/
+env/
diff --git a/model-switcher/main.py b/model-switcher/main.py
new file mode 100644
index 0000000..2a03ed5
--- /dev/null
+++ b/model-switcher/main.py
@@ -0,0 +1,196 @@
+import http.server
+import socketserver
+import json
+import os
+import subprocess
+import argparse
+import sys
+from pathlib import Path
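+
+# Example invocations (root is required, since /etc and systemd are touched):
+#   sudo python3 main.py                            # TCP on port 7330, all interfaces
+#   sudo python3 main.py --host /run/switcher.sock  # serve over a unix socket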
+
+# Global constants
+DEFAULT_PORT = 7330
+DEFAULT_HOST = ""
+CONF_DIR = "/etc/llama.cpp.d"
+LINK_PATH = "/etc/llama.cpp.conf"
+SERVICE_NAME = "llama.cpp"
+STATIC_DIR = os.path.join(os.path.dirname(__file__), "static")
+
+class ModelManager:
+ @staticmethod
+ def getModels():
+ """Returns a list of model config names and the currently active one."""
+ if not os.path.exists(CONF_DIR):
+ return [], None
+
+ models = [f for f in os.listdir(CONF_DIR) if f.endswith(".conf")]
+ models.sort()
+
+ current_model = None
+ if os.path.islink(LINK_PATH):
+ target = os.readlink(LINK_PATH)
+ current_model = os.path.basename(target)
+
+ return models, current_model
+
+    @staticmethod
+    def switchModel(model_name):
+        """Updates the symlink and restarts the systemd service."""
+        # Accept only bare .conf filenames so a crafted name cannot escape CONF_DIR.
+        if os.path.basename(model_name) != model_name or not model_name.endswith(".conf"):
+            return False, f"Invalid model name: {model_name}"
+
+        target_path = os.path.join(CONF_DIR, model_name)
+        if not os.path.exists(target_path):
+            return False, f"Config file {model_name} not found"
+
+        # Update the symlink atomically: create a temporary link alongside the
+        # real one, then rename it into place, so LINK_PATH never briefly
+        # vanishes while the service might be reading it.
+        try:
+            tmp_link = LINK_PATH + ".tmp"
+            if os.path.islink(tmp_link) or os.path.exists(tmp_link):
+                os.remove(tmp_link)
+            os.symlink(target_path, tmp_link)
+            os.replace(tmp_link, LINK_PATH)
+ except PermissionError as e:
+ return False, f"Permission denied: {str(e)}"
+ except Exception as e:
+ return False, str(e)
+
+ # Restart service
+ try:
+ subprocess.run(["systemctl", "restart", SERVICE_NAME], check=True)
+ return True, "Success"
+ except subprocess.CalledProcessError as e:
+ return False, f"Failed to restart service: {str(e)}"
+ except Exception as e:
+ return False, str(e)
+
+class RequestHandler(http.server.BaseHTTPRequestHandler):
+    def address_string(self):
+        # Over a unix socket, client_address is a plain (usually empty) string,
+        # which the base implementation's indexing chokes on when logging.
+        if isinstance(self.client_address, str):
+            return self.client_address or "unix-socket"
+        return super().address_string()
+
+    def do_GET(self):
+ if self.path == "/api/models":
+ self.handleGetModels()
+ else:
+ self.serveStatic()
+
+ def do_POST(self):
+ if self.path == "/api/switch":
+ self.handleSwitchModel()
+ else:
+ self.send_error(404)
+
+    def serveStatic(self):
+        """Serves files from the static directory, refusing paths that escape it."""
+        path = self.path.split('?')[0]
+        if path == "/":
+            path = "/index.html"
+
+        file_path = Path(STATIC_DIR, path.lstrip("/")).resolve()
+        static_root = Path(STATIC_DIR).resolve()
+
+        # Reject requests that resolve outside the static directory (traversal).
+        if static_root not in file_path.parents:
+            self.send_error(404)
+            return
+
+        if file_path.is_file():
+            content_type = self.getContentType(str(file_path))
+            self.send_response(200)
+            self.send_header("Content-type", content_type)
+            self.end_headers()
+            self.wfile.write(file_path.read_bytes())
+        else:
+            self.send_error(404)
+
+ def getContentType(self, file_path):
+ if file_path.endswith(".html"): return "text/html"
+ if file_path.endswith(".css"): return "text/css"
+ if file_path.endswith(".js"): return "application/javascript"
+ return "application/octet-stream"
+
+    def handleGetModels(self):
+        try:
+            models, current = ModelManager.getModels()
+            response = {
+                "models": models,
+                "current": current
+            }
+            self.sendJsonResponse(200, response)
+        except Exception as e:
+            self.sendJsonResponse(500, {"error": str(e)})
+
+    def handleSwitchModel(self):
+        content_length = int(self.headers.get('Content-Length', 0))
+        post_data = self.rfile.read(content_length)
+        try:
+            data = json.loads(post_data)
+            model_name = data.get("model")
+            if not model_name:
+                self.sendJsonResponse(400, {"error": "Model name required"})
+                return
+
+            success, message = ModelManager.switchModel(model_name)
+            if success:
+                self.sendJsonResponse(200, {"status": "ok"})
+            else:
+                self.sendJsonResponse(500, {"error": message})
+        except Exception as e:
+            self.sendJsonResponse(500, {"error": str(e)})
+
+    def sendJsonResponse(self, status_code, data):
+ self.send_response(status_code)
+ self.send_header("Content-type", "application/json")
+ self.end_headers()
+ self.wfile.write(json.dumps(data).encode("utf-8"))
+
+def runServer():
+ parser = argparse.ArgumentParser(description="LLM Model Switcher")
+ parser.add_argument("--host", default=DEFAULT_HOST, help="Host address to bind to. If a path (contains /), it will be treated as a unix socket. (default: '%(default)s')")
+ parser.add_argument("--port", type=int, default=DEFAULT_PORT, help="Port to bind to (TCP only). (default: %(default)s)")
+ args = parser.parse_args()
+
+ if os.geteuid() != 0:
+ print("WARNING: Not running as root. This script requires sudo to modify /etc and restart systemd services.")
+
+ # Check if host looks like a unix socket path
+ if '/' in args.host:
+ socket_path = args.host
+ if os.path.exists(socket_path):
+ os.unlink(socket_path)
+
+ with socketserver.UnixStreamServer(socket_path, RequestHandler) as httpd:
+            # Relax permissions so other local users (e.g. an nginx reverse
+            # proxy) can reach the socket; 0o666 grants everyone read/write.
+            os.chmod(socket_path, 0o666)
+ print(f"LLM Model Switcher started on unix socket: {socket_path}")
+ try:
+ httpd.serve_forever()
+ except KeyboardInterrupt:
+ pass
+ finally:
+ if os.path.exists(socket_path):
+ os.unlink(socket_path)
+ else:
+ # TCP Server
+ socketserver.TCPServer.allow_reuse_address = True
+ with socketserver.TCPServer((args.host, args.port), RequestHandler) as httpd:
+ host_display = args.host if args.host else "0.0.0.0"
+ print(f"LLM Model Switcher started at http://{host_display}:{args.port}")
+ try:
+ httpd.serve_forever()
+ except KeyboardInterrupt:
+ pass
+
+if __name__ == "__main__":
+ runServer()
diff --git a/model-switcher/prompts.md b/model-switcher/prompts.md
new file mode 100644
index 0000000..376ad69
--- /dev/null
+++ b/model-switcher/prompts.md
@@ -0,0 +1,71 @@
+Create a web app with a Python backend to switch LLM models.
+
+I have an AI server running llama.cpp on 3 GTX 1080 GPUs. The VRAM is
+only enough to run a small model. I have downloaded some small models
+and created llama.cpp parameters for them. This web app will enable me
+to change which model llama.cpp runs without SSHing into the AI
+server and manually changing the settings. It works as follows:
+
+* The model files (.gguf) are in the `/mnt/data/models/llm` directory.
+* The directory `/etc/llama.cpp.d` stores the llama.cpp parameters for
+  these models, one file per model. These files contain the env
+  variables that the systemd service file reads, which get
+  substituted into the llama.cpp command line (see the sketch after
+  this list). For example, the file
+  `/etc/llama.cpp.d/mistral-venice-edition.conf` contains the
+  parameters for the `Dolphin-Mistral-24B-Venice-Edition-Q5_K_M.gguf`
+  model (the path of the model file is a parameter in the conf file).
+* `/etc/llama.cpp.conf` is a symbolic link to one of the conf files.
+  The systemd service reads this file for parameters; the file it
+  links to is therefore the current configuration.
+
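+A conf file might look roughly like this (a sketch; the variable names
+are hypothetical and depend on what the systemd unit expects):
+
+```
+# /etc/llama.cpp.d/mistral-venice-edition.conf
+LLAMA_MODEL=/mnt/data/models/llm/Dolphin-Mistral-24B-Venice-Edition-Q5_K_M.gguf
+LLAMA_ARGS="--n-gpu-layers 99 --ctx-size 8192"
+```
+
+and the systemd unit would pick it up through the link, e.g.:
+
+```
+# /etc/systemd/system/llama.cpp.service (fragment, also a sketch)
+[Service]
+EnvironmentFile=/etc/llama.cpp.conf
+ExecStart=/usr/local/bin/llama-server --model ${LLAMA_MODEL} $LLAMA_ARGS
+```
+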
+The web app you are creating will show a list of all the models (just
+use the conf file names as the model names); I would choose a model and
+click a button. The backend would then link `/etc/llama.cpp.conf` to
+the chosen conf file and restart the llama.cpp service. That’s it.
+
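+For illustration, the switch request the backend would end up handling
+might look like this (a sketch; the endpoint and payload names are
+assumptions, not requirements):
+
+```python
+import json
+import urllib.request
+
+req = urllib.request.Request(
+    "http://localhost:7330/api/switch",
+    data=json.dumps({"model": "mistral-venice-edition.conf"}).encode(),
+    headers={"Content-Type": "application/json"},
+)
+print(urllib.request.urlopen(req).read())
+```
+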
+Put all the files in the `model-switcher` sub-directory.
+
+Naming style:
+
+* Use `CapitalizedCase` for classes.
+* Use `snake_case` for local variables.
+* Use `UPPER_CASE` for global constants.
+* Use `camelCase` for functions.
+
+Give me a plan of how you would implement this. Don’t edit anything
+yet.
diff --git a/model-switcher/static/index.html b/model-switcher/static/index.html
new file mode 100644
index 0000000..4c8755e
--- /dev/null
+++ b/model-switcher/static/index.html
@@ -0,0 +1,128 @@
+<!DOCTYPE html>
+<html lang="en">
+<head>
+ <meta charset="UTF-8">
+ <meta name="viewport" content="width=device-width, initial-scale=1.0">
+ <title>LLM Model Switcher</title>
+ <link rel="stylesheet" href="style.css">
+</head>
+<body>
+ <div id="app"></div>
+
+ <script type="module">
+ import { h, render } from 'https://esm.sh/preact';
+ import { useState, useEffect } from 'https://esm.sh/preact/hooks';
+ import htm from 'https://esm.sh/htm';
+
+ const html = htm.bind(h);
+
+ function App() {
+ const [models, setModels] = useState([]);
+ const [currentModel, setCurrentModel] = useState(null);
+ const [selectedModel, setSelectedModel] = useState(null);
+ const [loading, setLoading] = useState(true);
+ const [switching, setSwitching] = useState(false);
+ const [message, setMessage] = useState({ text: '', type: '' });
+
+ const fetchModels = async () => {
+ try {
+ setLoading(true);
+ const response = await fetch('/api/models');
+ const data = await response.json();
+ if (data.error) throw new Error(data.error);
+
+ setModels(data.models || []);
+ setCurrentModel(data.current);
+ if (data.current) setSelectedModel(data.current);
+ } catch (err) {
+ setMessage({ text: 'Failed to load models: ' + err.message, type: 'error' });
+ } finally {
+ setLoading(false);
+ }
+ };
+
+ const switchModel = async () => {
+ if (!selectedModel || selectedModel === currentModel) return;
+
+ try {
+ setSwitching(true);
+ setMessage({ text: 'Switching model and restarting service...', type: 'info' });
+
+ const response = await fetch('/api/switch', {
+ method: 'POST',
+ headers: { 'Content-Type': 'application/json' },
+ body: JSON.stringify({ model: selectedModel })
+ });
+
+ const data = await response.json();
+ if (data.error) throw new Error(data.error);
+
+ setMessage({ text: 'Model switched successfully!', type: 'success' });
+ setCurrentModel(selectedModel);
+ } catch (err) {
+ setMessage({ text: 'Failed to switch model: ' + err.message, type: 'error' });
+ } finally {
+ setSwitching(false);
+ }
+ };
+
+ useEffect(() => {
+ fetchModels();
+ }, []);
+
+ if (loading) return html`<div class="container"><p>Loading models...</p></div>`;
+
+ return html`
+ <div class="container">
+ <header>
+ <h1>LLM Model Switcher</h1>
+ <p class="status">
+ Current Model: <strong>${currentModel || 'None'}</strong>
+ </p>
+ </header>
+
+ <main>
+ <div class="model-list">
+ ${models.length === 0 ? html`<p>No models found in <code>/etc/llama.cpp.d</code></p>` :
+ models.map(model => html`
+ <label class="model-item ${selectedModel === model ? 'selected' : ''} ${currentModel === model ? 'active' : ''}">
+ <input
+ type="radio"
+ name="model"
+ value="${model}"
+ checked="${selectedModel === model}"
+ onChange="${() => setSelectedModel(model)}"
+ disabled="${switching}"
+ />
+ <span class="model-name">${model}</span>
+ ${currentModel === model && html`<span class="badge">Active</span>`}
+ </label>
+ `)
+ }
+ </div>
+
+ <div class="actions">
+ <button
+ onClick="${switchModel}"
+ disabled="${switching || !selectedModel || selectedModel === currentModel}"
+ class="${switching ? 'loading' : ''}"
+ >
+ ${switching ? 'Restarting...' : 'Apply Selection'}
+ </button>
+ <button class="secondary" onClick="${fetchModels}" disabled="${switching}">Refresh</button>
+ </div>
+
+ ${message.text && html`
+ <div class="message ${message.type}">
+ ${message.text}
+ </div>
+ `}
+ </main>
+ </div>
+ `;
+ }
+
+ render(html`<${App} />`, document.getElementById('app'));
+ </script>
+</body>
+</html>
diff --git a/model-switcher/static/style.css b/model-switcher/static/style.css
new file mode 100644
index 0000000..a66f781
--- /dev/null
+++ b/model-switcher/static/style.css
@@ -0,0 +1,170 @@
+:root {
+ --bg-color: #f4f7f6;
+ --card-bg: #ffffff;
+ --text-color: #333;
+ --primary-color: #2563eb;
+ --primary-hover: #1d4ed8;
+ --secondary-color: #64748b;
+ --success-color: #10b981;
+ --error-color: #ef4444;
+ --info-color: #3b82f6;
+ --border-color: #e2e8f0;
+}
+
+body {
+ font-family: -apple-system, BlinkMacSystemFont, "Segoe UI", Roboto, Helvetica, Arial, sans-serif;
+ background-color: var(--bg-color);
+ color: var(--text-color);
+ margin: 0;
+ display: flex;
+ justify-content: center;
+ padding: 2rem 1rem;
+}
+
+.container {
+ width: 100%;
+ max-width: 600px;
+ background: var(--card-bg);
+ padding: 2rem;
+ border-radius: 12px;
+ box-shadow: 0 4px 6px -1px rgba(0, 0, 0, 0.1), 0 2px 4px -1px rgba(0, 0, 0, 0.06);
+}
+
+header {
+ border-bottom: 1px solid var(--border-color);
+ margin-bottom: 1.5rem;
+ padding-bottom: 1rem;
+}
+
+h1 {
+ margin: 0;
+ font-size: 1.5rem;
+ color: var(--primary-color);
+}
+
+code {
+ font-family: ui-monospace, SFMono-Regular, Menlo, Monaco, Consolas, "Liberation Mono", "Courier New", monospace;
+ background: #f1f5f9;
+ padding: 0.2rem 0.4rem;
+ border-radius: 4px;
+ font-size: 0.9em;
+}
+
+.status {
+ margin-top: 0.5rem;
+ font-size: 0.9rem;
+ color: var(--secondary-color);
+}
+
+.model-list {
+ display: flex;
+ flex-direction: column;
+ gap: 0.75rem;
+ margin-bottom: 1.5rem;
+}
+
+.model-item {
+ display: flex;
+ align-items: center;
+ padding: 1rem;
+ border: 2px solid var(--border-color);
+ border-radius: 8px;
+ cursor: pointer;
+ transition: all 0.2s;
+ position: relative;
+}
+
+.model-item:hover {
+ border-color: var(--primary-color);
+ background-color: #eff6ff;
+}
+
+.model-item.selected {
+ border-color: var(--primary-color);
+ background-color: #eff6ff;
+}
+
+.model-item.active {
+ background-color: #f0fdf4;
+ border-color: var(--success-color);
+}
+
+.model-item input {
+ margin-right: 1rem;
+}
+
+.model-name {
+ font-weight: 500;
+ flex-grow: 1;
+}
+
+.badge {
+ background-color: var(--success-color);
+ color: white;
+ font-size: 0.7rem;
+ padding: 2px 8px;
+ border-radius: 12px;
+ text-transform: uppercase;
+ font-weight: bold;
+}
+
+.actions {
+ display: flex;
+ gap: 1rem;
+}
+
+button {
+ flex: 1;
+ padding: 0.75rem;
+ border: none;
+ border-radius: 6px;
+ font-weight: 600;
+ cursor: pointer;
+ transition: background-color 0.2s;
+}
+
+button:not(.secondary) {
+ background-color: var(--primary-color);
+ color: white;
+}
+
+button:not(.secondary):hover:not(:disabled) {
+ background-color: var(--primary-hover);
+}
+
+button.secondary {
+ background-color: var(--secondary-color);
+ color: white;
+}
+
+button:disabled {
+ opacity: 0.5;
+ cursor: not-allowed;
+}
+
+.message {
+ margin-top: 1.5rem;
+ padding: 1rem;
+ border-radius: 6px;
+ font-size: 0.9rem;
+}
+
+.message.info { background-color: #dbeafe; color: #1e40af; }
+.message.success { background-color: #dcfce7; color: #166534; }
+.message.error { background-color: #fee2e2; color: #991b1b; }
+
+@keyframes spin {
+ to { transform: rotate(360deg); }
+}
+
+.loading::after {
+ content: "";
+ display: inline-block;
+ width: 12px;
+ height: 12px;
+ border: 2px solid rgba(255,255,255,0.3);
+ border-radius: 50%;
+ border-top-color: #fff;
+ animation: spin 1s linear infinite;
+ margin-left: 8px;
+}