// retrotoon-studio/server/llmConfig.ts

/**
 * LLM Configuration Module
 * Reads the admin-configured LLM model from the database and provides
 * a unified invokeLLM wrapper that routes to the correct provider.
 *
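 * Supported providers:
 * - "built-in": Uses the default Manus Forge API (default)
 * - "gemini-flash": Uses Google Gemini 2.5 Flash via the Gemini API
 *   (only when a Gemini API key is configured; otherwise the built-in provider is used)
 * - "gpt-4o", "claude-sonnet", "gemini-pro": Not implemented yet; requests are
 *   currently routed to the built-in provider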
 */

import { ENV } from "./_core/env";
import { invokeLLM as forgeInvokeLLM, type InvokeParams, type InvokeResult } from "./_core/llm";

/**
 * LLM settings resolved by getLlmConfig().
 */
export interface LlmConfig {
  /** Provider/model identifier, e.g. "built-in" or "gemini-flash"; getLlmConfig() falls back to "built-in". */
  model: string;
  /**
   * Behavior setting; getLlmConfig() falls back to "guided".
   * NOTE(review): the visible code in this module never reads this field — confirm the accepted values with its consumers.
   */
  behavior: string;
}

// Most recently resolved config, or null when nothing is cached.
let cachedConfig: LlmConfig | null = null;
// Date.now() value (milliseconds) captured when cachedConfig was last set; 0 when the cache is empty.
let cacheTimestamp = 0;
// Maximum age, in milliseconds, for which a cached config is reused.
const CACHE_TTL = 30_000; // 30 seconds

/**
 * Parse the raw JSON string stored under the "llm_config" key into an LlmConfig.
 *
 * JSON.parse returns arbitrary data, so each field is only accepted when it is a
 * non-empty string; anything else falls back to the default for that field
 * ("built-in" / "guided").
 *
 * @param raw JSON text read from the database
 * @returns a config whose fields are guaranteed to be strings
 * @throws SyntaxError when `raw` is not valid JSON
 */
function parseLlmConfig(raw: string): LlmConfig {
  const parsed: unknown = JSON.parse(raw);
  const fields: Record<string, unknown> =
    typeof parsed === "object" && parsed !== null
      ? (parsed as Record<string, unknown>)
      : {};
  const { model, behavior } = fields;
  return {
    model: typeof model === "string" && model ? model : "built-in",
    behavior: typeof behavior === "string" && behavior ? behavior : "guided",
  };
}

/**
 * Get the current LLM configuration from the database.
 *
 * A previously resolved config is reused while it is younger than CACHE_TTL.
 * Otherwise the "llm_config" row is read from the app config table and parsed.
 * When the database is unavailable, the row is missing/empty, or loading fails
 * (the failure is logged as a warning), the default config
 * `{ model: "built-in", behavior: "guided" }` is used. Whatever is resolved —
 * including the default — is cached until the TTL expires or
 * invalidateLlmConfigCache() is called.
 *
 * @returns the resolved configuration; this function does not reject on load errors
 */
export async function getLlmConfig(): Promise<LlmConfig> {
  const now = Date.now();
  if (cachedConfig && (now - cacheTimestamp) < CACHE_TTL) {
    return cachedConfig;
  }

  let loaded: LlmConfig | null = null;
  try {
    // Database modules are imported lazily, only when the cache has to be refreshed.
    const { getDb } = await import("./db");
    const db = await getDb();
    if (db) {
      const { appConfig } = await import("../drizzle/schema");
      const { eq } = await import("drizzle-orm");
      const rows = await db.select().from(appConfig).where(eq(appConfig.key, "llm_config")).limit(1);
      const raw = rows[0]?.value;
      if (raw) {
        loaded = parseLlmConfig(raw);
      }
    }
  } catch (err) {
    console.warn("[LLM Config] Failed to load config:", err);
  }

  // Fall back to the default config when nothing usable was loaded.
  cachedConfig = loaded ?? { model: "built-in", behavior: "guided" };
  cacheTimestamp = now;
  return cachedConfig;
}

/**
 * Drop the cached LLM config so the next getLlmConfig() call reloads it
 * (call after an admin saves new settings).
 */
export function invalidateLlmConfigCache(): void {
  // Reset both pieces of cache state back to their initial "empty" values.
  cacheTimestamp = 0;
  cachedConfig = null;
}

/**
 * Invoke the LLM using the configured provider.
 *
 * Routes to Gemini when the configured model is "gemini-flash" and a Gemini API
 * key is present; every other configuration uses the built-in Forge LLM.
 * If the Gemini call fails for any reason (network error, unreadable response
 * body, missing key, ...), the failure is logged and the request is retried
 * once against the built-in Forge LLM.
 *
 * @param params invocation parameters, forwarded unchanged to the chosen provider
 * @returns the provider's result
 * @throws whatever the built-in Forge LLM throws when it is the provider in use
 *         (including when it is used as the fallback)
 */
export async function invokeConfiguredLLM(params: InvokeParams): Promise<InvokeResult> {
  const config = await getLlmConfig();

  if (config.model === "gemini-flash" && ENV.geminiApiKey) {
    try {
      // `await` is required: returning the bare promise would let a rejection
      // bypass this try/catch and skip the fallback.
      return await invokeGemini(params);
    } catch (err) {
      console.error("[LLM Config] Gemini invocation failed:", err);
      console.warn("[LLM Config] Falling back to built-in LLM");
    }
  }

  // Default (and fallback): use built-in Forge LLM
  return forgeInvokeLLM(params);
}

/**
 * Call Google Gemini through its OpenAI-compatible chat completions endpoint,
 * authenticating with the configured API key.
 *
 * Message content is normalised before sending: plain strings pass through,
 * text and image_url parts are kept, any other part type is dropped, and a
 * message that ends up with exactly one text part is sent as a plain string.
 * A non-OK HTTP response is logged and the request is handed to the built-in
 * Forge LLM instead.
 *
 * @param params invocation parameters; only `messages` and the response format
 *               (`response_format` / `responseFormat`) are forwarded to Gemini
 * @returns the JSON response body, typed as InvokeResult without validation
 * @throws Error when no Gemini API key is configured
 */
async function invokeGemini(params: InvokeParams): Promise<InvokeResult> {
  const apiKey = ENV.geminiApiKey;
  if (!apiKey) throw new Error("GEMINI_API_KEY is not configured");

  // OpenAI-compatible endpoint exposed by the Gemini API
  const url = "https://generativelanguage.googleapis.com/v1beta/openai/chat/completions";

  const messages = params.messages.map(message => {
    const { role, content } = message;

    // Plain string content needs no conversion.
    if (typeof content === "string") {
      return { role, content };
    }

    const rawParts = Array.isArray(content) ? content : [content];
    const normalized = rawParts
      .map(part => {
        if (typeof part === "string") return { type: "text" as const, text: part };
        switch (part.type) {
          case "text":
          case "image_url":
            return part;
          default:
            // Unsupported part types become empty text, which the filter below removes.
            return { type: "text" as const, text: "" };
        }
      })
      // Keep every non-text part; keep text parts only when they carry text.
      .filter(part => part.type !== "text" || ("text" in part && part.text));

    // A lone text part is collapsed to a plain string.
    const isSingleText = normalized.length === 1 && normalized[0].type === "text";
    if (isSingleText) {
      return { role, content: (normalized[0] as any).text };
    }
    return { role, content: normalized };
  });

  const responseFormat = params.response_format || params.responseFormat;
  const payload: Record<string, unknown> = {
    model: "gemini-2.5-flash",
    messages,
    max_tokens: 8192,
  };
  if (responseFormat) {
    payload.response_format = responseFormat;
  }

  const headers = {
    "Content-Type": "application/json",
    "Authorization": `Bearer ${apiKey}`,
  };
  const response = await fetch(url, {
    method: "POST",
    headers,
    body: JSON.stringify(payload),
  });

  if (response.ok) {
    return (await response.json()) as InvokeResult;
  }

  // Non-OK response: log the details, then fall back to the built-in provider.
  const errorText = await response.text();
  console.error("[Gemini] API error:", response.status, errorText);
  console.warn("[Gemini] Falling back to built-in LLM");
  return forgeInvokeLLM(params);
}