Source: main/ipc/assistantHandlers.js

/**
 * @fileoverview IPC handlers for AI assistant (Ollama, External API, Local GGUF)
 * 
 * This module provides IPC handlers for chatting with various LLM providers.
 * Now uses a modular provider system for easy extensibility.
 * 
 * @author CTrace GUI Team
 * @version 1.0.0
 */

const { ipcMain } = require('electron');
const https = require('https');
const http = require('http');
const providerRegistry = require('../external_llm/ProviderRegistry');

// Local GGUF state
let localLLM = null;
let loadedModelPath = null;
let loadedGpuLayers = null;

/**
 * Setup IPC handlers for assistant chat
 * @param {BrowserWindow} mainWindow - Main window reference
 */
function setupAssistantHandlers(mainWindow) {
  /**
   * Handle assistant chat request
   * Input: { provider, message, config }
   * config contains: provider-specific configuration
   */
  ipcMain.handle('assistant-chat', async (event, { provider, message, config }) => {
    try {
      if (provider === 'ollama') {
        return await handleOllamaChat(message, config);
      } else if (provider === 'external') {
        // Use modular provider system for external APIs
        return await handleModularProvider(message, config);
      } else if (provider === 'local') {
        return await handleLocalChat(message, config);
      } else {
        return {
          success: false,
          error: 'Unknown provider or assistant not configured'
        };
      }
    } catch (error) {
      console.error('Error in assistant-chat handler:', error);
      return {
        success: false,
        error: error.message || 'Unknown error'
      };
    }
  });

  /**
   * Get list of available providers
   */
  ipcMain.handle('assistant-get-providers', async () => {
    try {
      return {
        success: true,
        providers: providerRegistry.getAllProviders()
      };
    } catch (error) {
      console.error('Error getting providers:', error);
      return {
        success: false,
        error: error.message
      };
    }
  });

  /**
   * Test a provider connection
   */
  ipcMain.handle('assistant-test-provider', async (event, { providerId, config }) => {
    try {
      const provider = providerRegistry.createProvider(providerId, config);
      return await provider.testConnection();
    } catch (error) {
      console.error('Error testing provider:', error);
      return {
        success: false,
        error: error.message
      };
    }
  });

  /**
   * Unload local model when requested
   */
  ipcMain.handle('assistant-unload-local', async () => {
    try {
      if (localLLM) {
        if (localLLM.context) await localLLM.context.dispose();
        if (localLLM.model) await localLLM.model.dispose();
        localLLM = null;
        loadedModelPath = null;
        loadedGpuLayers = null;
        return { success: true };
      }
      return { success: true };
    } catch (error) {
      console.error('Error unloading local model:', error);
      return { success: false, error: error.message };
    }
  });
}

/**
 * Handle Ollama chat request
 * @param {string} message - User message
 * @param {Object} config - Config with ollamaHost
 * @returns {Promise<Object>}
 */
async function handleOllamaChat(message, config) {
  const host = config.ollamaHost || 'http://localhost:11434';
  const url = new URL('/api/chat', host);
  
  const messages = [];
  
  // Add system prompt if provided
  if (config.systemPrompt) {
    messages.push({ role: 'system', content: config.systemPrompt });
  }
  
  messages.push({ role: 'user', content: message });
  
  const body = JSON.stringify({
    model: 'llama2', // default model, can be made configurable
    messages: messages,
    stream: false
  });

  return new Promise((resolve) => {
    const protocol = url.protocol === 'https:' ? https : http;
    const options = {
      hostname: url.hostname,
      port: url.port || (url.protocol === 'https:' ? 443 : 80),
      path: url.pathname,
      method: 'POST',
      headers: {
        'Content-Type': 'application/json',
        'Content-Length': Buffer.byteLength(body)
      },
      timeout: 60000
    };

    const req = protocol.request(options, (res) => {
      let data = '';
      res.on('data', (chunk) => { data += chunk; });
      res.on('end', () => {
        try {
          const parsed = JSON.parse(data);
          if (parsed.message && parsed.message.content) {
            resolve({ success: true, reply: parsed.message.content });
          } else {
            resolve({ success: false, error: 'Invalid response from Ollama' });
          }
        } catch (err) {
          resolve({ success: false, error: 'Failed to parse Ollama response: ' + err.message });
        }
      });
    });

    req.on('error', (err) => {
      resolve({ success: false, error: 'Ollama request failed: ' + err.message });
    });

    req.on('timeout', () => {
      req.destroy();
      resolve({ success: false, error: 'Ollama request timed out' });
    });

    req.write(body);
    req.end();
  });
}

/**
 * Handle external API chat using modular provider system
 * @param {string} message - User message
 * @param {Object} config - Config with providerId and provider-specific settings
 * @returns {Promise<Object>}
 */
async function handleModularProvider(message, config) {
  const providerId = config.providerId || 'openai';
  
  try {
    // Create provider instance
    const provider = providerRegistry.createProvider(providerId, config);
    
    // Validate configuration
    const validation = provider.validateConfig(config);
    if (!validation.valid) {
      return {
        success: false,
        error: 'Configuration error: ' + validation.errors.join(', ')
      };
    }
    
    // Send chat message
    return await provider.chat(message, {
      systemPrompt: config.systemPrompt,
      temperature: config.temperature,
      maxTokens: config.maxTokens
    });
  } catch (error) {
    console.error('Error with modular provider:', error);
    return {
      success: false,
      error: error.message || 'Provider error'
    };
  }
}

/**
 * Handle local GGUF model chat using node-llama-cpp
 * @param {string} message - User message
 * @param {Object} config - Config with localModelPath and gpuLayers
 * @returns {Promise<Object>}
 */
async function handleLocalChat(message, config) {
  const modelPath = config.localModelPath;

  if (!modelPath) {
    return { success: false, error: 'Local model path is required' };
  }

  try {
    // Dynamic import of node-llama-cpp (ESM module)
    const { getLlama, LlamaChatSession } = await import('node-llama-cpp');

    // Get GPU layers setting (default to 0 if not specified)
    const gpuLayers = config.gpuLayers !== undefined && config.gpuLayers !== null ? config.gpuLayers : 0;
    
    // Get context size setting (default to 8192 if not specified)
    const contextSize = config.contextSize !== undefined && config.contextSize !== null ? config.contextSize : 8192;

    // Load model if not loaded, if path changed, OR if GPU layers setting changed
    if (!localLLM || loadedModelPath !== modelPath || loadedGpuLayers !== gpuLayers) {
      console.log('Loading local GGUF model from:', modelPath);
      
      // Dispose old instance if any
      if (localLLM) {
        console.log('Disposing previous model instance...');
        try {
          if (localLLM.context) await localLLM.context.dispose();
          if (localLLM.model) await localLLM.model.dispose();
        } catch (disposeErr) {
          console.warn('Error during disposal:', disposeErr);
        }
        localLLM = null;
      }

      // Initialize llama
      const llama = await getLlama({
        // Enable verbose logging to see GPU initialization
        logLevel: 'debug'
      });
      
      console.log('=== GPU Detection ===');
      console.log('Attempting to load model with GPU layers:', gpuLayers);
      
      if (gpuLayers === -1) {
        console.log('Mode: Offloading ALL layers to GPU');
      } else if (gpuLayers === 0) {
        console.log('Mode: CPU only (no GPU acceleration)');
      } else {
        console.log(`Mode: Offloading ${gpuLayers} layers to GPU`);
      }
      
      console.log('Note: Watch for [node-llama-cpp] logs below for GPU/Vulkan device info');
      console.log('=====================');
      
      // Load the model with GPU configuration
      // The underlying llama.cpp will log GPU information like:
      // "ggml_vulkan: Using Vulkan1 (NVIDIA GeForce RTX 4070) | vulkan"
      const model = await llama.loadModel({
        modelPath: modelPath,
        gpuLayers: gpuLayers, // 0 = CPU only, -1 = all layers, or specific number
        // Try to enable verbose output
        onLoadProgress: (progress) => {
          console.log(`Loading model: ${(progress * 100).toFixed(1)}%`);
        }
      });
      
      console.log('✓ Model loaded');
      console.log('  GPU layers used:', gpuLayers === -1 ? 'All layers' : gpuLayers);
      console.log('  Model size:', model.size, 'parameters');
      console.log('  Model default context:', model.trainContextSize, 'tokens');
      console.log('  Using context size:', contextSize, 'tokens (saves VRAM)');

      // Create context with custom size to reduce VRAM usage
      const context = await model.createContext({
        contextSize: contextSize // Use configured context size instead of model default (40960)
      });
      
      // Create chat session
      const session = new LlamaChatSession({
        contextSequence: context.getSequence()
      });

      localLLM = { model, context, session };
      loadedModelPath = modelPath;
      loadedGpuLayers = gpuLayers;
      console.log('✓ Model ready for inference');
      console.log('=====================');
    } else {
      console.log('Using cached model (same path and GPU layers)');
    }

    // Generate response
    console.log('Generating response for:', message);
    
    // Prepend system prompt to the message if provided
    let fullMessage = message;
    if (config.systemPrompt) {
      fullMessage = `System: ${config.systemPrompt}\n\nUser: ${message}`;
    }
    
    const response = await localLLM.session.prompt(fullMessage);
    
    return {
      success: true,
      reply: response || '(no response)'
    };
  } catch (error) {
    console.error('Error with local GGUF model:', error);
    console.error('Error stack:', error.stack);
    
    // Clean up on error
    if (localLLM) {
      try {
        await localLLM.context?.dispose();
        await localLLM.model?.dispose();
      } catch (disposeError) {
        console.error('Error disposing model:', disposeError);
      }
    }
    localLLM = null;
    loadedModelPath = null;
    loadedGpuLayers = null;

    return {
      success: false,
      error: 'Local model error: ' + error.message
    };
  }
}

module.exports = { setupAssistantHandlers };