HEX
Server: Apache/2.4.6 (CentOS) OpenSSL/1.0.2k-fips PHP/7.4.30
System: Linux iZj6c1151k3ad370bosnmsZ 3.10.0-1160.76.1.el7.x86_64 #1 SMP Wed Aug 10 16:21:17 UTC 2022 x86_64
User: root (0)
PHP: 7.4.30
Disabled: NONE
Upload Files
File: /var/www/html/www.winghung.com/wp-content/plugins/mxchat-basic/includes/class-mxchat-utils.php
<?php
// Abort immediately when this file is requested directly rather than loaded by WordPress.
defined('ABSPATH') || exit;

class MxChat_Utils {

/**
 * Submit or update content (and its embedding) in the knowledge store.
 *
 * An embedding is generated for the content first. The content is then stored in
 * Pinecone when Pinecone is enabled for the given bot; otherwise it is stored in
 * the WordPress table `{$wpdb->prefix}mxchat_system_prompt_content`.
 *
 * @param string      $content    The content to be embedded.
 * @param string      $source_url The source URL of the content.
 * @param string      $api_key    The API key used for generating embeddings.
 * @param string|null $vector_id  Optional vector ID for Pinecone (if not provided, one is derived from the URL or generated).
 * @param string      $bot_id     The bot ID for multi-bot support.
 * @return bool|WP_Error True on success, WP_Error on failure.
 */
public static function submit_content_to_db($content, $source_url, $api_key, $vector_id = null, $bot_id = 'default') {
    global $wpdb;

    // Sanitize the source URL before it is stored or sent anywhere.
    $source_url = esc_url_raw($source_url);

    // Ensure UTF-8 validity without aggressive escaping.
    $safe_content = wp_check_invalid_utf8($content);

    // Remove NUL and other ASCII control characters, but keep tab (\x09),
    // newline (\x0A) and carriage return (\x0D).
    // The pattern is byte-oriented on purpose (no /u modifier): every byte it
    // targets is below 0x80 and therefore never part of a multibyte UTF-8
    // sequence, so the result is the same for valid UTF-8, while a /u pattern
    // would make preg_replace() return null for a subject that is not valid UTF-8.
    $stripped = preg_replace('/[\x00-\x08\x0B\x0C\x0E-\x1F\x7F]/', '', $safe_content);
    if (is_string($stripped)) {
        $safe_content = $stripped;
    }
    // On a PCRE failure $safe_content keeps the UTF-8-checked text instead of becoming null.

    // Generate the embedding using the bot-specific configuration.
    $embedding_vector = self::generate_embedding($content, $api_key, $bot_id);

    if (!is_array($embedding_vector)) {
        return new WP_Error('embedding_failed', 'Failed to generate embedding for content');
    }

    // Pinecone enabled for this bot: store there and nowhere else.
    if (self::is_pinecone_enabled_for_bot($bot_id)) {
        return self::store_in_pinecone_only($embedding_vector, $content, $source_url, $vector_id, $bot_id);
    }

    // Otherwise store in the WordPress database only.
    $table_name = $wpdb->prefix . 'mxchat_system_prompt_content';
    $embedding_vector_serialized = maybe_serialize($embedding_vector);

    return self::store_in_wordpress_db($safe_content, $source_url, $embedding_vector_serialized, $table_name);
}

/**
 * Tell whether Pinecone is switched on and fully configured for one bot.
 *
 * The default bot, installs without the multi-bot manager class, and bots whose
 * filtered configuration comes back empty all defer to the global setting.
 *
 * @param string $bot_id The bot ID to check.
 * @return bool True when the enable flag, API key and host are all non-empty.
 */
private static function is_pinecone_enabled_for_bot($bot_id = 'default') {
    $bot_config = array();

    // Only ask for a bot-specific configuration when multi-bot support applies.
    if ($bot_id !== 'default' && class_exists('MxChat_Multi_Bot_Manager')) {
        $bot_config = apply_filters('mxchat_get_bot_pinecone_config', array(), $bot_id);
    }

    // Nothing bot-specific available: use the global Pinecone settings.
    if (empty($bot_config)) {
        return self::is_pinecone_enabled();
    }

    // Every one of these entries must be present and non-empty.
    foreach (array('use_pinecone', 'api_key', 'host') as $required_key) {
        if (empty($bot_config[$required_key])) {
            return false;
        }
    }

    return true;
}

/**
 * Tell whether Pinecone is switched on and fully configured in the global
 * `mxchat_pinecone_addon_options` option (used for the default bot).
 *
 * @return bool True when the enable flag is set (and not '0') and both the
 *              API key and the host are non-empty.
 */
private static function is_pinecone_enabled() {
    $settings = get_option('mxchat_pinecone_addon_options');

    // No stored settings at all means Pinecone cannot be in use.
    if (empty($settings)) {
        return false;
    }

    // The enable flag must be present, non-empty and not the string '0'.
    if (empty($settings['mxchat_use_pinecone']) || $settings['mxchat_use_pinecone'] === '0') {
        return false;
    }

    // Both connection details are required as well.
    return !empty($settings['mxchat_pinecone_api_key'])
        && !empty($settings['mxchat_pinecone_host']);
}

/**
 * Store content in Pinecone only, resolving the Pinecone credentials for the bot.
 *
 * A bot-specific configuration (from the `mxchat_get_bot_pinecone_config` filter)
 * is used when one is available for a non-default bot; otherwise the global
 * `mxchat_pinecone_addon_options` option is used.
 *
 * @param array       $embedding_vector The embedding values to upsert.
 * @param string      $content          The text stored alongside the vector.
 * @param string      $source_url       The source URL of the content (may be empty).
 * @param string|null $vector_id        Optional explicit vector ID.
 * @param string      $bot_id           The bot ID for multi-bot support.
 * @return bool|WP_Error True on success, WP_Error on failure.
 */
private static function store_in_pinecone_only($embedding_vector, $content, $source_url, $vector_id = null, $bot_id = 'default') {
    // Bot-specific configuration only applies to non-default bots with multi-bot support loaded.
    $bot_pinecone_config = array();
    if ($bot_id !== 'default' && class_exists('MxChat_Multi_Bot_Manager')) {
        $bot_pinecone_config = apply_filters('mxchat_get_bot_pinecone_config', array(), $bot_id);
    }

    if (!empty($bot_pinecone_config)) {
        $api_key     = $bot_pinecone_config['api_key'] ?? '';
        $environment = ''; // Not used in new Pinecone API
        $index_name  = ''; // Not used in new Pinecone API
        $namespace   = $bot_pinecone_config['namespace'] ?? '';
    } else {
        // Default bot, or fallback when the bot has no configuration of its own.
        // Every key is read with ?? so a missing option or key yields '' instead of a notice.
        $pinecone_options = get_option('mxchat_pinecone_addon_options');
        $api_key     = $pinecone_options['mxchat_pinecone_api_key'] ?? '';
        $environment = $pinecone_options['mxchat_pinecone_environment'] ?? '';
        $index_name  = $pinecone_options['mxchat_pinecone_index'] ?? '';
        $namespace   = $pinecone_options['mxchat_pinecone_namespace'] ?? '';
    }

    // Fail early with a configuration error rather than sending an unauthenticated request.
    if (empty($api_key)) {
        return new WP_Error('pinecone_config', 'Pinecone API key is not configured. Please set the API key in your bot settings.');
    }

    $result = self::store_in_pinecone_main(
        $embedding_vector,
        $content,
        $source_url,
        $api_key,
        $environment,
        $index_name,
        $vector_id,
        $bot_id,
        $namespace
    );

    // Pass errors through unchanged; any other outcome is reported as success.
    if (is_wp_error($result)) {
        return $result;
    }

    return true;
}

/**
 * Store content in the WordPress database with a progressive fallback.
 *
 * Content with a valid source URL updates the existing row for that URL, if one
 * exists. Content without a valid URL ("manual" content) always gets a newly
 * generated `mxchat://manual-content/...` identifier and is inserted as a new row.
 * When the write fails, it is retried (up to three attempts in total) with
 * progressively stricter sanitization of the content.
 *
 * @param string $safe_content                The content to store.
 * @param string $source_url                  The source URL (may be empty or invalid for manual content).
 * @param string $embedding_vector_serialized The serialized embedding vector.
 * @param string $table_name                  The full table name to write to.
 * @return bool|WP_Error True on success, WP_Error when every attempt failed.
 */
private static function store_in_wordpress_db($safe_content, $source_url, $embedding_vector_serialized, $table_name) {
    global $wpdb;

    // Anything that is not a valid URL is treated as manual content.
    $is_manual_content = empty($source_url) || !filter_var($source_url, FILTER_VALIDATE_URL);
    $existing_id = null;

    if ($is_manual_content) {
        // Generate a unique identifier so manual entries never overwrite each other.
        $source_url = 'mxchat://manual-content/' . time() . '-' . wp_generate_password(8, false);
    } else {
        // Only real URLs are checked for an existing row to update.
        $existing_id = $wpdb->get_var(
            $wpdb->prepare(
                "SELECT id FROM {$table_name} WHERE source_url = %s LIMIT 1",
                $source_url
            )
        );
    }

    $max_attempts = 3;
    $current_content = $safe_content;
    $result = false;

    for ($attempt = 1; $attempt <= $max_attempts; $attempt++) {
        try {
            $row = array(
                'url'              => $source_url,
                'article_content'  => $current_content,
                'embedding_vector' => $embedding_vector_serialized,
                'source_url'       => $source_url,
                'timestamp'        => current_time('mysql'),
            );
            $row_formats = array('%s', '%s', '%s', '%s', '%s');

            if ($existing_id) {
                $result = $wpdb->update($table_name, $row, array('id' => $existing_id), $row_formats, array('%d'));
            } else {
                $result = $wpdb->insert($table_name, $row, $row_formats);
            }
        } catch (Exception $e) {
            // An exception counts as a failed attempt; retry with the same content.
            $result = false;
            continue;
        }

        // update() may legitimately return 0 (no rows changed); only false is a failure.
        if ($result !== false) {
            break;
        }

        // Apply stricter sanitization before the next attempt.
        $current_content = self::sanitize_content_for_retry($current_content, $attempt);
    }

    if ($result === false) {
        return new WP_Error('database_failed', 'Failed to store content in WordPress database after ' . $max_attempts . ' attempts');
    }

    return true;
}

/**
 * Produce a more aggressively sanitized, shortened copy of content after a failed write.
 *
 * After attempt 1: keep Unicode letters/digits, whitespace and basic punctuation; cap at 50000 bytes.
 * After attempt 2: keep ASCII letters/digits, whitespace and basic punctuation; cap at 30000 bytes.
 * After any later attempt the content is returned unchanged.
 *
 * @param string $content        The content that failed to store.
 * @param int    $failed_attempt The 1-based number of the attempt that just failed.
 * @return string The sanitized content for the next attempt.
 */
private static function sanitize_content_for_retry($content, $failed_attempt) {
    if ($failed_attempt === 1) {
        $pattern   = '/[^\p{L}\p{N}\s.,;:!?()-]/u';
        $max_bytes = 50000;
    } elseif ($failed_attempt === 2) {
        $pattern   = '/[^a-zA-Z0-9\s.,;:!?()-]/u';
        $max_bytes = 30000;
    } else {
        return $content;
    }

    $content  = (string) $content;
    $filtered = preg_replace($pattern, '', $content);

    if (!is_string($filtered)) {
        // With the /u modifier preg_replace() returns null when the subject is not
        // valid UTF-8. Fall back to a byte-wise ASCII whitelist so the text is
        // cleaned instead of being silently replaced by an empty value.
        $filtered = preg_replace('/[^a-zA-Z0-9\s.,;:!?()-]/', '', $content);
        if (!is_string($filtered)) {
            $filtered = '';
        }
    }

    if (strlen($filtered) > $max_bytes) {
        $filtered = substr($filtered, 0, $max_bytes);

        // A byte-wise cut can land inside a multibyte character. If the result is
        // no longer valid UTF-8, drop the incomplete trailing sequence (a lead
        // byte followed by too few continuation bytes).
        if (preg_match('//u', $filtered) !== 1) {
            $trimmed = preg_replace('/[\xC0-\xFF][\x80-\xBF]{0,2}\z/', '', $filtered);
            if (is_string($trimmed)) {
                $filtered = $trimmed;
            }
        }
    }

    return $filtered;
}

/**
 * Upsert one vector, with its text and metadata, into Pinecone.
 *
 * The vector ID is the supplied `$vector_id`, else the md5 of a valid URL, else a
 * generated `manual_...` ID. The host is read from the bot-specific configuration
 * when available, otherwise from the global `mxchat_pinecone_addon_options` option.
 *
 * @param array       $embedding_vector The embedding values.
 * @param string      $content          The text stored in the vector metadata.
 * @param string      $url              The source URL (may be empty for manual content).
 * @param string      $api_key          The Pinecone API key sent in the `Api-Key` header.
 * @param string      $environment      Not used by this method; kept for signature compatibility.
 * @param string      $index_name       Not used by this method; kept for signature compatibility.
 * @param string|null $vector_id        Optional explicit vector ID.
 * @param string      $bot_id           The bot ID, stored in the metadata.
 * @param string      $namespace        Optional Pinecone namespace.
 * @return bool|WP_Error True on HTTP 200, WP_Error on configuration, transport or API failure.
 */
private static function store_in_pinecone_main($embedding_vector, $content, $url, $api_key, $environment, $index_name, $vector_id = null, $bot_id = 'default', $namespace = '') {
    // Validate the URL once; the result drives both the vector ID and the content type.
    $has_valid_url = !empty($url) && (bool) filter_var($url, FILTER_VALIDATE_URL);

    if (!$vector_id) {
        if ($has_valid_url) {
            // Stable ID per URL so re-submitting the same URL overwrites its vector.
            $vector_id = md5($url);
        } else {
            // Manual content (empty/invalid URL) gets a unique ID each time.
            $vector_id = 'manual_' . time() . '_' . substr(md5($content . microtime(true)), 0, 8);
        }
    }

    // Resolve the host: bot-specific configuration first, global option as fallback.
    $bot_pinecone_config = array();
    if ($bot_id !== 'default' && class_exists('MxChat_Multi_Bot_Manager')) {
        $bot_pinecone_config = apply_filters('mxchat_get_bot_pinecone_config', array(), $bot_id);
    }

    if (!empty($bot_pinecone_config)) {
        $host = $bot_pinecone_config['host'] ?? '';
    } else {
        $options = get_option('mxchat_pinecone_addon_options');
        $host = $options['mxchat_pinecone_host'] ?? '';
    }

    // Accept hosts saved with a scheme or trailing slash; otherwise the endpoint
    // below would become "https://https://...".
    $host = trim((string) $host);
    $normalized_host = preg_replace('#^https?://#i', '', $host);
    if (is_string($normalized_host)) {
        $host = $normalized_host;
    }
    $host = rtrim($host, '/');

    if (empty($host)) {
        return new WP_Error('pinecone_config', 'Pinecone host is not configured. Please set the host in your bot settings.');
    }

    // 'manual' without a valid URL; otherwise 'product' for /product/ or /shop/ URLs, else 'content'.
    $content_type = 'manual';
    if ($has_valid_url) {
        $is_product_url = (strpos($url, '/product/') !== false || strpos($url, '/shop/') !== false);
        $content_type = $is_product_url ? 'product' : 'content';
    }

    $api_endpoint = "https://{$host}/vectors/upsert";

    // One timestamp for both fields so they cannot differ within a single upsert.
    // Both are set to the current time on every call.
    $now = time();

    $request_body = array(
        'vectors' => array(
            array(
                'id'       => $vector_id,
                'values'   => $embedding_vector,
                'metadata' => array(
                    'text'         => $content,
                    'source_url'   => $url, // Can be empty for manual content
                    'type'         => $content_type,
                    'last_updated' => $now,
                    'created_at'   => $now,
                    'bot_id'       => $bot_id,
                ),
            ),
        ),
    );

    // Add namespace if specified for multi-bot separation.
    if (!empty($namespace)) {
        $request_body['namespace'] = $namespace;
    }

    $response = wp_remote_post($api_endpoint, array(
        'headers' => array(
            'Api-Key' => $api_key,
            'accept' => 'application/json',
            'content-type' => 'application/json'
        ),
        'body' => wp_json_encode($request_body),
        'timeout' => 30,
        'data_format' => 'body'
    ));

    // Transport-level failure (DNS, timeout, TLS, ...).
    if (is_wp_error($response)) {
        return new WP_Error('pinecone_request', $response->get_error_message());
    }

    // Anything other than HTTP 200 is reported with the raw response body.
    $response_code = wp_remote_retrieve_response_code($response);
    if ($response_code !== 200) {
        return new WP_Error('pinecone_api', sprintf(
            'Pinecone API error (HTTP %d): %s',
            $response_code,
            wp_remote_retrieve_body($response)
        ));
    }

    return true;
}

/**
 * Generate an embedding for the given text using the bot-specific configuration.
 *
 * The provider is chosen from the configured `embedding_model`: models starting
 * with "voyage" use the Voyage AI endpoint, models starting with
 * "gemini-embedding" use the Gemini endpoint, and everything else uses the
 * OpenAI endpoint.
 *
 * @param string $text    The text to be embedded.
 * @param string $api_key Fallback API key, used for the OpenAI endpoint when the
 *                        options do not contain a non-empty `api_key`.
 * @param string $bot_id  The bot ID for multi-bot support.
 * @return array|null The embedding vector, or null on failure.
 */
private static function generate_embedding($text, $api_key, $bot_id = 'default') {
    // Bot-specific options when available, otherwise the global plugin options.
    $options = array();
    if ($bot_id !== 'default' && class_exists('MxChat_Multi_Bot_Manager')) {
        $options = apply_filters('mxchat_get_bot_options', array(), $bot_id);
    }
    if (empty($options)) {
        $options = get_option('mxchat_options');
    }

    // Treat an empty stored model name like a missing one.
    $selected_model = !empty($options['embedding_model'])
        ? (string) $options['embedding_model']
        : 'text-embedding-ada-002';

    $is_voyage = strpos($selected_model, 'voyage') === 0;
    $is_gemini = !$is_voyage && strpos($selected_model, 'gemini-embedding') === 0;

    // Pick the API key for the selected provider.
    if ($is_voyage) {
        $api_key = $options['voyage_api_key'] ?? '';
    } elseif ($is_gemini) {
        $api_key = $options['gemini_api_key'] ?? '';
    } elseif (!empty($options['api_key'])) {
        // Prefer the configured key; an empty stored key falls back to the passed-in key.
        $api_key = $options['api_key'];
    }

    // Without a key the request cannot succeed; skip the network round trip.
    if (empty($api_key)) {
        return null;
    }

    if ($is_gemini) {
        // Gemini takes the API key as a query parameter; encode it so it cannot break the URL.
        $endpoint = 'https://generativelanguage.googleapis.com/v1beta/models/' . $selected_model
            . ':embedContent?key=' . rawurlencode($api_key);
        $headers = [
            'Content-Type' => 'application/json'
        ];
        $request_body = [
            'model' => 'models/' . $selected_model,
            'content' => [
                'parts' => [
                    ['text' => $text]
                ]
            ],
            'outputDimensionality' => 1536
        ];
    } else {
        // OpenAI and Voyage share the same request shape and bearer authentication.
        $endpoint = $is_voyage
            ? 'https://api.voyageai.com/v1/embeddings'
            : 'https://api.openai.com/v1/embeddings';
        $headers = [
            'Content-Type' => 'application/json',
            'Authorization' => 'Bearer ' . $api_key
        ];
        $request_body = [
            'input' => $text,
            'model' => $selected_model
        ];

        // voyage-3-large is requested with an explicit output dimension.
        if ($selected_model === 'voyage-3-large') {
            $request_body['output_dimension'] = 2048;
        }
    }

    $response = wp_remote_post($endpoint, [
        'body'        => wp_json_encode($request_body),
        'headers'     => $headers,
        'timeout'     => 60,
        'redirection' => 5,
        'blocking'    => true,
        'httpversion' => '1.0',
        'sslverify'   => true,
    ]);

    if (is_wp_error($response)) {
        return null;
    }

    $response_body = json_decode(wp_remote_retrieve_body($response), true);

    // Each provider nests the vector differently; anything else is a failure.
    if ($is_gemini) {
        $embedding = $response_body['embedding']['values'] ?? null;
    } else {
        $embedding = $response_body['data'][0]['embedding'] ?? null;
    }

    return is_array($embedding) ? $embedding : null;
}
}