LLM支持优化

2026-05-26 17:43:42 +08:00
parent a08bc809bb
commit 4331b9306e
26 changed files with 2309 additions and 669 deletions
--- a/src/llm/kimi.rs
+++ b/src/llm/kimi.rs
@@ -1,284 +1,571 @@
-use super::{create_http_client, LlmProvider};
-use anyhow::{bail, Context, Result};
-use async_trait::async_trait;
-use serde::{Deserialize, Serialize};
-use std::time::Duration;
-
-/// Kimi API client (Moonshot AI)
-pub struct KimiClient {
-    base_url: String,
-    api_key: String,
-    model: String,
-    client: reqwest::Client,
-    thinking_enabled: bool,
-}
-
-#[derive(Debug, Serialize)]
-struct ChatCompletionRequest {
-    model: String,
-    messages: Vec<Message>,
-    #[serde(skip_serializing_if = "Option::is_none")]
-    max_tokens: Option<u32>,
-    #[serde(skip_serializing_if = "Option::is_none")]
-    temperature: Option<f32>,
-    stream: bool,
-    #[serde(skip_serializing_if = "Option::is_none")]
-    thinking: Option<ThinkingConfig>,
-}
-
-#[derive(Debug, Serialize)]
-struct ThinkingConfig {
-    #[serde(rename = "type")]
-    thinking_type: String,
-}
-
-#[derive(Debug, Serialize, Deserialize)]
-struct Message {
-    role: String,
-    content: String,
-}
-
-#[derive(Debug, Deserialize)]
-struct ChatCompletionResponse {
-    choices: Vec<Choice>,
-}
-
-#[derive(Debug, Deserialize)]
-struct Choice {
-    message: Message,
-    #[serde(default)]
-    reasoning_content: Option<String>,
-}
-
-#[derive(Debug, Deserialize)]
-struct ErrorResponse {
-    error: ApiError,
-}
-
-#[derive(Debug, Deserialize)]
-struct ApiError {
-    message: String,
-    #[serde(rename = "type")]
-    error_type: String,
-}
-
-impl KimiClient {
-    /// Create new Kimi client
-    pub fn new(api_key: &str, model: &str) -> Result<Self> {
-        let client = create_http_client(Duration::from_secs(60))?;
-
-        Ok(Self {
-            base_url: "https://api.moonshot.cn/v1".to_string(),
-            api_key: api_key.to_string(),
-            model: model.to_string(),
-            client,
-            thinking_enabled: false,
-        })
-    }
-
-    /// Create with custom base URL
-    pub fn with_base_url(api_key: &str, model: &str, base_url: &str) -> Result<Self> {
-        let client = create_http_client(Duration::from_secs(60))?;
-
-        Ok(Self {
-            base_url: base_url.trim_end_matches('/').to_string(),
-            api_key: api_key.to_string(),
-            model: model.to_string(),
-            client,
-            thinking_enabled: false,
-        })
-    }
-
-    /// Set timeout
-    pub fn with_timeout(mut self, timeout: Duration) -> Result<Self> {
-        self.client = create_http_client(timeout)?;
-        Ok(self)
-    }
-
-    /// Enable or disable thinking mode
-    pub fn with_thinking(mut self, enabled: bool) -> Self {
-        self.thinking_enabled = enabled;
-        self
-    }
-
-    /// List available models
-    pub async fn list_models(&self) -> Result<Vec<String>> {
-        let url = format!("{}/models", self.base_url);
-
-        let response = self.client
-            .get(&url)
-            .header("Authorization", format!("Bearer {}", self.api_key))
-            .send()
-            .await
-            .context("Failed to list Kimi models")?;
-
-        if !response.status().is_success() {
-            let status = response.status();
-            let text = response.text().await.unwrap_or_default();
-            bail!("Kimi API error: {} - {}", status, text);
-        }
-
-        #[derive(Deserialize)]
-        struct ModelsResponse {
-            data: Vec<Model>,
-        }
-
-        #[derive(Deserialize)]
-        struct Model {
-            id: String,
-        }
-
-        let result: ModelsResponse = response
-            .json()
-            .await
-            .context("Failed to parse Kimi response")?;
-
-        Ok(result.data.into_iter().map(|m| m.id).collect())
-    }
-
-    /// Validate API key
-    pub async fn validate_key(&self) -> Result<bool> {
-        match self.list_models().await {
-            Ok(_) => Ok(true),
-            Err(e) => {
-                let err_str = e.to_string();
-                if err_str.contains("401") || err_str.contains("Unauthorized") {
-                    Ok(false)
-                } else {
-                    Err(e)
-                }
-            }
-        }
-    }
-}
-
-#[async_trait]
-impl LlmProvider for KimiClient {
-    async fn generate(&self, prompt: &str) -> Result<String> {
-        let messages = vec![
-            Message {
-                role: "user".to_string(),
-                content: prompt.to_string(),
-            },
-        ];
-
-        self.chat_completion(messages).await
-    }
-
-    async fn generate_with_system(&self, system: &str, user: &str) -> Result<String> {
-        let mut messages = vec![];
-
-        if !system.is_empty() {
-            messages.push(Message {
-                role: "system".to_string(),
-                content: system.to_string(),
-            });
-        }
-
-        messages.push(Message {
-            role: "user".to_string(),
-            content: user.to_string(),
-        });
-
-        self.chat_completion(messages).await
-    }
-
-    async fn is_available(&self) -> bool {
-        self.validate_key().await.unwrap_or(false)
-    }
-
-    fn name(&self) -> &str {
-        "kimi"
-    }
-}
-
-impl KimiClient {
-    async fn chat_completion(&self, messages: Vec<Message>) -> Result<String> {
-        let url = format!("{}/chat/completions", self.base_url);
-
-        let thinking = if self.thinking_enabled {
-            Some(ThinkingConfig {
-                thinking_type: "enabled".to_string(),
-            })
-        } else {
-            None
-        };
-
-        let request = ChatCompletionRequest {
-            model: self.model.clone(),
-            messages,
-            max_tokens: Some(500),
-            temperature: Some(1.0),
-            stream: false,
-            thinking,
-        };
-
-        let response = self.client
-            .post(&url)
-            .header("Authorization", format!("Bearer {}", self.api_key))
-            .header("Content-Type", "application/json")
-            .json(&request)
-            .send()
-            .await
-            .context("Failed to send request to Kimi")?;
-
-        let status = response.status();
-
-        if !status.is_success() {
-            let text = response.text().await.unwrap_or_default();
-
-            if let Ok(error) = serde_json::from_str::<ErrorResponse>(&text) {
-                bail!("Kimi API error: {} ({})", error.error.message, error.error.error_type);
-            }
-
-            bail!("Kimi API error: {} - {}", status, text);
-        }
-
-        let result: ChatCompletionResponse = response
-            .json()
-            .await
-            .context("Failed to parse Kimi response")?;
-
-        result.choices
-            .into_iter()
-            .next()
-            .map(|c| {
-                let content = c.message.content.trim().to_string();
-                if content.is_empty() {
-                    c.reasoning_content
-                        .map(|r| r.trim().to_string())
-                        .unwrap_or_default()
-                } else {
-                    content
-                }
-            })
-            .filter(|s| !s.is_empty())
-            .ok_or_else(|| anyhow::anyhow!(
-                "No response from Kimi. \
-                 If thinking mode is enabled, try disabling it or ensure the model supports it."
-            ))
-    }
-}
-
-/// Available Kimi models
-pub const KIMI_MODELS: &[&str] = &[
-    "moonshot-v1-8k",
-    "moonshot-v1-32k",
-    "moonshot-v1-128k",
-];
-
-/// Check if a model name is valid
-pub fn is_valid_model(model: &str) -> bool {
-    KIMI_MODELS.contains(&model)
-}
-
-#[cfg(test)]
-mod tests {
-    use super::*;
-
-    #[test]
-    fn test_model_validation() {
-        assert!(is_valid_model("moonshot-v1-8k"));
-        assert!(!is_valid_model("invalid-model"));
-    }
-}
+use super::thinking::ThinkingStateManager;
+use super::{create_http_client, LlmProvider};
+use anyhow::{bail, Context, Result};
+use async_trait::async_trait;
+use serde::{Deserialize, Serialize};
+use std::sync::Arc;
+use std::time::Duration;
+
+/// Kimi API client (Moonshot AI)
+///
+/// Holds the connection settings and the generation parameters that are
+/// copied into every chat completion request built by this client.
+pub struct KimiClient {
+    /// API root; `with_base_url` stores it with trailing `/` removed.
+    base_url: String,
+    /// Sent as the `Authorization: Bearer` token on each request.
+    api_key: String,
+    /// Model ID placed in the request body.
+    model: String,
+    /// HTTP client obtained from `create_http_client`.
+    client: reqwest::Client,
+    /// When true, requests are streamed and carry a `thinking` config.
+    thinking_enabled: bool,
+    /// Value sent as `max_tokens` in the request body.
+    max_tokens: u32,
+    /// Value sent as `temperature`; left out of the request in thinking mode.
+    temperature: f32,
+    /// Optional shared manager notified when reasoning output starts and ends.
+    thinking_state: Option<Arc<ThinkingStateManager>>,
+}
+
+/// JSON body posted to the `/chat/completions` endpoint.
+///
+/// The optional fields are left out of the serialized JSON entirely when
+/// they are `None`.
+#[derive(Debug, Serialize)]
+struct ChatCompletionRequest {
+    model: String,
+    messages: Vec<Message>,
+    #[serde(skip_serializing_if = "Option::is_none")]
+    max_tokens: Option<u32>,
+    #[serde(skip_serializing_if = "Option::is_none")]
+    temperature: Option<f32>,
+    /// Set to the client's thinking flag by `chat_completion`.
+    stream: bool,
+    #[serde(skip_serializing_if = "Option::is_none")]
+    thinking: Option<ThinkingConfig>,
+}
+
+/// Value of the request's `thinking` field; serializes as `{"type": "..."}`.
+#[derive(Debug, Serialize)]
+struct ThinkingConfig {
+    /// Emitted under the JSON key `type` (a reserved word in Rust).
+    #[serde(rename = "type")]
+    thinking_type: String,
+}
+
+/// One chat message, used both in outgoing requests and when parsing the
+/// `message` of a response choice.
+#[derive(Debug, Clone, Serialize, Deserialize)]
+struct Message {
+    role: String,
+    content: String,
+    /// Omitted from serialized requests when `None`.
+    #[serde(skip_serializing_if = "Option::is_none")]
+    reasoning_content: Option<String>,
+}
+
+/// Body of a non-streaming chat completion response; only `choices` is read.
+#[derive(Debug, Deserialize)]
+struct ChatCompletionResponse {
+    choices: Vec<Choice>,
+}
+
+/// One entry of `ChatCompletionResponse::choices`.
+#[derive(Debug, Deserialize)]
+struct Choice {
+    message: Message,
+    /// Parsed when present, defaulting to `None`.
+    /// NOTE(review): nothing in the visible code reads this field — confirm
+    /// whether it is still needed.
+    #[serde(default)]
+    reasoning_content: Option<String>,
+}
+
+// --- Streaming response structures ---
+
+/// JSON payload of one `data:` line in a streaming response.
+#[derive(Debug, Deserialize)]
+struct StreamChunk {
+    choices: Vec<StreamChoice>,
+}
+
+/// One entry of `StreamChunk::choices`.
+#[derive(Debug, Deserialize)]
+struct StreamChoice {
+    /// Incremental text carried by this chunk.
+    delta: StreamDelta,
+    /// The stream reader treats the value `"stop"` as the end of the stream.
+    #[serde(default)]
+    finish_reason: Option<String>,
+    /// NOTE(review): parsed but not read by the visible code.
+    index: Option<u32>,
+}
+
+/// Incremental payload of a streamed choice; both fields default to `None`
+/// when absent from the JSON.
+#[derive(Debug, Deserialize, Default)]
+struct StreamDelta {
+    /// Piece of the final answer; appended to the returned text.
+    #[serde(default)]
+    content: Option<String>,
+    /// Piece of the model's reasoning; only used to drive the thinking state.
+    #[serde(default)]
+    reasoning_content: Option<String>,
+}
+
+/// Error body the client tries to parse from a non-success HTTP response.
+#[derive(Debug, Deserialize)]
+struct ErrorResponse {
+    error: ApiError,
+}
+
+/// Details inside `ErrorResponse`; both fields are echoed in the error
+/// message returned to the caller.
+#[derive(Debug, Deserialize)]
+struct ApiError {
+    message: String,
+    /// Read from the JSON key `type` (a reserved word in Rust).
+    #[serde(rename = "type")]
+    error_type: String,
+}
+
+impl KimiClient {
+    /// Create a client that targets the default Moonshot endpoint.
+    ///
+    /// The client starts with thinking mode off, `max_tokens` 500,
+    /// temperature 1.0 and no thinking-state manager. The HTTP client is
+    /// built by passing a 300-second duration to `create_http_client`.
+    ///
+    /// # Errors
+    ///
+    /// Returns the error from `create_http_client` if the HTTP client cannot
+    /// be built.
+    pub fn new(api_key: &str, model: &str) -> Result<Self> {
+        // The default URL has no trailing slash, so the trimming performed by
+        // `with_base_url` leaves it unchanged.
+        Self::with_base_url(api_key, model, "https://api.moonshot.cn/v1")
+    }
+
+    /// Create a client for a custom API root.
+    ///
+    /// Trailing `/` characters are removed from `base_url` so endpoint paths
+    /// can be appended with a single slash. All other settings start at the
+    /// same defaults as `new`.
+    ///
+    /// # Errors
+    ///
+    /// Returns the error from `create_http_client` if the HTTP client cannot
+    /// be built.
+    pub fn with_base_url(api_key: &str, model: &str, base_url: &str) -> Result<Self> {
+        let request_timeout = Duration::from_secs(300);
+        let client = create_http_client(request_timeout)?;
+        let normalized_url = base_url.trim_end_matches('/');
+
+        let configured = Self {
+            base_url: normalized_url.to_owned(),
+            api_key: api_key.to_owned(),
+            model: model.to_owned(),
+            client,
+            thinking_enabled: false,
+            max_tokens: 500,
+            temperature: 1.0,
+            thinking_state: None,
+        };
+        Ok(configured)
+    }
+
+    /// Replace the HTTP client with one built for `timeout`.
+    ///
+    /// # Errors
+    ///
+    /// Returns the error from `create_http_client` if the new client cannot
+    /// be built.
+    pub fn with_timeout(self, timeout: Duration) -> Result<Self> {
+        let client = create_http_client(timeout)?;
+        Ok(Self { client, ..self })
+    }
+
+    /// Turn thinking mode on or off and return the updated client.
+    pub fn with_thinking(self, enabled: bool) -> Self {
+        Self {
+            thinking_enabled: enabled,
+            ..self
+        }
+    }
+
+    /// Set the `max_tokens` value sent with each request.
+    pub fn with_max_tokens(self, max_tokens: u32) -> Self {
+        Self { max_tokens, ..self }
+    }
+
+    /// Set the temperature sent with requests made while thinking mode is off.
+    pub fn with_temperature(self, temperature: f32) -> Self {
+        Self {
+            temperature,
+            ..self
+        }
+    }
+
+    /// Attach a shared manager that is notified when streamed reasoning
+    /// output starts and ends.
+    pub fn with_thinking_state(self, state: Arc<ThinkingStateManager>) -> Self {
+        Self {
+            thinking_state: Some(state),
+            ..self
+        }
+    }
+
+    /// Fetch the model IDs returned by the `/models` endpoint.
+    ///
+    /// # Errors
+    ///
+    /// Fails when the request cannot be sent, the API answers with a
+    /// non-success status (the status and body are included in the message),
+    /// or the response body does not have the expected shape.
+    pub async fn list_models(&self) -> Result<Vec<String>> {
+        #[derive(Deserialize)]
+        struct ModelId {
+            id: String,
+        }
+
+        #[derive(Deserialize)]
+        struct ModelsResponse {
+            data: Vec<ModelId>,
+        }
+
+        let endpoint = format!("{}/models", self.base_url);
+        let bearer = format!("Bearer {}", self.api_key);
+
+        let response = self
+            .client
+            .get(&endpoint)
+            .header("Authorization", bearer)
+            .send()
+            .await
+            .context("Failed to list Kimi models")?;
+
+        let status = response.status();
+        if !status.is_success() {
+            let body = response.text().await.unwrap_or_default();
+            bail!("Kimi API error: {} - {}", status, body);
+        }
+
+        let listing: ModelsResponse = response
+            .json()
+            .await
+            .context("Failed to parse Kimi response")?;
+
+        let mut ids = Vec::with_capacity(listing.data.len());
+        ids.extend(listing.data.into_iter().map(|entry| entry.id));
+        Ok(ids)
+    }
+
+    /// Check the API key by listing models.
+    ///
+    /// Returns `Ok(true)` when the listing succeeds and `Ok(false)` when the
+    /// failure message mentions `401` or `Unauthorized`. Any other failure is
+    /// passed through unchanged.
+    pub async fn validate_key(&self) -> Result<bool> {
+        let Err(failure) = self.list_models().await else {
+            return Ok(true);
+        };
+
+        let description = failure.to_string();
+        let rejected = ["401", "Unauthorized"]
+            .iter()
+            .any(|&marker| description.contains(marker));
+
+        if rejected {
+            Ok(false)
+        } else {
+            Err(failure)
+        }
+    }
+}
+
+#[async_trait]
+impl LlmProvider for KimiClient {
+    /// Send `prompt` as a single user message and return the model's reply.
+    async fn generate(&self, prompt: &str) -> Result<String> {
+        let user_turn = Message {
+            role: "user".to_string(),
+            content: prompt.to_string(),
+            reasoning_content: None,
+        };
+
+        self.chat_completion_with_retry(vec![user_turn]).await
+    }
+
+    /// Send `user` as the user message, preceded by `system` as a system
+    /// message when `system` is not empty, and return the model's reply.
+    async fn generate_with_system(&self, system: &str, user: &str) -> Result<String> {
+        let system_turn = (!system.is_empty()).then(|| Message {
+            role: "system".to_string(),
+            content: system.to_string(),
+            reasoning_content: None,
+        });
+        let user_turn = Message {
+            role: "user".to_string(),
+            content: user.to_string(),
+            reasoning_content: None,
+        };
+
+        // The system message, when present, always comes first.
+        let messages: Vec<Message> = system_turn
+            .into_iter()
+            .chain(std::iter::once(user_turn))
+            .collect();
+
+        self.chat_completion_with_retry(messages).await
+    }
+
+    /// Report whether the API key validates; any validation error counts as
+    /// unavailable.
+    async fn is_available(&self) -> bool {
+        matches!(self.validate_key().await, Ok(true))
+    }
+
+    /// Identifier of this provider.
+    fn name(&self) -> &str {
+        const PROVIDER_NAME: &str = "kimi";
+        PROVIDER_NAME
+    }
+}
+
+impl KimiClient {
+    /// Run `chat_completion`, retrying failures that look transient.
+    ///
+    /// Up to three attempts are made, sleeping 500 ms after the first failure
+    /// and 1000 ms after the second. A failure that is not classified as
+    /// retryable, or the failure of the third attempt, is returned as is.
+    async fn chat_completion_with_retry(&self, messages: Vec<Message>) -> Result<String> {
+        const MAX_ATTEMPTS: u32 = 3;
+
+        let mut attempt: u32 = 1;
+        loop {
+            let failure = match self.chat_completion(messages.clone()).await {
+                Ok(reply) => return Ok(reply),
+                Err(failure) => failure,
+            };
+
+            if attempt >= MAX_ATTEMPTS || !Self::is_retryable(&failure) {
+                return Err(failure);
+            }
+
+            // Exponential backoff: 500 ms, then 1000 ms.
+            tokio::time::sleep(Duration::from_millis(500 * 2u64.pow(attempt - 1))).await;
+            attempt += 1;
+        }
+    }
+
+    /// Decide whether a failed completion attempt is worth repeating.
+    ///
+    /// The whole error chain is inspected, not just the outermost message:
+    /// `to_string()` on an `anyhow::Error` shows only the top-level context
+    /// (e.g. "Failed to send request to Kimi"), which hides the transport
+    /// error underneath.
+    fn is_retryable(err: &anyhow::Error) -> bool {
+        // Typed check first: timeouts and connection failures reported by
+        // the HTTP client itself.
+        let transport_failure = err
+            .chain()
+            .filter_map(|cause| cause.downcast_ref::<reqwest::Error>())
+            .any(|cause| cause.is_timeout() || cause.is_connect());
+        if transport_failure {
+            return true;
+        }
+
+        // Textual fallback over the full chain (`{:#}`), lower-cased so that
+        // messages such as "Connection refused" also match. The numeric
+        // markers catch 5xx statuses embedded in API error messages.
+        // NOTE(review): API errors whose JSON body parses successfully do not
+        // carry the HTTP status in their message, so such 5xx responses are
+        // still not detected here.
+        const MARKERS: [&str; 8] = [
+            "timeout",
+            "timed out",
+            "connection",
+            "temporary",
+            "500",
+            "502",
+            "503",
+            "504",
+        ];
+        let description = format!("{:#}", err).to_lowercase();
+        MARKERS.iter().any(|&marker| description.contains(marker))
+    }
+
+    /// Build the request body and send it over the transport that matches
+    /// the thinking setting.
+    ///
+    /// With thinking enabled the request is streamed, carries a `thinking`
+    /// config of type `"enabled"` and sends no `temperature`. Otherwise a
+    /// plain request with the configured temperature is sent.
+    async fn chat_completion(&self, messages: Vec<Message>) -> Result<String> {
+        let url = format!("{}/chat/completions", self.base_url);
+        let thinking_on = self.thinking_enabled;
+
+        let request = ChatCompletionRequest {
+            model: self.model.clone(),
+            // `messages` is owned by this call, so it is moved rather than cloned.
+            messages,
+            max_tokens: Some(self.max_tokens),
+            // Thinking-capable models such as kimi-k2.6 run with their default
+            // temperature, so none is specified in thinking mode.
+            temperature: (!thinking_on).then_some(self.temperature),
+            stream: thinking_on,
+            thinking: thinking_on.then(|| ThinkingConfig {
+                thinking_type: "enabled".to_string(),
+            }),
+        };
+
+        if thinking_on {
+            self.streaming_chat_completion(&url, &request).await
+        } else {
+            self.non_streaming_chat_completion(&url, &request).await
+        }
+    }
+
+    /// Non-streaming request (used when thinking mode is off).
+    ///
+    /// Returns the trimmed `content` of the first choice.
+    ///
+    /// # Errors
+    ///
+    /// Fails when the request cannot be sent, the API answers with a
+    /// non-success status, the body cannot be parsed, or the first choice is
+    /// missing or blank.
+    async fn non_streaming_chat_completion(
+        &self,
+        url: &str,
+        request: &ChatCompletionRequest,
+    ) -> Result<String> {
+        let bearer = format!("Bearer {}", self.api_key);
+
+        let response = self
+            .client
+            .post(url)
+            .header("Authorization", bearer)
+            .header("Content-Type", "application/json")
+            .json(request)
+            .send()
+            .await
+            .context("Failed to send request to Kimi")?;
+
+        let status = response.status();
+        if !status.is_success() {
+            let body = response.text().await.unwrap_or_default();
+
+            // Prefer the structured API error; fall back to status plus raw body.
+            match serde_json::from_str::<ErrorResponse>(&body) {
+                Ok(parsed) => bail!(
+                    "Kimi API error: {} ({})",
+                    parsed.error.message,
+                    parsed.error.error_type
+                ),
+                Err(_) => bail!("Kimi API error: {} - {}", status, body),
+            }
+        }
+
+        let parsed: ChatCompletionResponse = response
+            .json()
+            .await
+            .context("Failed to parse Kimi response")?;
+
+        let Some(first_choice) = parsed.choices.into_iter().next() else {
+            bail!("No response from Kimi");
+        };
+
+        let answer = first_choice.message.content.trim();
+        if answer.is_empty() {
+            bail!("No response from Kimi");
+        }
+
+        Ok(answer.to_string())
+    }
+
+    /// Streaming request (thinking mode); handles `reasoning_content` and
+    /// `content` deltas.
+    ///
+    /// The response is read as `data:` lines. Reasoning deltas only drive the
+    /// optional thinking-state manager; content deltas are concatenated and
+    /// returned trimmed.
+    ///
+    /// # Errors
+    ///
+    /// Fails when the request cannot be sent, the API answers with a
+    /// non-success status, the stream cannot be read or decoded, or the
+    /// stream finishes without any non-blank `content`.
+    async fn streaming_chat_completion(
+        &self,
+        url: &str,
+        request: &ChatCompletionRequest,
+    ) -> Result<String> {
+        let response = self
+            .client
+            .post(url)
+            .header("Authorization", format!("Bearer {}", self.api_key))
+            .header("Content-Type", "application/json")
+            .header("Accept", "text/event-stream")
+            .json(request)
+            .send()
+            .await
+            .context("Failed to send streaming request to Kimi")?;
+
+        let status = response.status();
+
+        if !status.is_success() {
+            let text = response.text().await.unwrap_or_default();
+
+            if let Ok(error) = serde_json::from_str::<ErrorResponse>(&text) {
+                bail!(
+                    "Kimi API error: {} ({})",
+                    error.error.message,
+                    error.error.error_type
+                );
+            }
+
+            bail!("Kimi API error: {} - {}", status, text);
+        }
+
+        let outcome = self.read_completion_stream(response).await;
+
+        // Make sure the thinking state is closed on every path, including a
+        // stream that failed part-way through after reasoning had started.
+        if let Some(state) = self.thinking_state.as_ref() {
+            state.end_thinking();
+        }
+
+        let (content, has_reasoning, has_content) = outcome?;
+        let result = content.trim().to_string();
+
+        if result.is_empty() {
+            if has_reasoning && !has_content {
+                bail!(
+                    "Kimi returned reasoning content but no final answer. \
+                     The model may have entered an incomplete thinking state. \
+                     Please try again or disable thinking mode."
+                );
+            }
+            bail!(
+                "No response from Kimi. \
+                 If thinking mode is enabled, try disabling it or ensure the model supports it."
+            );
+        }
+
+        Ok(result)
+    }
+
+    /// Read the event stream of a successful streaming response.
+    ///
+    /// Returns the concatenated `content` text together with two flags:
+    /// whether any reasoning delta was seen and whether any content delta
+    /// was seen. Reading stops at `data: [DONE]`, at a choice whose
+    /// `finish_reason` is `"stop"`, or when the connection ends.
+    async fn read_completion_stream(
+        &self,
+        response: reqwest::Response,
+    ) -> Result<(String, bool, bool)> {
+        use futures_util::StreamExt;
+
+        let thinking_state = self.thinking_state.as_ref();
+
+        let mut content_buffer = String::new();
+        let mut has_reasoning = false;
+        let mut has_content = false;
+        let mut stream_ended = false;
+
+        let mut byte_stream = response.bytes_stream();
+        // Raw bytes are buffered and decoded one complete line at a time: a
+        // multi-byte UTF-8 character may be split across network chunks, but
+        // the byte `\n` never occurs inside one, so line boundaries are safe.
+        let mut pending: Vec<u8> = Vec::new();
+
+        while !stream_ended {
+            let at_eof = match byte_stream.next().await {
+                Some(chunk) => {
+                    let chunk = chunk.context("Failed to read streaming response chunk")?;
+                    pending.extend_from_slice(&chunk);
+                    false
+                }
+                None => {
+                    // Terminate a final line that arrived without a newline
+                    // so it is processed like any other.
+                    if !pending.is_empty() {
+                        pending.push(b'\n');
+                    }
+                    true
+                }
+            };
+
+            while let Some(newline_at) = pending.iter().position(|&byte| byte == b'\n') {
+                let raw_line: Vec<u8> = pending.drain(..=newline_at).collect();
+                let line = std::str::from_utf8(&raw_line)
+                    .context("Invalid UTF-8 in stream chunk")?
+                    .trim();
+
+                if line.is_empty() {
+                    continue;
+                }
+
+                // Accept both `data: payload` and `data:payload`; other
+                // fields (comments, `event:` lines) are ignored.
+                let Some(payload) = line.strip_prefix("data:").map(str::trim_start) else {
+                    continue;
+                };
+
+                if payload == "[DONE]" {
+                    stream_ended = true;
+                    break;
+                }
+
+                let parsed = match serde_json::from_str::<StreamChunk>(payload) {
+                    Ok(parsed) => parsed,
+                    Err(_) => {
+                        // NOTE(review): assumes a mid-stream failure, if the
+                        // API sends one, uses the same error shape as HTTP
+                        // error bodies — confirm against the API docs.
+                        if let Ok(failure) = serde_json::from_str::<ErrorResponse>(payload) {
+                            bail!(
+                                "Kimi API error: {} ({})",
+                                failure.error.message,
+                                failure.error.error_type
+                            );
+                        }
+                        // Any other unparseable line is skipped on purpose.
+                        continue;
+                    }
+                };
+
+                for choice in &parsed.choices {
+                    let reasoning = choice.delta.reasoning_content.as_deref().unwrap_or("");
+                    if !reasoning.is_empty() && !has_reasoning {
+                        has_reasoning = true;
+                        if let Some(state) = thinking_state {
+                            state.start_thinking();
+                        }
+                    }
+
+                    let content = choice.delta.content.as_deref().unwrap_or("");
+                    if !content.is_empty() {
+                        // The first answer text marks the end of reasoning.
+                        if has_reasoning && !has_content {
+                            if let Some(state) = thinking_state {
+                                state.end_thinking();
+                            }
+                        }
+                        has_content = true;
+                        content_buffer.push_str(content);
+                    }
+
+                    if choice.finish_reason.as_deref() == Some("stop") {
+                        stream_ended = true;
+                    }
+                }
+            }
+
+            if at_eof {
+                break;
+            }
+        }
+
+        Ok((content_buffer, has_reasoning, has_content))
+    }
+}
+
+/// List of available Kimi model IDs, as checked by `is_valid_model`.
+pub const KIMI_MODELS: &[&str] = &[
+    // K2 series (recommended)
+    "kimi-k2.6",
+    "kimi-k2.5",
+    "kimi-k2-thinking",
+    "kimi-k2-thinking-turbo",
+    "kimi-k2-instruct",
+    "kimi-k2-instruct-0905",
+    // Legacy model IDs kept for compatibility
+    "moonshot-v1-8k",
+    "moonshot-v1-32k",
+    "moonshot-v1-128k",
+];
+
+/// Return whether `model` exactly matches one of the IDs in `KIMI_MODELS`.
+pub fn is_valid_model(model: &str) -> bool {
+    KIMI_MODELS.iter().any(|&known| known == model)
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    /// Known IDs are accepted; unknown ones are rejected.
+    #[test]
+    fn test_model_validation_k2() {
+        assert!(is_valid_model("kimi-k2.6"));
+        assert!(is_valid_model("kimi-k2.5"));
+        assert!(is_valid_model("kimi-k2-thinking"));
+        assert!(is_valid_model("kimi-k2-thinking-turbo"));
+        assert!(is_valid_model("moonshot-v1-8k"));
+        assert!(is_valid_model("moonshot-v1-32k"));
+        assert!(is_valid_model("moonshot-v1-128k"));
+        assert!(!is_valid_model("invalid-model"));
+        assert!(!is_valid_model("kimi-k1.5"));
+    }
+
+    /// Every listed ID validates, including the instruct models that the
+    /// hand-written list above does not mention.
+    #[test]
+    fn test_all_listed_models_are_valid() {
+        for model in KIMI_MODELS {
+            assert!(is_valid_model(model), "{model} should be valid");
+        }
+    }
+
+    #[test]
+    fn test_client_builder_defaults() {
+        let client = KimiClient::new("test-key", "kimi-k2.6").unwrap();
+        assert!(!client.thinking_enabled);
+        assert_eq!(client.max_tokens, 500);
+        assert_eq!(client.temperature, 1.0);
+        assert!(client.thinking_state.is_none());
+    }
+
+    #[test]
+    fn test_client_builder_with_thinking() {
+        let client = KimiClient::new("test-key", "kimi-k2.6")
+            .unwrap()
+            .with_thinking(true)
+            .with_max_tokens(1000)
+            .with_temperature(0.5);
+
+        assert!(client.thinking_enabled);
+        assert_eq!(client.max_tokens, 1000);
+        assert_eq!(client.temperature, 0.5);
+    }
+
+    /// A custom base URL is stored without its trailing slash.
+    #[test]
+    fn test_with_base_url_trims_trailing_slash() {
+        let client =
+            KimiClient::with_base_url("test-key", "kimi-k2.6", "https://example.test/v1/").unwrap();
+        assert_eq!(client.base_url, "https://example.test/v1");
+    }
+
+    #[test]
+    fn test_thinking_config_serialization() {
+        let config = ThinkingConfig {
+            thinking_type: "enabled".to_string(),
+        };
+        let json = serde_json::to_string(&config).unwrap();
+        assert_eq!(json, r#"{"type":"enabled"}"#);
+    }
+
+    #[test]
+    fn test_client_new_defaults() {
+        let client = KimiClient::new("test-key", "kimi-k2.6").unwrap();
+        assert_eq!(client.name(), "kimi");
+        assert!(!client.thinking_enabled);
+    }
+
+    #[test]
+    fn test_message_serialization() {
+        let msg = Message {
+            role: "user".to_string(),
+            content: "Hello".to_string(),
+            reasoning_content: None,
+        };
+        let json = serde_json::to_string(&msg).unwrap();
+        assert!(!json.contains("reasoning_content"));
+    }
+
+    /// `reasoning_content` is emitted once it holds a value.
+    #[test]
+    fn test_message_serialization_with_reasoning() {
+        let msg = Message {
+            role: "assistant".to_string(),
+            content: "Hello".to_string(),
+            reasoning_content: Some("because".to_string()),
+        };
+        let json = serde_json::to_string(&msg).unwrap();
+        assert!(json.contains(r#""reasoning_content":"because""#));
+    }
+
+    /// Unset optional request fields are left out of the JSON body.
+    #[test]
+    fn test_request_serialization_omits_unset_fields() {
+        let request = ChatCompletionRequest {
+            model: "kimi-k2.6".to_string(),
+            messages: vec![],
+            max_tokens: Some(500),
+            temperature: None,
+            stream: true,
+            thinking: None,
+        };
+        let json = serde_json::to_string(&request).unwrap();
+        assert!(json.contains(r#""max_tokens":500"#));
+        assert!(json.contains(r#""stream":true"#));
+        assert!(!json.contains("temperature"));
+        assert!(!json.contains("thinking"));
+    }
+
+    /// A stream chunk parses when only some delta fields are present.
+    #[test]
+    fn test_stream_chunk_parsing_with_partial_delta() {
+        let payload = r#"{"choices":[{"delta":{"reasoning_content":"hm"},"finish_reason":null}]}"#;
+        let chunk: StreamChunk = serde_json::from_str(payload).unwrap();
+        assert_eq!(chunk.choices.len(), 1);
+
+        let choice = &chunk.choices[0];
+        assert_eq!(choice.delta.reasoning_content.as_deref(), Some("hm"));
+        assert!(choice.delta.content.is_none());
+        assert!(choice.finish_reason.is_none());
+        assert!(choice.index.is_none());
+    }
+}