Spring AI框架結(jié)合Sentinel實(shí)現(xiàn)限流功能
場景描述
構(gòu)建一個(gè)AI智能客服系統(tǒng),需要對(duì)AI接口調(diào)用進(jìn)行限流保護(hù),防止惡意請(qǐng)求或突發(fā)流量導(dǎo)致系統(tǒng)崩潰。
項(xiàng)目結(jié)構(gòu)和依賴
1. Maven依賴配置
<?xml version="1.0" encoding="UTF-8"?>
<project xmlns="http://maven.apache.org/POM/4.0.0"
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0
http://maven.apache.org/xsd/maven-4.0.0.xsd">
<modelVersion>4.0.0</modelVersion>
<parent>
<groupId>org.springframework.boot</groupId>
<artifactId>spring-boot-starter-parent</artifactId>
<version>3.2.0</version>
<relativePath/>
</parent>
<groupId>com.example</groupId>
<artifactId>ai-customer-service</artifactId>
<version>1.0.0</version>
<dependencies>
<!-- Spring Boot Starter -->
<dependency>
<groupId>org.springframework.boot</groupId>
<artifactId>spring-boot-starter-web</artifactId>
</dependency>
<!-- Spring AI -->
<dependency>
<groupId>org.springframework.ai</groupId>
<artifactId>spring-ai-openai-spring-boot-starter</artifactId>
<version>0.8.1</version>
</dependency>
<!-- Sentinel核心庫 -->
<dependency>
<groupId>com.alibaba.csp</groupId>
<artifactId>sentinel-core</artifactId>
<version>1.8.6</version>
</dependency>
<!-- Sentinel Spring Boot Starter -->
<dependency>
<groupId>com.alibaba.cloud</groupId>
<artifactId>spring-cloud-starter-alibaba-sentinel</artifactId>
<version>2022.0.0.0</version>
</dependency>
<!-- Sentinel Dashboard -->
<dependency>
<groupId>com.alibaba.csp</groupId>
<artifactId>sentinel-transport-simple-http</artifactId>
<version>1.8.6</version>
</dependency>
<!-- Redis for rate limiting -->
<dependency>
<groupId>org.springframework.boot</groupId>
<artifactId>spring-boot-starter-data-redis</artifactId>
</dependency>
<!-- JSON處理 -->
<dependency>
<groupId>com.fasterxml.jackson.core</groupId>
<artifactId>jackson-databind</artifactId>
</dependency>
</dependencies>
</project>
2. 應(yīng)用配置
# application.yml
server:
port: 8080
spring:
application:
name: ai-customer-service
# Spring AI OpenAI配置
ai:
openai:
api-key: ${OPENAI_API_KEY:your-api-key-here}
base-url: https://api.openai.com
chat:
options:
model: gpt-3.5-turbo
temperature: 0.7
max-tokens: 1000
# Redis配置
redis:
host: localhost
port: 6379
database: 0
timeout: 2000ms
jedis:
pool:
max-active: 8
max-idle: 8
min-idle: 0
# Sentinel配置
management:
endpoints:
web:
exposure:
include: "*"
logging:
level:
com.alibaba.csp.sentinel: DEBUG
com.example: DEBUG
3. Sentinel配置類
package com.example.config;
import com.alibaba.csp.sentinel.annotation.aspectj.SentinelResourceAspect;
import com.alibaba.csp.sentinel.slots.block.RuleConstant;
import com.alibaba.csp.sentinel.slots.block.flow.FlowRule;
import com.alibaba.csp.sentinel.slots.block.flow.FlowRuleManager;
import com.alibaba.csp.sentinel.slots.block.flow.param.ParamFlowRule;
import com.alibaba.csp.sentinel.slots.block.flow.param.ParamFlowRuleManager;
import org.springframework.context.annotation.Bean;
import org.springframework.context.annotation.Configuration;
import javax.annotation.PostConstruct;
import java.util.ArrayList;
import java.util.List;
@Configuration
public class SentinelConfig {
@Bean
public SentinelResourceAspect sentinelResourceAspect() {
return new SentinelResourceAspect();
}
@PostConstruct
public void initFlowRules() {
initBasicFlowRules();
initParamFlowRules();
}
/**
* 初始化基礎(chǔ)限流規(guī)則
*/
private void initBasicFlowRules() {
List<FlowRule> rules = new ArrayList<>();
// AI聊天接口限流規(guī)則 - 令牌桶算法
FlowRule chatRule = new FlowRule();
chatRule.setResource("ai-chat");
chatRule.setGrade(RuleConstant.FLOW_GRADE_QPS);
chatRule.setCount(10); // 每秒10個(gè)請(qǐng)求
chatRule.setControlBehavior(RuleConstant.CONTROL_BEHAVIOR_RATE_LIMITER);
rules.add(chatRule);
// AI問答接口限流規(guī)則 - 預(yù)熱算法
FlowRule qaRule = new FlowRule();
qaRule.setResource("ai-qa");
qaRule.setGrade(RuleConstant.FLOW_GRADE_QPS);
qaRule.setCount(20);
qaRule.setControlBehavior(RuleConstant.CONTROL_BEHAVIOR_WARM_UP);
qaRule.setWarmUpPeriodSec(30); // 30秒預(yù)熱時(shí)間
rules.add(qaRule);
// 知識(shí)庫查詢接口 - 排隊(duì)等待
FlowRule knowledgeRule = new FlowRule();
knowledgeRule.setResource("knowledge-search");
knowledgeRule.setGrade(RuleConstant.FLOW_GRADE_QPS);
knowledgeRule.setCount(15);
knowledgeRule.setControlBehavior(RuleConstant.CONTROL_BEHAVIOR_RATE_LIMITER);
knowledgeRule.setMaxQueueingTimeMs(500); // 最大等待500ms
rules.add(knowledgeRule);
FlowRuleManager.loadRules(rules);
}
/**
* 初始化熱點(diǎn)參數(shù)限流規(guī)則
*/
private void initParamFlowRules() {
List<ParamFlowRule> rules = new ArrayList<>();
// 基于用戶ID的限流
ParamFlowRule userRule = new ParamFlowRule();
userRule.setResource("ai-chat");
userRule.setParamIdx(0); // 第一個(gè)參數(shù)是用戶ID
userRule.setGrade(RuleConstant.FLOW_GRADE_QPS);
userRule.setCount(3); // 單個(gè)用戶每秒最多3次請(qǐng)求
userRule.setControlBehavior(RuleConstant.CONTROL_BEHAVIOR_RATE_LIMITER);
rules.add(userRule);
ParamFlowRuleManager.loadRules(rules);
}
}
4. 自定義限流異常處理
package com.example.handler;
import com.alibaba.csp.sentinel.slots.block.BlockException;
import com.alibaba.csp.sentinel.slots.block.flow.FlowException;
import com.alibaba.csp.sentinel.slots.block.flow.param.ParamFlowException;
import lombok.extern.slf4j.Slf4j;
import org.springframework.stereotype.Component;
@Slf4j
@Component
public class SentinelBlockHandler {
/**
* AI聊天接口限流降級(jí)處理
*/
public static String handleAiChatBlock(String userId, String message, BlockException ex) {
log.warn("AI聊天接口被限流,用戶ID: {}, 異常類型: {}", userId, ex.getClass().getSimpleName());
if (ex instanceof FlowException) {
return "系統(tǒng)繁忙,請(qǐng)稍后再試。我們正在為您排隊(duì)處理...";
} else if (ex instanceof ParamFlowException) {
return "您的請(qǐng)求過于頻繁,請(qǐng)稍后再試。";
}
return "系統(tǒng)暫時(shí)無法處理您的請(qǐng)求,請(qǐng)稍后重試。";
}
/**
* AI問答接口限流降級(jí)處理
*/
public static String handleAiQaBlock(String question, BlockException ex) {
log.warn("AI問答接口被限流,問題: {}, 異常類型: {}", question, ex.getClass().getSimpleName());
return "當(dāng)前咨詢?nèi)藬?shù)較多,系統(tǒng)正在預(yù)熱中,請(qǐng)稍后再試。";
}
/**
* 知識(shí)庫搜索限流降級(jí)處理
*/
public static String handleKnowledgeSearchBlock(String keyword, BlockException ex) {
log.warn("知識(shí)庫搜索被限流,關(guān)鍵詞: {}", keyword);
return "知識(shí)庫查詢繁忙,請(qǐng)稍后再試或聯(lián)系人工客服。";
}
}
5. AI服務(wù)層
package com.example.service;
import com.alibaba.csp.sentinel.annotation.SentinelResource;
import com.alibaba.csp.sentinel.slots.block.BlockException;
import com.example.handler.SentinelBlockHandler;
import lombok.extern.slf4j.Slf4j;
import org.springframework.ai.chat.ChatClient;
import org.springframework.ai.chat.ChatResponse;
import org.springframework.ai.chat.messages.UserMessage;
import org.springframework.ai.chat.prompt.Prompt;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.stereotype.Service;
import java.time.LocalDateTime;
import java.util.concurrent.CompletableFuture;
@Service
@Slf4j
public class AiCustomerService {
@Autowired
private ChatClient chatClient;
/**
* AI聊天服務(wù) - 使用熱點(diǎn)參數(shù)限流
*/
@SentinelResource(
value = "ai-chat",
blockHandler = "handleAiChatBlock",
blockHandlerClass = SentinelBlockHandler.class
)
public String chatWithAi(String userId, String message) {
log.info("用戶 {} 發(fā)起聊天請(qǐng)求: {}", userId, message);
try {
// 構(gòu)建聊天上下文
String systemPrompt = "你是一個(gè)專業(yè)的客服助手,請(qǐng)禮貌、準(zhǔn)確地回答用戶問題。";
String fullPrompt = systemPrompt + "\n用戶問題: " + message;
Prompt prompt = new Prompt(new UserMessage(fullPrompt));
ChatResponse response = chatClient.call(prompt);
String aiResponse = response.getResult().getOutput().getContent();
log.info("AI回復(fù)用戶 {}: {}", userId, aiResponse);
return aiResponse;
} catch (Exception e) {
log.error("AI聊天服務(wù)異常,用戶ID: {}", userId, e);
return "抱歉,AI服務(wù)暫時(shí)不可用,請(qǐng)聯(lián)系人工客服。";
}
}
/**
* AI問答服務(wù) - 使用預(yù)熱限流
*/
@SentinelResource(
value = "ai-qa",
blockHandler = "handleAiQaBlock",
blockHandlerClass = SentinelBlockHandler.class
)
public String answerQuestion(String question) {
log.info("收到問答請(qǐng)求: {}", question);
try {
String prompt = String.format(
"作為客服專家,請(qǐng)簡潔準(zhǔn)確地回答以下問題:%s\n" +
"要求:1. 回答要專業(yè)且易懂 2. 控制在200字以內(nèi) 3. 如果不確定請(qǐng)說明",
question
);
ChatResponse response = chatClient.call(new Prompt(prompt));
return response.getResult().getOutput().getContent();
} catch (Exception e) {
log.error("AI問答服務(wù)異常", e);
return "抱歉,暫時(shí)無法回答您的問題,請(qǐng)稍后再試。";
}
}
/**
* 知識(shí)庫搜索 - 使用排隊(duì)等待限流
*/
@SentinelResource(
value = "knowledge-search",
blockHandler = "handleKnowledgeSearchBlock",
blockHandlerClass = SentinelBlockHandler.class
)
public String searchKnowledge(String keyword) {
log.info("知識(shí)庫搜索: {}", keyword);
try {
// 模擬知識(shí)庫搜索
Thread.sleep(100); // 模擬搜索耗時(shí)
String searchPrompt = String.format(
"基于關(guān)鍵詞 '%s' 搜索相關(guān)知識(shí),提供簡潔的信息摘要。",
keyword
);
ChatResponse response = chatClient.call(new Prompt(searchPrompt));
return response.getResult().getOutput().getContent();
} catch (InterruptedException e) {
Thread.currentThread().interrupt();
return "搜索被中斷,請(qǐng)重試。";
} catch (Exception e) {
log.error("知識(shí)庫搜索異常", e);
return "知識(shí)庫搜索失敗,請(qǐng)聯(lián)系技術(shù)支持。";
}
}
/**
* 異步AI處理 - 用于處理復(fù)雜請(qǐng)求
*/
public CompletableFuture<String> processComplexRequest(String userId, String request) {
return CompletableFuture.supplyAsync(() -> {
try {
return chatWithAi(userId, request);
} catch (BlockException e) {
return "請(qǐng)求過于頻繁,已加入處理隊(duì)列,請(qǐng)稍后查看結(jié)果。";
}
});
}
}
6. 控制器層
package com.example.controller;
import com.example.service.AiCustomerService;
import lombok.extern.slf4j.Slf4j;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.http.ResponseEntity;
import org.springframework.web.bind.annotation.*;
import java.util.HashMap;
import java.util.Map;
import java.util.concurrent.CompletableFuture;
@RestController
@RequestMapping("/api/ai")
@Slf4j
public class AiCustomerController {
@Autowired
private AiCustomerService aiCustomerService;
/**
* AI聊天接口
*/
@PostMapping("/chat")
public ResponseEntity<Map<String, Object>> chat(
@RequestParam String userId,
@RequestBody Map<String, String> request) {
String message = request.get("message");
log.info("接收到聊天請(qǐng)求 - 用戶: {}, 消息: {}", userId, message);
Map<String, Object> response = new HashMap<>();
try {
String aiResponse = aiCustomerService.chatWithAi(userId, message);
response.put("success", true);
response.put("data", aiResponse);
response.put("timestamp", System.currentTimeMillis());
return ResponseEntity.ok(response);
} catch (Exception e) {
log.error("聊天接口異常", e);
response.put("success", false);
response.put("error", "服務(wù)暫時(shí)不可用");
return ResponseEntity.internalServerError().body(response);
}
}
/**
* AI問答接口
*/
@PostMapping("/qa")
public ResponseEntity<Map<String, Object>> qa(@RequestBody Map<String, String> request) {
String question = request.get("question");
log.info("接收到問答請(qǐng)求: {}", question);
Map<String, Object> response = new HashMap<>();
try {
String answer = aiCustomerService.answerQuestion(question);
response.put("success", true);
response.put("question", question);
response.put("answer", answer);
response.put("timestamp", System.currentTimeMillis());
return ResponseEntity.ok(response);
} catch (Exception e) {
log.error("問答接口異常", e);
response.put("success", false);
response.put("error", "問答服務(wù)暫時(shí)不可用");
return ResponseEntity.internalServerError().body(response);
}
}
/**
* 知識(shí)庫搜索接口
*/
@GetMapping("/knowledge/search")
public ResponseEntity<Map<String, Object>> searchKnowledge(@RequestParam String keyword) {
log.info("接收到知識(shí)庫搜索請(qǐng)求: {}", keyword);
Map<String, Object> response = new HashMap<>();
try {
String result = aiCustomerService.searchKnowledge(keyword);
response.put("success", true);
response.put("keyword", keyword);
response.put("result", result);
response.put("timestamp", System.currentTimeMillis());
return ResponseEntity.ok(response);
} catch (Exception e) {
log.error("知識(shí)庫搜索異常", e);
response.put("success", false);
response.put("error", "知識(shí)庫搜索服務(wù)暫時(shí)不可用");
return ResponseEntity.internalServerError().body(response);
}
}
/**
* 異步處理復(fù)雜請(qǐng)求
*/
@PostMapping("/chat/async")
public ResponseEntity<Map<String, Object>> chatAsync(
@RequestParam String userId,
@RequestBody Map<String, String> request) {
String message = request.get("message");
Map<String, Object> response = new HashMap<>();
CompletableFuture<String> future = aiCustomerService.processComplexRequest(userId, message);
response.put("success", true);
response.put("message", "請(qǐng)求已提交,正在處理中...");
response.put("userId", userId);
response.put("taskId", System.currentTimeMillis());
// 實(shí)際應(yīng)用中可以返回任務(wù)ID,客戶端輪詢結(jié)果
return ResponseEntity.ok(response);
}
}
7. 啟動(dòng)類
package com.example;
import org.springframework.boot.SpringApplication;
import org.springframework.boot.autoconfigure.SpringBootApplication;
import org.springframework.scheduling.annotation.EnableAsync;
@SpringBootApplication
@EnableAsync
public class AiCustomerServiceApplication {
public static void main(String[] args) {
// 設(shè)置Sentinel Dashboard地址
System.setProperty("csp.sentinel.dashboard.server", "localhost:8080");
System.setProperty("project.name", "ai-customer-service");
SpringApplication.run(AiCustomerServiceApplication.class, args);
}
}
8. 測(cè)試用例
package com.example.test;
import org.junit.jupiter.api.Test;
import org.springframework.boot.test.context.SpringBootTest;
import org.springframework.test.context.junit.jupiter.SpringJUnitConfig;
import java.util.concurrent.CountDownLatch;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
@SpringJUnitConfig
@SpringBootTest
public class RateLimitTest {
@Test
public void testConcurrentRequests() throws InterruptedException {
ExecutorService executor = Executors.newFixedThreadPool(20);
CountDownLatch latch = new CountDownLatch(50);
for (int i = 0; i < 50; i++) {
final int requestId = i;
executor.submit(() -> {
try {
// 模擬并發(fā)請(qǐng)求
System.out.println("請(qǐng)求 " + requestId + " 開始");
Thread.sleep(100);
System.out.println("請(qǐng)求 " + requestId + " 完成");
} catch (InterruptedException e) {
Thread.currentThread().interrupt();
} finally {
latch.countDown();
}
});
}
latch.await();
executor.shutdown();
}
}
使用示例
1. 啟動(dòng)應(yīng)用
mvn spring-boot:run
2. 測(cè)試接口
聊天接口測(cè)試:
curl -X POST "http://localhost:8080/api/ai/chat?userId=user123" \
-H "Content-Type: application/json" \
-d '{"message": "你好,我想了解產(chǎn)品信息"}'
問答接口測(cè)試:
curl -X POST "http://localhost:8080/api/ai/qa" \
-H "Content-Type: application/json" \
-d '{"question": "如何退換貨?"}'
知識(shí)庫搜索測(cè)試:
curl "http://localhost:8080/api/ai/knowledge/search?keyword=退款政策"
關(guān)鍵特性說明
- 多種限流算法: 演示了令牌桶、預(yù)熱、排隊(duì)等待等不同算法的使用場景
- 熱點(diǎn)參數(shù)限流: 基于用戶ID進(jìn)行個(gè)性化限流
- 優(yōu)雅降級(jí): 提供友好的限流提示而非直接拒絕
- 異步處理: 對(duì)于被限流的復(fù)雜請(qǐng)求提供異步處理選項(xiàng)
- 監(jiān)控友好: 集成Sentinel Dashboard進(jìn)行實(shí)時(shí)監(jiān)控
這個(gè)案例展示了如何在實(shí)際的AI應(yīng)用中合理使用Sentinel的各種限流算法,既保護(hù)了系統(tǒng)穩(wěn)定性,也提供了良好的用戶體驗(yàn)。
到此這篇關(guān)于Spring AI框架結(jié)合Sentinel實(shí)現(xiàn)限流功能的文章就介紹到這了,更多相關(guān)Spring AI Sentinel限流內(nèi)容請(qǐng)搜索腳本之家以前的文章或繼續(xù)瀏覽下面的相關(guān)文章希望大家以后多多支持腳本之家!
相關(guān)文章
Java中如何使用?byte?數(shù)組作為?Map?的?key
本文將討論在使用HashMap時(shí),當(dāng)byte數(shù)組作為key時(shí)所遇到的問題及其解決方案,介紹使用String和List這兩種數(shù)據(jù)結(jié)構(gòu)作為臨時(shí)解決方案的方法,感興趣的朋友跟隨小編一起看看吧2023-06-06
詳解Java中IO字節(jié)流基本操作(復(fù)制文件)并測(cè)試性能
這篇文章主要介紹了Java中IO字節(jié)流基本操作(復(fù)制文件)并測(cè)試性能,文中通過示例代碼介紹的非常詳細(xì),對(duì)大家的學(xué)習(xí)或者工作具有一定的參考學(xué)習(xí)價(jià)值,需要的朋友們下面隨著小編來一起學(xué)習(xí)學(xué)習(xí)吧2019-04-04
SpringBoot+MyBatisPlus+MySQL8實(shí)現(xiàn)樹形結(jié)構(gòu)查詢
這篇文章主要為大家詳細(xì)介紹了SpringBoot+MyBatisPlus+MySQL8實(shí)現(xiàn)樹形結(jié)構(gòu)查詢,文中示例代碼介紹的非常詳細(xì),具有一定的參考價(jià)值,感興趣的小伙伴們可以參考一下2021-06-06
Java實(shí)現(xiàn)發(fā)送短信驗(yàn)證碼功能
這篇文章主要為大家詳細(xì)介紹了Java實(shí)現(xiàn)發(fā)送短信驗(yàn)證碼功能,具有一定的參考價(jià)值,感興趣的小伙伴們可以參考一下2017-11-11
Spring使用IOC與DI實(shí)現(xiàn)完全注解開發(fā)
IOC也是Spring的核心之一了,之前學(xué)的時(shí)候是采用xml配置文件的方式去實(shí)現(xiàn)的,后來其中也多少穿插了幾個(gè)注解,但是沒有說完全采用注解實(shí)現(xiàn)。那么這篇文章就和大家分享一下,全部采用注解來實(shí)現(xiàn)IOC + DI2022-09-09
Spring?Boot整合Kafka+SSE實(shí)現(xiàn)實(shí)時(shí)數(shù)據(jù)展示
本文主要介紹了Spring?Boot整合Kafka+SSE實(shí)現(xiàn)實(shí)時(shí)數(shù)據(jù)展示2024-06-06
簡單講解Java的Socket網(wǎng)絡(luò)編程的多播與廣播實(shí)現(xiàn)
這篇文章主要介紹了Java的Socket網(wǎng)絡(luò)編程的多播與廣播實(shí)現(xiàn),包括網(wǎng)絡(luò)編程發(fā)送和接受數(shù)據(jù)的一些基礎(chǔ)知識(shí)整理,需要的朋友可以參考下2016-01-01

