调用本地大模型初体验

OllamaModelController

import jakarta.annotation.Resource;
import lombok.extern.slf4j.Slf4j;
import org.springframework.ai.chat.model.ChatResponse;
import org.springframework.ai.chat.prompt.Prompt;
import org.springframework.ai.ollama.OllamaChatModel;
import org.springframework.web.bind.annotation.RequestMapping;
import org.springframework.web.bind.annotation.RestController;
import reactor.core.publisher.Flux;

/**
 * Demo REST controller, mounted under {@code /base}, that forwards fixed prompts to the
 * injected {@link OllamaChatModel} and returns the answer either in one piece or as a
 * reactive stream of text chunks.
 */
@Slf4j
@RestController
@RequestMapping("/base")
public class OllamaModelController {

    @Resource
    private OllamaChatModel ollamaChatModel;

    /**
     * Sends a fixed prompt to the model and returns the complete answer at once.
     *
     * @return the full text returned by {@code ollamaChatModel.call(...)}
     */
    @RequestMapping(value = "/chat/simple")
    public String simpleChat() {
        return ollamaChatModel.call("你是谁");
    }

    /**
     * Sends a fixed prompt to the model and streams the answer using the model's
     * plain-string streaming API.
     *
     * <p>NOTE(review): the {@code "text/stream"} media type is kept exactly as it was;
     * confirm whether {@code "text/event-stream"} was intended.
     *
     * @return a {@link Flux} emitting the answer chunk by chunk
     */
    @RequestMapping(value = "/chat/stream", produces = "text/stream;charset=UTF-8")
    public Flux<String> chatStream() {
        return ollamaChatModel.stream("你是谁");
    }

    /**
     * Sends a fixed prompt wrapped in a {@link Prompt} and streams only the text of each
     * {@link ChatResponse} chunk.
     *
     * <p>Chunks that carry no generation or no text are skipped: dereferencing a missing
     * result would throw, and a Reactor mapper is not allowed to return {@code null}, so
     * either case would otherwise terminate the stream with a NullPointerException.
     *
     * @return a {@link Flux} emitting the non-null text of each streamed chunk
     */
    @RequestMapping(value = "/chat/stream2", produces = "text/stream;charset=UTF-8")
    public Flux<String> chatStream2() {
        Flux<ChatResponse> responses = ollamaChatModel.stream(new Prompt("武汉今天的天气?"));
        return responses
                // A chunk without any generation has nothing to extract text from.
                .filter(response -> response.getResult() != null)
                // mapNotNull drops chunks whose text is null instead of failing the stream.
                .mapNotNull(response -> response.getResult().getOutput().getText());
    }
}

关闭thinking

目前 Spring AI 还没有提供关闭 thinking 的参数(相信后续版本会加上);Ollama 本身也是从 2025 年发布的 0.9 版本开始才支持这一功能。

在此之前,可以直接调用 Ollama 自身的接口,通过请求体中的 think 参数来控制是否开启 thinking,例如:
{

"prompt":"你好,你是谁",
"model":"qwen3:8b",
"stream":true,
"think":false
}