262 lines
5.5 KiB
Go
262 lines
5.5 KiB
Go
package prompt
|
|
|
|
import (
	"archive/zip"
	"bytes"
	"context"
	"fmt"
	"io"
	"net/http"
	"sort"
	"strings"
	"time"

	"prompts-core/common/util"
	"prompts-core/service/gateway"

	"github.com/gogf/gf/v2/frame/g"
)
|
|
|
|
// FetchFileTexts 从 URL 列表获取文件内容,支持 zip 内文件
|
|
func FetchFileTexts(ctx context.Context, urls []string) map[string]string {
|
|
result := make(map[string]string)
|
|
|
|
if len(urls) == 0 {
|
|
return result
|
|
}
|
|
|
|
client := &http.Client{
|
|
Timeout: time.Duration(g.Cfg().MustGet(ctx, "userFiles.httpTimeoutSec", 8).Int()) * time.Second,
|
|
}
|
|
|
|
for _, rawURL := range urls {
|
|
url := util.SanitizeURL(rawURL)
|
|
if url == "" {
|
|
continue
|
|
}
|
|
|
|
if util.IsBannedExtension(url) {
|
|
continue
|
|
}
|
|
|
|
if util.IsZipExtension(url) {
|
|
zipTexts := fetchZipFileTexts(ctx, client, url)
|
|
for k, v := range zipTexts {
|
|
result[k] = v
|
|
}
|
|
continue
|
|
}
|
|
|
|
text, err := fetchFileContent(ctx, client, url)
|
|
if err != nil {
|
|
continue
|
|
}
|
|
|
|
if text == "" {
|
|
continue
|
|
}
|
|
|
|
text = util.CleanSymbols(text)
|
|
result[url] = text
|
|
}
|
|
|
|
return result
|
|
}
|
|
|
|
// fetchZipFileTexts 下载并解压 zip 文件,提取可读文本内容
|
|
func fetchZipFileTexts(ctx context.Context, client *http.Client, url string) map[string]string {
|
|
result := make(map[string]string)
|
|
|
|
zipBytes, err := downloadFile(client, url,
|
|
int64(g.Cfg().MustGet(ctx, "userFiles.zipMaxSizeMB", 10).Int())*1024*1024,
|
|
)
|
|
if err != nil {
|
|
return result
|
|
}
|
|
|
|
reader, err := zip.NewReader(bytes.NewReader(zipBytes), int64(len(zipBytes)))
|
|
if err != nil {
|
|
return result
|
|
}
|
|
|
|
entryMaxSize := int64(g.Cfg().MustGet(ctx, "userFiles.zipEntryMaxSizeKB", 500).Int()) * 1024
|
|
|
|
for _, file := range reader.File {
|
|
if file.FileInfo().IsDir() {
|
|
continue
|
|
}
|
|
|
|
fileName := file.Name
|
|
|
|
if util.IsBannedExtension(fileName) {
|
|
continue
|
|
}
|
|
|
|
if util.IsZipExtension(fileName) {
|
|
continue
|
|
}
|
|
|
|
rc, err := file.Open()
|
|
if err != nil {
|
|
continue
|
|
}
|
|
|
|
content, err := io.ReadAll(io.LimitReader(rc, entryMaxSize))
|
|
rc.Close()
|
|
if err != nil {
|
|
continue
|
|
}
|
|
|
|
contentType := http.DetectContentType(content)
|
|
if !util.IsReadableContentType(contentType) {
|
|
continue
|
|
}
|
|
|
|
text := util.CleanSymbols(string(content))
|
|
if text == "" {
|
|
continue
|
|
}
|
|
|
|
key := url + "::" + fileName
|
|
result[key] = text
|
|
}
|
|
|
|
return result
|
|
}
|
|
|
|
// downloadFile issues a GET request for url through client and returns the
// response body, enforcing maxSize as an upper bound on its length in bytes.
//
// An error is returned when the request cannot be created or sent, when the
// response status is outside the 2xx range ("HTTP <code>"), when reading the
// body fails, or when the body is larger than maxSize. Oversized bodies are
// rejected rather than silently truncated, so callers never receive a partial
// (and therefore corrupt) file. A negative maxSize is treated as 0.
func downloadFile(client *http.Client, url string, maxSize int64) ([]byte, error) {
	if maxSize < 0 {
		maxSize = 0
	}

	req, err := http.NewRequest(http.MethodGet, url, nil)
	if err != nil {
		return nil, err
	}

	resp, err := client.Do(req)
	if err != nil {
		return nil, err
	}
	defer resp.Body.Close()

	if resp.StatusCode < 200 || resp.StatusCode >= 300 {
		return nil, fmt.Errorf("HTTP %d", resp.StatusCode)
	}

	// Fail fast when the server already announces an oversized body
	// (ContentLength is -1 when the length is unknown).
	if resp.ContentLength > maxSize {
		return nil, fmt.Errorf("response body of %d bytes exceeds limit of %d bytes", resp.ContentLength, maxSize)
	}

	// Read one byte past the limit so that a body of exactly maxSize bytes can
	// be told apart from one that is too large. Guard against int64 overflow.
	readLimit := maxSize + 1
	if readLimit <= 0 {
		readLimit = maxSize
	}

	data, err := io.ReadAll(io.LimitReader(resp.Body, readLimit))
	if err != nil {
		return nil, err
	}
	if int64(len(data)) > maxSize {
		return nil, fmt.Errorf("response body exceeds limit of %d bytes", maxSize)
	}

	return data, nil
}
|
|
|
|
// fetchFileContent 获取单个文本文件内容
|
|
func fetchFileContent(ctx context.Context, client *http.Client, url string) (string, error) {
|
|
req, err := http.NewRequest(http.MethodGet, url, nil)
|
|
if err != nil {
|
|
return "", err
|
|
}
|
|
|
|
resp, err := client.Do(req)
|
|
if err != nil {
|
|
return "", err
|
|
}
|
|
defer resp.Body.Close()
|
|
|
|
if resp.StatusCode < 200 || resp.StatusCode >= 300 {
|
|
return "", fmt.Errorf("HTTP %d", resp.StatusCode)
|
|
}
|
|
|
|
contentType := resp.Header.Get("Content-Type")
|
|
if !util.IsReadableContentType(contentType) {
|
|
return "", fmt.Errorf("unreadable content-type: %s", contentType)
|
|
}
|
|
|
|
body, err := io.ReadAll(
|
|
io.LimitReader(resp.Body,
|
|
int64(g.Cfg().MustGet(ctx, "userFiles.textFileMaxSizeKB", 500).Int())*1024,
|
|
),
|
|
)
|
|
if err != nil {
|
|
return "", err
|
|
}
|
|
|
|
return strings.TrimSpace(string(body)), nil
|
|
}
|
|
|
|
// SkillMdContent 根据 skillName 获取 zip 内所有 md 文件拼接内容
|
|
func SkillMdContent(ctx context.Context, skillName string) string {
|
|
skillResp, err := gateway.GetSkillUser(ctx, skillName)
|
|
if err != nil {
|
|
return ""
|
|
}
|
|
|
|
fullUrl := skillResp.ImgAddressPrefix + skillResp.FileUrl
|
|
|
|
client := &http.Client{
|
|
Timeout: time.Duration(g.Cfg().MustGet(ctx, "skillFiles.httpTimeoutSec", 30).Int()) * time.Second,
|
|
}
|
|
|
|
zipBytes, err := downloadFile(client, fullUrl,
|
|
int64(g.Cfg().MustGet(ctx, "skillFiles.zipMaxSizeMB", 10).Int())*1024*1024,
|
|
)
|
|
if err != nil {
|
|
return ""
|
|
}
|
|
|
|
mdContents, err := extractMdFiles(ctx, zipBytes)
|
|
if err != nil {
|
|
return ""
|
|
}
|
|
|
|
if len(mdContents) == 0 {
|
|
return ""
|
|
}
|
|
|
|
var builder strings.Builder
|
|
builder.WriteString(fmt.Sprintf("# Skill: %s\n\n", skillResp.Name))
|
|
if skillResp.Description != "" {
|
|
builder.WriteString(fmt.Sprintf("> %s\n\n", skillResp.Description))
|
|
}
|
|
|
|
for fileName, content := range mdContents {
|
|
builder.WriteString(fmt.Sprintf("## %s\n\n", fileName))
|
|
builder.WriteString(content)
|
|
builder.WriteString("\n\n---\n\n")
|
|
}
|
|
|
|
return strings.TrimSpace(builder.String())
|
|
}
|
|
|
|
// extractMdFiles 解压 zip 并提取所有 .md 文件内容
|
|
func extractMdFiles(ctx context.Context, zipBytes []byte) (map[string]string, error) {
|
|
result := make(map[string]string)
|
|
|
|
reader, err := zip.NewReader(bytes.NewReader(zipBytes), int64(len(zipBytes)))
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
|
|
entryMaxSize := int64(g.Cfg().MustGet(ctx, "skillFiles.mdMaxSizeKB", 500).Int()) * 1024
|
|
|
|
for _, file := range reader.File {
|
|
if file.FileInfo().IsDir() {
|
|
continue
|
|
}
|
|
|
|
if !strings.HasSuffix(strings.ToLower(file.Name), ".md") {
|
|
continue
|
|
}
|
|
|
|
rc, err := file.Open()
|
|
if err != nil {
|
|
continue
|
|
}
|
|
|
|
content, err := io.ReadAll(io.LimitReader(rc, entryMaxSize))
|
|
rc.Close()
|
|
if err != nil {
|
|
continue
|
|
}
|
|
|
|
if len(content) > 0 {
|
|
result[file.Name] = strings.TrimSpace(string(content))
|
|
}
|
|
}
|
|
|
|
return result, nil
|
|
}
|