1
0
Files
easy_rag/internal/pkg/textprocessor/textprocessor.go

51 lines
1.2 KiB
Go

package textprocessor
import (
"bytes"
"strings"
"github.com/jonathanhecl/chunker"
)
func CreateChunks(text string) []string {
// Maximum characters per chunk
const maxCharacters = 5000 // too slow otherwise
var chunks []string
var currentChunk strings.Builder
// Use the chunker library to split text into sentences
sentences := chunker.ChunkSentences(text)
for _, sentence := range sentences {
// Check if adding the sentence exceeds the character limit
if currentChunk.Len()+len(sentence) <= maxCharacters {
if currentChunk.Len() > 0 {
currentChunk.WriteString(" ") // Add a space between sentences
}
currentChunk.WriteString(sentence)
} else {
// Add the completed chunk to the chunks slice
chunks = append(chunks, currentChunk.String())
currentChunk.Reset() // Start a new chunk
currentChunk.WriteString(sentence) // Add the sentence to the new chunk
}
}
// Add the last chunk if it has content
if currentChunk.Len() > 0 {
chunks = append(chunks, currentChunk.String())
}
// Return the chunks
return chunks
}
func ConcatenateStrings(strings []string) string {
var result bytes.Buffer
for _, str := range strings {
result.WriteString(str)
}
return result.String()
}