Compare commits

..

2 commits

Author SHA1 Message Date
CDN
2de7a34787
chore: add .gitignore 2025-04-22 14:28:00 +08:00
CDN
68391cf532
feat: chunking + progress bar 2025-04-22 14:27:17 +08:00
7 changed files with 342 additions and 17 deletions

3
.gitignore vendored Normal file
View file

@ -0,0 +1,3 @@
.DS_Store
d10n
d10n.exe

View file

@ -8,16 +8,29 @@ import (
"gopkg.in/yaml.v3" "gopkg.in/yaml.v3"
) )
// ChunkConfig stores configuration for chunked translation.
type ChunkConfig struct {
	Enabled bool   `yaml:"enabled"` // whether chunked translation is enabled (default false)
	Size    int    `yaml:"size"`    // approximate chunk size in tokens (default 10240)
	Prompt  string `yaml:"prompt"`  // continuation prompt sent for chunks after the first
	Context int    `yaml:"context"` // number of preceding chunks kept as conversation context (default 2)
}
// Config stores the application configuration // Config stores the application configuration
type Config struct { type Config struct {
APIBase string `yaml:"api_base"` APIBase string `yaml:"api_base"`
APIKey string `yaml:"api_key"` APIKey string `yaml:"api_key"`
Model string `yaml:"model"` Model string `yaml:"model"`
SystemPrompt string `yaml:"system_prompt"` SystemPrompt string `yaml:"system_prompt"`
// Concurrency related settings
Concurrency int `yaml:"concurrency"`
// Chunk related settings
Chunk ChunkConfig `yaml:"chunk"`
} }
// Default system prompt as a placeholder // Default system prompt as a placeholder
const DefaultSystemPrompt = "Placeholder" const DefaultSystemPrompt = "Placeholder"
const DefaultChunkPrompt = "Please continue translation"
// LoadConfig loads configuration from ~/.config/d10n.yaml // LoadConfig loads configuration from ~/.config/d10n.yaml
func LoadConfig() (*Config, error) { func LoadConfig() (*Config, error) {
@ -34,6 +47,13 @@ func LoadConfig() (*Config, error) {
if _, err := os.Stat(configPath); os.IsNotExist(err) { if _, err := os.Stat(configPath); os.IsNotExist(err) {
return &Config{ return &Config{
SystemPrompt: DefaultSystemPrompt, SystemPrompt: DefaultSystemPrompt,
Concurrency: 3, // Default concurrency
Chunk: ChunkConfig{
Enabled: false, // Chunking disabled by default
Size: 10240, // Default chunk size in tokens
Prompt: DefaultChunkPrompt, // Default chunk prompt
Context: 2, // Default context size
},
}, nil }, nil
} }
@ -49,10 +69,28 @@ func LoadConfig() (*Config, error) {
return nil, fmt.Errorf("could not parse config file: %w", err) return nil, fmt.Errorf("could not parse config file: %w", err)
} }
// Set default system prompt if not specified // Set default values if not specified
if config.SystemPrompt == "" { if config.SystemPrompt == "" {
config.SystemPrompt = DefaultSystemPrompt config.SystemPrompt = DefaultSystemPrompt
} }
// Set default for concurrency
if config.Concurrency <= 0 {
config.Concurrency = 3
}
// Set defaults for chunk settings
if config.Chunk.Size <= 0 {
config.Chunk.Size = 10240
}
if config.Chunk.Prompt == "" {
config.Chunk.Prompt = DefaultChunkPrompt
}
if config.Chunk.Context <= 0 {
config.Chunk.Context = 2
}
return &config, nil return &config, nil
} }

8
go.mod
View file

@ -3,3 +3,11 @@ module github.com/wholetrans/d10n
go 1.24.1 go 1.24.1
require gopkg.in/yaml.v3 v3.0.1 require gopkg.in/yaml.v3 v3.0.1
require (
github.com/mitchellh/colorstring v0.0.0-20190213212951-d06e56a500db // indirect
github.com/rivo/uniseg v0.4.7 // indirect
github.com/schollz/progressbar/v3 v3.18.0 // indirect
golang.org/x/sys v0.29.0 // indirect
golang.org/x/term v0.28.0 // indirect
)

10
go.sum
View file

@ -1,3 +1,13 @@
github.com/mitchellh/colorstring v0.0.0-20190213212951-d06e56a500db h1:62I3jR2EmQ4l5rM/4FEfDWcRD+abF5XlKShorW5LRoQ=
github.com/mitchellh/colorstring v0.0.0-20190213212951-d06e56a500db/go.mod h1:l0dey0ia/Uv7NcFFVbCLtqEBQbrT4OCwCSKTEv6enCw=
github.com/rivo/uniseg v0.4.7 h1:WUdvkW8uEhrYfLC4ZzdpI2ztxP1I582+49Oc5Mq64VQ=
github.com/rivo/uniseg v0.4.7/go.mod h1:FN3SvrM+Zdj16jyLfmOkMNblXMcoc8DfTHruCPUcx88=
github.com/schollz/progressbar/v3 v3.18.0 h1:uXdoHABRFmNIjUfte/Ex7WtuyVslrw2wVPQmCN62HpA=
github.com/schollz/progressbar/v3 v3.18.0/go.mod h1:IsO3lpbaGuzh8zIMzgY3+J8l4C8GjO0Y9S69eFvNsec=
golang.org/x/sys v0.29.0 h1:TPYlXGxvx1MGTn2GiZDhnjPA9wZzZeGKHHmKhHYvgaU=
golang.org/x/sys v0.29.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA=
golang.org/x/term v0.28.0 h1:/Ts8HFuMR2E6IP/jlo7QVLZHggjKQbhu/7H0LJFr3Gg=
golang.org/x/term v0.28.0/go.mod h1:Sw/lC2IAUZ92udQNf3WodGtn4k/XoLyZoh8v/8uiwek=
gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405 h1:yhCVgyC4o1eVCa2tZl7eS0r+SDo693bJlVdllGtEeKM= gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405 h1:yhCVgyC4o1eVCa2tZl7eS0r+SDo693bJlVdllGtEeKM=
gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA= gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA=

109
main.go
View file

@ -6,7 +6,9 @@ import (
"os" "os"
"path/filepath" "path/filepath"
"strings" "strings"
"sync"
"github.com/schollz/progressbar/v3"
"github.com/wholetrans/d10n/config" "github.com/wholetrans/d10n/config"
"github.com/wholetrans/d10n/translator" "github.com/wholetrans/d10n/translator"
) )
@ -32,6 +34,15 @@ func main() {
systemPromptPtr := flagSet.String("system-prompt", "", "System prompt for the model") systemPromptPtr := flagSet.String("system-prompt", "", "System prompt for the model")
formatPtr := flagSet.String("format", "", "File format to process (e.g., md, txt)") formatPtr := flagSet.String("format", "", "File format to process (e.g., md, txt)")
// Concurrency flag
concurrencyPtr := flagSet.Int("concurrency", 0, "Number of concurrent translation tasks (default is 3)")
// Chunking flags
chunkEnabledPtr := flagSet.Bool("chunk", false, "Enable chunked translation")
chunkSizePtr := flagSet.Int("chunk-size", 0, "Size of each chunk in tokens (default is 10240)")
chunkPromptPtr := flagSet.String("chunk-prompt", "", "Prompt to use for continuing translation (default is 'Please continue translation')")
chunkContextPtr := flagSet.Int("chunk-context", 0, "Number of chunks to include as context (default is 2)")
// Parse flags // Parse flags
if err := flagSet.Parse(os.Args[2:]); err != nil { if err := flagSet.Parse(os.Args[2:]); err != nil {
fmt.Println("Error parsing arguments:", err) fmt.Println("Error parsing arguments:", err)
@ -75,6 +86,28 @@ func main() {
cfg.SystemPrompt = *systemPromptPtr cfg.SystemPrompt = *systemPromptPtr
} }
// Set concurrency options
if *concurrencyPtr > 0 {
cfg.Concurrency = *concurrencyPtr
}
// Set chunking options from command line arguments
if *chunkEnabledPtr {
cfg.Chunk.Enabled = true
}
if *chunkSizePtr > 0 {
cfg.Chunk.Size = *chunkSizePtr
}
if *chunkPromptPtr != "" {
cfg.Chunk.Prompt = *chunkPromptPtr
}
if *chunkContextPtr > 0 {
cfg.Chunk.Context = *chunkContextPtr
}
// Set target path if not provided // Set target path if not provided
targetPath := *targetPathPtr targetPath := *targetPathPtr
if targetPath == "" { if targetPath == "" {
@ -118,6 +151,11 @@ func printUsage() {
fmt.Println(" -api-base <url> API base URL for OpenAI compatible service") fmt.Println(" -api-base <url> API base URL for OpenAI compatible service")
fmt.Println(" -system-prompt <prompt> System prompt for the model") fmt.Println(" -system-prompt <prompt> System prompt for the model")
fmt.Println(" -format <ext> File format to process (e.g., md, txt)") fmt.Println(" -format <ext> File format to process (e.g., md, txt)")
fmt.Println(" -concurrency <num> Number of concurrent translation tasks (default: 3)")
fmt.Println(" -chunk Enable chunked translation")
fmt.Println(" -chunk-size <tokens> Size of each chunk in tokens (default: 10240)")
fmt.Println(" -chunk-prompt <prompt> Prompt for continuing translation (default: 'Please continue translation')")
fmt.Println(" -chunk-context <num> Number of chunks to include as context (default: 2)")
} }
func processFile(sourcePath, targetPath, sourceLanguage, targetLanguage string, trans *translator.Translator) error { func processFile(sourcePath, targetPath, sourceLanguage, targetLanguage string, trans *translator.Translator) error {
@ -126,7 +164,7 @@ func processFile(sourcePath, targetPath, sourceLanguage, targetLanguage string,
return fmt.Errorf("error reading file %s: %w", sourcePath, err) return fmt.Errorf("error reading file %s: %w", sourcePath, err)
} }
translatedContent, err := trans.Translate(string(content), sourceLanguage, targetLanguage) translatedContent, err := trans.Translate(string(content), sourceLanguage, targetLanguage, sourcePath)
if err != nil { if err != nil {
return fmt.Errorf("error translating file %s: %w", sourcePath, err) return fmt.Errorf("error translating file %s: %w", sourcePath, err)
} }
@ -142,7 +180,9 @@ func processFile(sourcePath, targetPath, sourceLanguage, targetLanguage string,
return fmt.Errorf("error writing to file %s: %w", targetPath, err) return fmt.Errorf("error writing to file %s: %w", targetPath, err)
} }
fmt.Printf("Translated %s to %s\n", sourcePath, targetPath) // Remove the progress bar for this file
trans.RemoveProgressBar(sourcePath)
return nil return nil
} }
@ -185,7 +225,29 @@ func processDirectory(sourcePath, targetPath, sourceLanguage, targetLanguage, fo
} }
fmt.Println() fmt.Println()
// Process each matched file with progress updates // Create overall progress bar
overallBar := progressbar.NewOptions(len(matchedFiles),
progressbar.OptionSetDescription("[Overall Progress]"),
progressbar.OptionShowCount(),
progressbar.OptionSetTheme(progressbar.Theme{
Saucer: "#",
SaucerHead: ">",
SaucerPadding: "-",
BarStart: "[",
BarEnd: "]",
}),
)
// Set up a worker pool for concurrent processing
var (
wg sync.WaitGroup
mutex sync.Mutex
errors []error
concurrency = trans.GetConcurrency()
semaphore = make(chan struct{}, concurrency)
)
// Process files concurrently
for i, path := range matchedFiles { for i, path := range matchedFiles {
// Compute relative path from source // Compute relative path from source
relPath, err := filepath.Rel(sourcePath, path) relPath, err := filepath.Rel(sourcePath, path)
@ -202,14 +264,39 @@ func processDirectory(sourcePath, targetPath, sourceLanguage, targetLanguage, fo
return fmt.Errorf("error creating target directory %s: %w", targetFileDir, err) return fmt.Errorf("error creating target directory %s: %w", targetFileDir, err)
} }
// Display progress // Process individual file concurrently
fmt.Printf("[%d/%d] Translating: %s\n", i+1, len(matchedFiles), relPath) wg.Add(1)
go func(idx int, sourcePath, targetPath, relPath string) {
// Process individual file defer wg.Done()
err = processFile(path, targetFilePath, sourceLanguage, targetLanguage, trans)
if err != nil { // Acquire semaphore (limit concurrency)
return err semaphore <- struct{}{}
} defer func() { <-semaphore }()
// Process the file
err := processFile(sourcePath, targetPath, sourceLanguage, targetLanguage, trans)
// Update overall progress bar
mutex.Lock()
overallBar.Add(1)
if err != nil {
errors = append(errors, err)
fmt.Printf("Error translating %s: %v\n", relPath, err)
}
mutex.Unlock()
}(i+1, path, targetFilePath, relPath)
}
// Wait for all translations to complete
wg.Wait()
// Complete the overall progress bar
overallBar.Finish()
fmt.Println() // Add some spacing after progress bars
// Check if any errors occurred
if len(errors) > 0 {
return fmt.Errorf("encountered %d errors during translation", len(errors))
} }
return nil return nil

View file

@ -5,3 +5,13 @@ api_base: "https://api.openai.com" # OpenAI-compatible API base URL
api_key: "your-api-key-here" # API key for the service api_key: "your-api-key-here" # API key for the service
model: "gpt-4o" # Model to use for translation model: "gpt-4o" # Model to use for translation
system_prompt: "You are a professional translator. You are translating from $SOURCE_LANG to $TARGET_LANG. Maintain the original formatting and structure of the text while translating it accurately." # Custom system prompt with variables system_prompt: "You are a professional translator. You are translating from $SOURCE_LANG to $TARGET_LANG. Maintain the original formatting and structure of the text while translating it accurately." # Custom system prompt with variables
# Concurrency settings
concurrency: 3 # Number of concurrent translation tasks (default: 3)
# Chunked translation settings
chunk:
enabled: false # Whether to enable chunked translation (default: false)
size: 10240 # Size of each chunk in tokens (default: 10240)
prompt: "Please continue translation" # Prompt to use for continuing translation
context: 2 # Number of chunks to include as context (default: 2)

View file

@ -4,15 +4,21 @@ import (
"bytes" "bytes"
"encoding/json" "encoding/json"
"fmt" "fmt"
"math"
"net/http" "net/http"
"strings" "strings"
"sync"
"unicode/utf8"
"github.com/schollz/progressbar/v3"
"github.com/wholetrans/d10n/config" "github.com/wholetrans/d10n/config"
) )
// Translator handles communication with the OpenAI API // Translator handles communication with the OpenAI API
type Translator struct { type Translator struct {
config *config.Config config *config.Config
mutex sync.Mutex
progressBars map[string]*progressbar.ProgressBar
} }
// Message represents a message in the OpenAI chat format // Message represents a message in the OpenAI chat format
@ -43,11 +49,38 @@ type ChatResponse struct {
func NewTranslator(cfg *config.Config) *Translator { func NewTranslator(cfg *config.Config) *Translator {
return &Translator{ return &Translator{
config: cfg, config: cfg,
progressBars: make(map[string]*progressbar.ProgressBar),
} }
} }
// GetConcurrency returns the configured concurrency level, i.e. the
// number of files that may be translated in parallel.
func (t *Translator) GetConcurrency() int {
	return t.config.Concurrency
}
// Translate translates content from sourceLanguage to targetLanguage // Translate translates content from sourceLanguage to targetLanguage
func (t *Translator) Translate(content, sourceLanguage, targetLanguage string) (string, error) { func (t *Translator) Translate(content, sourceLanguage, targetLanguage string, filePath string) (string, error) {
// If chunking is not enabled, translate the whole content at once
if !t.config.Chunk.Enabled {
return t.translateSingle(content, sourceLanguage, targetLanguage)
}
// Split content into chunks
chunks, err := t.splitIntoChunks(content)
if err != nil {
return "", fmt.Errorf("error splitting content into chunks: %w", err)
}
// If there's only one chunk, translate it directly
if len(chunks) == 1 {
return t.translateSingle(chunks[0], sourceLanguage, targetLanguage)
}
return t.translateChunks(chunks, sourceLanguage, targetLanguage, filePath)
}
// translateSingle translates a single content block
func (t *Translator) translateSingle(content, sourceLanguage, targetLanguage string) (string, error) {
messages := []Message{ messages := []Message{
{ {
Role: "system", Role: "system",
@ -59,6 +92,108 @@ func (t *Translator) Translate(content, sourceLanguage, targetLanguage string) (
}, },
} }
return t.sendChatRequest(messages)
}
// translateChunks translates content chunk by chunk, replaying a sliding
// window of earlier chunks and their translations as conversation context so
// the model keeps terminology and style consistent across chunk boundaries.
// Returns the chunk translations joined with newlines (chunks were split on
// line boundaries, so this restores the separators).
func (t *Translator) translateChunks(chunks []string, sourceLanguage, targetLanguage, filePath string) (string, error) {
	systemPrompt := t.getSystemPrompt(sourceLanguage, targetLanguage)

	var translations []string
	contextStart := 0

	// Lazily create (and cache) a per-file progress bar keyed by file path.
	// The map is shared across goroutines, so guard it with the mutex.
	t.mutex.Lock()
	progressBarID := filePath
	if _, exists := t.progressBars[progressBarID]; !exists {
		t.progressBars[progressBarID] = progressbar.NewOptions(len(chunks),
			progressbar.OptionSetDescription(fmt.Sprintf("[Chunks: %s]", filePath)),
			progressbar.OptionShowCount(),
			progressbar.OptionSetTheme(progressbar.Theme{
				Saucer:        "=",
				SaucerHead:    ">",
				SaucerPadding: " ",
				BarStart:      "[",
				BarEnd:        "]",
			}),
		)
	}
	progressBar := t.progressBars[progressBarID]
	t.mutex.Unlock()

	for i, chunk := range chunks {
		// Every request starts with the system prompt.
		messages := []Message{
			{
				Role:    "system",
				Content: systemPrompt,
			},
		}

		// Replay the context window: each earlier chunk paired with its
		// translation. One translation is appended per chunk, in order, so
		// translations[j] is exactly the translation of chunks[j].
		// (Previously this indexed translations[j-contextStart], which pairs
		// context chunks with the wrong, earlier translations once the
		// window has slid past the start.)
		for j := contextStart; j < i; j++ {
			messages = append(messages, Message{
				Role:    "user",
				Content: chunks[j],
			})
			messages = append(messages, Message{
				Role:    "assistant",
				Content: translations[j],
			})
		}

		// The chunk to translate now.
		messages = append(messages, Message{
			Role:    "user",
			Content: chunk,
		})

		// After the first chunk, nudge the model to continue the translation
		// rather than start over.
		if i > 0 {
			messages = append(messages, Message{
				Role:    "user",
				Content: t.config.Chunk.Prompt,
			})
		}

		translation, err := t.sendChatRequest(messages)
		if err != nil {
			return "", fmt.Errorf("error translating chunk %d: %w", i+1, err)
		}
		translations = append(translations, translation)

		// Update this file's chunk progress bar.
		progressBar.Add(1)

		// Slide the context window forward once it holds Chunk.Context chunks.
		if i >= t.config.Chunk.Context {
			contextStart++
		}
	}

	// Finish the progress bar for this file.
	progressBar.Finish()

	// Combine translations, restoring the newline separators removed by
	// splitIntoChunks at chunk boundaries.
	return strings.Join(translations, "\n"), nil
}
// RemoveProgressBar removes a progress bar by its ID (the source file path),
// releasing the map entry once a file's translation is complete. Safe for
// concurrent use: the shared progressBars map is guarded by the mutex.
func (t *Translator) RemoveProgressBar(id string) {
	t.mutex.Lock()
	defer t.mutex.Unlock()
	delete(t.progressBars, id)
}
// sendChatRequest sends a chat completion request to the OpenAI API
func (t *Translator) sendChatRequest(messages []Message) (string, error) {
// Create the API request // Create the API request
requestBody := ChatRequest{ requestBody := ChatRequest{
Model: t.config.Model, Model: t.config.Model,
@ -106,6 +241,40 @@ func (t *Translator) Translate(content, sourceLanguage, targetLanguage string) (
return responseBody.Choices[0].Message.Content, nil return responseBody.Choices[0].Message.Content, nil
} }
// splitIntoChunks breaks content into chunks whose estimated token count
// stays within the configured chunk size. Lines are never split: a chunk
// boundary always falls between lines, and the newline at each boundary is
// dropped (callers rejoin chunks with "\n").
func (t *Translator) splitIntoChunks(content string) ([]string, error) {
	var (
		chunks  []string
		current strings.Builder
		tokens  int
	)

	for _, line := range strings.Split(content, "\n") {
		// Rough token estimate: about 4 characters (runes) per token.
		estimate := int(math.Ceil(float64(utf8.RuneCountInString(line)) / 4.0))

		// Flush the current chunk when this line would push it past the
		// limit. A chunk is never left empty: an oversized single line
		// still becomes its own chunk.
		if tokens > 0 && tokens+estimate > t.config.Chunk.Size {
			chunks = append(chunks, current.String())
			current.Reset()
			tokens = 0
		}

		// Re-insert the newline between lines within the same chunk.
		if current.Len() > 0 {
			current.WriteString("\n")
		}
		current.WriteString(line)
		tokens += estimate
	}

	// Emit whatever remains in the builder.
	if current.Len() > 0 {
		chunks = append(chunks, current.String())
	}

	return chunks, nil
}
// getSystemPrompt constructs the system prompt for translation // getSystemPrompt constructs the system prompt for translation
func (t *Translator) getSystemPrompt(sourceLanguage, targetLanguage string) string { func (t *Translator) getSystemPrompt(sourceLanguage, targetLanguage string) string {
basePrompt := t.config.SystemPrompt basePrompt := t.config.SystemPrompt