sub-cli/internal/format/srt/srt.go
2025-04-23 10:44:08 +08:00

242 lines
6.5 KiB
Go

package srt
import (
"bufio"
"fmt"
"os"
"strconv"
"strings"
"time"
"sub-cli/internal/model"
)
// Parse reads the SRT file at filePath and returns its cues in file order.
//
// A cue is an index line, a "start --> end" timing line and any number of
// text lines; it ends at the next blank line or at the end of the file.
// Parsing is lenient: lines that cannot be interpreted in the current
// position (a non-numeric index, a timing line without exactly one "-->")
// are skipped instead of aborting the whole file. A leading UTF-8 byte order
// mark is ignored.
//
// An error is returned only when the file cannot be opened or read.
func Parse(filePath string) ([]model.SRTEntry, error) {
	file, err := os.Open(filePath)
	if err != nil {
		return nil, err
	}
	defer file.Close()

	// Position inside the cue currently being read. Tracking this explicitly,
	// rather than inferring it from zero-valued fields, keeps cues numbered 0
	// or starting at 00:00:00,000 from being mistaken for "nothing read yet".
	const (
		wantIndex = iota
		wantTiming
		wantText
	)

	var (
		entries []model.SRTEntry
		current model.SRTEntry
		text    strings.Builder
		state   = wantIndex
	)

	// flush stores the cue in progress, if there is one, and resets the
	// parser so the next non-blank line is read as an index.
	flush := func() {
		if state == wantIndex {
			return
		}
		current.Content = text.String()
		entries = append(entries, current)
		current = model.SRTEntry{}
		text.Reset()
		state = wantIndex
	}

	scanner := bufio.NewScanner(file)
	firstLine := true
	for scanner.Scan() {
		raw := scanner.Text()
		if firstLine {
			// strings.TrimSpace does not remove a BOM, and with it in place
			// the first index line would never parse as a number.
			raw = strings.TrimPrefix(raw, "\uFEFF")
			firstLine = false
		}

		line := strings.TrimSpace(raw)
		if line == "" {
			flush()
			continue
		}

		switch state {
		case wantIndex:
			number, convErr := strconv.Atoi(line)
			if convErr != nil {
				// Best effort: ignore stray text until an index shows up.
				continue
			}
			current.Number = number
			state = wantTiming
		case wantTiming:
			times := strings.Split(line, "-->")
			if len(times) != 2 {
				// Not a timing line; keep waiting for one.
				continue
			}
			current.StartTime = parseSRTTimestamp(strings.TrimSpace(times[0]))
			current.EndTime = parseSRTTimestamp(strings.TrimSpace(times[1]))
			state = wantText
		case wantText:
			if text.Len() > 0 {
				text.WriteString("\n")
			}
			text.WriteString(line)
		}
	}
	if err := scanner.Err(); err != nil {
		return nil, err
	}

	// The file may end without a trailing blank line.
	flush()
	return entries, nil
}
// isEntryTimeStampUnset reports whether every component of the entry's start
// time (hours, minutes, seconds, milliseconds) is zero. Note that a start
// time of exactly 00:00:00,000 is indistinguishable from one never assigned.
func isEntryTimeStampUnset(entry model.SRTEntry) bool {
	start := entry.StartTime
	if start.Hours != 0 || start.Minutes != 0 {
		return false
	}
	if start.Seconds != 0 {
		return false
	}
	return start.Milliseconds == 0
}
// parseSRTTimestamp converts an SRT timestamp such as "01:02:03,456" into a
// model.Timestamp. Either "," or "." is accepted before the milliseconds, and
// surrounding whitespace is ignored.
//
// The hours field is parsed on its own so that values of 24 and above are
// accepted; a clock-style time.Parse layout would reject them. Minutes and
// seconds must be two digits in the range 00-59 and the fraction must be
// exactly three digits.
//
// A malformed input yields the zero Timestamp.
func parseSRTTimestamp(timeStr string) model.Timestamp {
	timeStr = strings.TrimSpace(timeStr)

	sep := strings.IndexByte(timeStr, ':')
	if sep <= 0 {
		return model.Timestamp{}
	}
	hourStr, rest := timeStr[:sep], timeStr[sep+1:]

	// strconv.Atoi would also accept a sign; only plain digits are valid here.
	for i := 0; i < len(hourStr); i++ {
		if hourStr[i] < '0' || hourStr[i] > '9' {
			return model.Timestamp{}
		}
	}
	hours, err := strconv.Atoi(hourStr)
	if err != nil {
		return model.Timestamp{}
	}

	// Normalize the SRT decimal comma so the layout below matches.
	rest = strings.Replace(rest, ",", ".", 1)
	t, err := time.Parse("04:05.000", rest)
	if err != nil {
		return model.Timestamp{}
	}

	return model.Timestamp{
		Hours:        hours,
		Minutes:      t.Minute(),
		Seconds:      t.Second(),
		Milliseconds: t.Nanosecond() / int(time.Millisecond),
	}
}
// Generate writes entries to filePath in SRT format, creating the file or
// truncating it if it already exists.
//
// Each entry is written as its number, a "start --> end" timing line and its
// content, followed by a blank line. Output is buffered; any error from
// creating, writing, flushing or closing the file is returned, so a nil
// result means the data was handed to the operating system in full.
func Generate(entries []model.SRTEntry, filePath string) (err error) {
	file, err := os.Create(filePath)
	if err != nil {
		return err
	}
	defer func() {
		// A failed Close can be the only sign that buffered data was lost,
		// so report it unless an earlier error is already being returned.
		if closeErr := file.Close(); err == nil {
			err = closeErr
		}
	}()

	w := bufio.NewWriter(file)
	for _, entry := range entries {
		_, writeErr := fmt.Fprintf(w, "%d\n%s --> %s\n%s\n\n",
			entry.Number,
			formatSRTTimestamp(entry.StartTime),
			formatSRTTimestamp(entry.EndTime),
			entry.Content)
		if writeErr != nil {
			return writeErr
		}
	}
	return w.Flush()
}
// formatSRTTimestamp renders ts as "HH:MM:SS,mmm": hours, minutes and seconds
// zero-padded to at least two digits and milliseconds to at least three.
func formatSRTTimestamp(ts model.Timestamp) string {
	const layout = "%02d:%02d:%02d,%03d"
	h, m, s, ms := ts.Hours, ts.Minutes, ts.Seconds, ts.Milliseconds
	return fmt.Sprintf(layout, h, m, s, ms)
}
// Format rewrites the SRT file at filePath in place: the file is parsed, its
// entries are renumbered 1..n in their existing order, and the result is
// written back to the same path via Generate.
func Format(filePath string) error {
	parsed, parseErr := Parse(filePath)
	if parseErr != nil {
		return fmt.Errorf("error parsing SRT file: %w", parseErr)
	}

	// Renumber sequentially, whatever indices the input carried.
	for idx := 0; idx < len(parsed); idx++ {
		parsed[idx].Number = idx + 1
	}

	return Generate(parsed, filePath)
}
// ConvertToLyrics builds a Lyrics value from SRT entries. For every entry its
// start time is appended to Timeline and its content to Content, in order;
// end times are not carried over. Metadata is an empty, non-nil map.
func ConvertToLyrics(entries []model.SRTEntry) model.Lyrics {
	var result model.Lyrics
	result.Metadata = make(map[string]string)

	for idx := range entries {
		result.Timeline = append(result.Timeline, entries[idx].StartTime)
		result.Content = append(result.Content, entries[idx].Content)
	}

	return result
}
// ConvertToSubtitle parses the SRT file at filePath and converts it into the
// intermediate Subtitle representation with Format set to "srt".
//
// Each entry keeps its number, times and raw text. When the text contains
// both "<" and ">", FormatData["has_html_tags"] is set to true, and the
// "italic", "bold" and "underline" styles are set to "true" for whichever of
// the <i>, <b>, <u> opening tags (either case) appear in the text.
func ConvertToSubtitle(filePath string) (model.Subtitle, error) {
	parsed, err := Parse(filePath)
	if err != nil {
		return model.Subtitle{}, fmt.Errorf("error parsing SRT file: %w", err)
	}

	// Opening tags to look for, with the style key each one maps to.
	markers := []struct {
		lower, upper, style string
	}{
		{"<i>", "<I>", "italic"},
		{"<b>", "<B>", "bold"},
		{"<u>", "<U>", "underline"},
	}

	result := model.NewSubtitle()
	result.Format = "srt"

	for _, src := range parsed {
		item := model.NewSubtitleEntry()
		item.Index = src.Number
		item.StartTime = src.StartTime
		item.EndTime = src.EndTime
		item.Text = src.Content

		text := src.Content
		if strings.Contains(text, "<") && strings.Contains(text, ">") {
			for _, m := range markers {
				if strings.Contains(text, m.lower) || strings.Contains(text, m.upper) {
					item.Styles[m.style] = "true"
				}
			}
			item.FormatData["has_html_tags"] = true
		}

		result.Entries = append(result.Entries, item)
	}

	return result, nil
}
// ConvertFromSubtitle converts the intermediate Subtitle representation to
// SRT entries and writes them to filePath via Generate.
//
// An entry keeps its own Index when that is positive; otherwise it is
// numbered by its 1-based position. For each of the "italic", "bold" and
// "underline" styles whose value is "true", the text is wrapped in the
// matching <i>, <b> or <u> tag unless it already contains that opening tag.
// The check ignores case, so text already marked up with <I>, <B> or <U> is
// not wrapped a second time.
func ConvertFromSubtitle(subtitle model.Subtitle, filePath string) error {
	// Applied in this order, so italic ends up innermost and underline
	// outermost when several styles are set.
	wrappers := []struct {
		style, openTag, closeTag string
	}{
		{"italic", "<i>", "</i>"},
		{"bold", "<b>", "</b>"},
		{"underline", "<u>", "</u>"},
	}

	entries := make([]model.SRTEntry, 0, len(subtitle.Entries))
	for i, subtitleEntry := range subtitle.Entries {
		number := i + 1
		if subtitleEntry.Index > 0 {
			number = subtitleEntry.Index
		}

		content := subtitleEntry.Text
		for _, w := range wrappers {
			// A missing key reads as "", so one lookup covers both checks.
			if subtitleEntry.Styles[w.style] != "true" {
				continue
			}
			if strings.Contains(strings.ToLower(content), w.openTag) {
				continue
			}
			content = w.openTag + content + w.closeTag
		}

		entries = append(entries, model.SRTEntry{
			Number:    number,
			StartTime: subtitleEntry.StartTime,
			EndTime:   subtitleEntry.EndTime,
			Content:   content,
		})
	}

	return Generate(entries, filePath)
}