feat: vtt converting
This commit is contained in:
parent
ba2e477dc0
commit
ba66894e42
7 changed files with 693 additions and 107 deletions
|
@ -25,4 +25,5 @@ const ConvertUsage = `Usage: sub-cli convert <source> <target>
|
|||
Target format is determined by file extension. Supported formats:
|
||||
.txt Plain text format (No meta/timeline tags, only support as target format)
|
||||
.srt SubRip Subtitle format
|
||||
.lrc LRC format`
|
||||
.lrc LRC format
|
||||
.vtt WebVTT format`
|
||||
|
|
|
@ -3,12 +3,13 @@ package converter
|
|||
import (
|
||||
"errors"
|
||||
"fmt"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"strings"
|
||||
|
||||
"sub-cli/internal/format/lrc"
|
||||
"sub-cli/internal/format/srt"
|
||||
"sub-cli/internal/format/txt"
|
||||
"sub-cli/internal/format/vtt"
|
||||
"sub-cli/internal/model"
|
||||
)
|
||||
|
||||
|
@ -20,118 +21,47 @@ func Convert(sourceFile, targetFile string) error {
|
|||
sourceFmt := strings.TrimPrefix(filepath.Ext(sourceFile), ".")
|
||||
targetFmt := strings.TrimPrefix(filepath.Ext(targetFile), ".")
|
||||
|
||||
switch sourceFmt {
|
||||
// TXT only supports being a target format
|
||||
if sourceFmt == "txt" {
|
||||
return fmt.Errorf("%w: txt is only supported as a target format", ErrUnsupportedFormat)
|
||||
}
|
||||
|
||||
// Convert source to intermediate representation
|
||||
subtitle, err := convertToIntermediate(sourceFile, sourceFmt)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
// Convert from intermediate representation to target format
|
||||
return convertFromIntermediate(subtitle, targetFile, targetFmt)
|
||||
}
|
||||
|
||||
// convertToIntermediate converts a source file to our intermediate Subtitle representation
|
||||
func convertToIntermediate(sourceFile, sourceFormat string) (model.Subtitle, error) {
|
||||
switch sourceFormat {
|
||||
case "lrc":
|
||||
return convertFromLRC(sourceFile, targetFile, targetFmt)
|
||||
return lrc.ConvertToSubtitle(sourceFile)
|
||||
case "srt":
|
||||
return convertFromSRT(sourceFile, targetFile, targetFmt)
|
||||
return srt.ConvertToSubtitle(sourceFile)
|
||||
case "vtt":
|
||||
return vtt.ConvertToSubtitle(sourceFile)
|
||||
default:
|
||||
return fmt.Errorf("%w: %s", ErrUnsupportedFormat, sourceFmt)
|
||||
return model.Subtitle{}, fmt.Errorf("%w: %s", ErrUnsupportedFormat, sourceFormat)
|
||||
}
|
||||
}
|
||||
|
||||
// convertFromLRC converts an LRC file to another format
|
||||
func convertFromLRC(sourceFile, targetFile, targetFmt string) error {
|
||||
sourceLyrics, err := lrc.Parse(sourceFile)
|
||||
if err != nil {
|
||||
return fmt.Errorf("error parsing source LRC file: %w", err)
|
||||
}
|
||||
|
||||
switch targetFmt {
|
||||
// convertFromIntermediate converts our intermediate Subtitle representation to a target format
|
||||
func convertFromIntermediate(subtitle model.Subtitle, targetFile, targetFormat string) error {
|
||||
switch targetFormat {
|
||||
case "lrc":
|
||||
return lrc.ConvertFromSubtitle(subtitle, targetFile)
|
||||
case "srt":
|
||||
return srt.ConvertFromSubtitle(subtitle, targetFile)
|
||||
case "vtt":
|
||||
return vtt.ConvertFromSubtitle(subtitle, targetFile)
|
||||
case "txt":
|
||||
return lrcToTxt(sourceLyrics, targetFile)
|
||||
case "srt":
|
||||
return lrcToSRT(sourceLyrics, targetFile)
|
||||
case "lrc":
|
||||
return lrc.Generate(sourceLyrics, targetFile)
|
||||
return txt.GenerateFromSubtitle(subtitle, targetFile)
|
||||
default:
|
||||
return fmt.Errorf("%w: %s", ErrUnsupportedFormat, targetFmt)
|
||||
return fmt.Errorf("%w: %s", ErrUnsupportedFormat, targetFormat)
|
||||
}
|
||||
}
|
||||
|
||||
// convertFromSRT converts an SRT file to another format
|
||||
func convertFromSRT(sourceFile, targetFile, targetFmt string) error {
|
||||
entries, err := srt.Parse(sourceFile)
|
||||
if err != nil {
|
||||
return fmt.Errorf("error parsing source SRT file: %w", err)
|
||||
}
|
||||
|
||||
switch targetFmt {
|
||||
case "txt":
|
||||
return srtToTxt(entries, targetFile)
|
||||
case "lrc":
|
||||
lyrics := srt.ConvertToLyrics(entries)
|
||||
return lrc.Generate(lyrics, targetFile)
|
||||
case "srt":
|
||||
return srt.Generate(entries, targetFile)
|
||||
default:
|
||||
return fmt.Errorf("%w: %s", ErrUnsupportedFormat, targetFmt)
|
||||
}
|
||||
}
|
||||
|
||||
// lrcToTxt converts LRC lyrics to a plain text file
|
||||
func lrcToTxt(lyrics model.Lyrics, targetFile string) error {
|
||||
file, err := os.Create(targetFile)
|
||||
if err != nil {
|
||||
return fmt.Errorf("error creating target file: %w", err)
|
||||
}
|
||||
defer file.Close()
|
||||
|
||||
for _, content := range lyrics.Content {
|
||||
if _, err := fmt.Fprintln(file, content); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
// lrcToSRT converts LRC lyrics to an SRT file
|
||||
func lrcToSRT(lyrics model.Lyrics, targetFile string) error {
|
||||
var entries []model.SRTEntry
|
||||
|
||||
for i, content := range lyrics.Content {
|
||||
if i >= len(lyrics.Timeline) {
|
||||
break
|
||||
}
|
||||
|
||||
startTime := lyrics.Timeline[i]
|
||||
endTime := startTime
|
||||
|
||||
// If there's a next timeline entry, use it for end time
|
||||
// Otherwise add a few seconds to the start time
|
||||
if i+1 < len(lyrics.Timeline) {
|
||||
endTime = lyrics.Timeline[i+1]
|
||||
} else {
|
||||
endTime.Seconds += 3
|
||||
}
|
||||
|
||||
entry := model.SRTEntry{
|
||||
Number: i + 1,
|
||||
StartTime: startTime,
|
||||
EndTime: endTime,
|
||||
Content: content,
|
||||
}
|
||||
|
||||
entries = append(entries, entry)
|
||||
}
|
||||
|
||||
return srt.Generate(entries, targetFile)
|
||||
}
|
||||
|
||||
// srtToTxt converts SRT entries to a plain text file
|
||||
func srtToTxt(entries []model.SRTEntry, targetFile string) error {
|
||||
file, err := os.Create(targetFile)
|
||||
if err != nil {
|
||||
return fmt.Errorf("error creating target file: %w", err)
|
||||
}
|
||||
defer file.Close()
|
||||
|
||||
for _, entry := range entries {
|
||||
if _, err := fmt.Fprintln(file, entry.Content); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
|
|
@ -180,3 +180,87 @@ func Format(filePath string) error {
|
|||
|
||||
return Generate(lyrics, filePath)
|
||||
}
|
||||
|
||||
// ConvertToSubtitle converts LRC file to our intermediate Subtitle representation
|
||||
func ConvertToSubtitle(filePath string) (model.Subtitle, error) {
|
||||
lyrics, err := Parse(filePath)
|
||||
if err != nil {
|
||||
return model.Subtitle{}, err
|
||||
}
|
||||
|
||||
subtitle := model.NewSubtitle()
|
||||
subtitle.Format = "lrc"
|
||||
|
||||
// Copy metadata
|
||||
for key, value := range lyrics.Metadata {
|
||||
subtitle.Metadata[key] = value
|
||||
}
|
||||
|
||||
// Check for specific LRC metadata we should use for title
|
||||
if title, ok := lyrics.Metadata["ti"]; ok {
|
||||
subtitle.Title = title
|
||||
}
|
||||
|
||||
// Create entries from timeline and content
|
||||
for i, content := range lyrics.Content {
|
||||
if i >= len(lyrics.Timeline) {
|
||||
break
|
||||
}
|
||||
|
||||
entry := model.NewSubtitleEntry()
|
||||
entry.Index = i + 1
|
||||
entry.StartTime = lyrics.Timeline[i]
|
||||
|
||||
// Set end time based on next timeline entry if available, otherwise add a few seconds
|
||||
if i+1 < len(lyrics.Timeline) {
|
||||
entry.EndTime = lyrics.Timeline[i+1]
|
||||
} else {
|
||||
// Default end time: start time + 3 seconds
|
||||
entry.EndTime = model.Timestamp{
|
||||
Hours: entry.StartTime.Hours,
|
||||
Minutes: entry.StartTime.Minutes,
|
||||
Seconds: entry.StartTime.Seconds + 3,
|
||||
Milliseconds: entry.StartTime.Milliseconds,
|
||||
}
|
||||
// Handle overflow
|
||||
if entry.EndTime.Seconds >= 60 {
|
||||
entry.EndTime.Seconds -= 60
|
||||
entry.EndTime.Minutes++
|
||||
}
|
||||
if entry.EndTime.Minutes >= 60 {
|
||||
entry.EndTime.Minutes -= 60
|
||||
entry.EndTime.Hours++
|
||||
}
|
||||
}
|
||||
|
||||
entry.Text = content
|
||||
subtitle.Entries = append(subtitle.Entries, entry)
|
||||
}
|
||||
|
||||
return subtitle, nil
|
||||
}
|
||||
|
||||
// ConvertFromSubtitle converts our intermediate Subtitle representation to LRC format
|
||||
func ConvertFromSubtitle(subtitle model.Subtitle, filePath string) error {
|
||||
lyrics := model.Lyrics{
|
||||
Metadata: make(map[string]string),
|
||||
}
|
||||
|
||||
// Copy metadata
|
||||
for key, value := range subtitle.Metadata {
|
||||
lyrics.Metadata[key] = value
|
||||
}
|
||||
|
||||
// Add title if present and not already in metadata
|
||||
if subtitle.Title != "" && lyrics.Metadata["ti"] == "" {
|
||||
lyrics.Metadata["ti"] = subtitle.Title
|
||||
}
|
||||
|
||||
// Convert entries to timeline and content
|
||||
for _, entry := range subtitle.Entries {
|
||||
lyrics.Timeline = append(lyrics.Timeline, entry.StartTime)
|
||||
lyrics.Content = append(lyrics.Content, entry.Text)
|
||||
}
|
||||
|
||||
return Generate(lyrics, filePath)
|
||||
}
|
||||
|
|
|
@ -152,3 +152,91 @@ func ConvertToLyrics(entries []model.SRTEntry) model.Lyrics {
|
|||
|
||||
return lyrics
|
||||
}
|
||||
|
||||
// ConvertToSubtitle converts SRT entries to our intermediate Subtitle structure
|
||||
func ConvertToSubtitle(filePath string) (model.Subtitle, error) {
|
||||
entries, err := Parse(filePath)
|
||||
if err != nil {
|
||||
return model.Subtitle{}, fmt.Errorf("error parsing SRT file: %w", err)
|
||||
}
|
||||
|
||||
subtitle := model.NewSubtitle()
|
||||
subtitle.Format = "srt"
|
||||
|
||||
// Convert SRT entries to intermediate representation
|
||||
for _, entry := range entries {
|
||||
subtitleEntry := model.NewSubtitleEntry()
|
||||
subtitleEntry.Index = entry.Number
|
||||
subtitleEntry.StartTime = entry.StartTime
|
||||
subtitleEntry.EndTime = entry.EndTime
|
||||
subtitleEntry.Text = entry.Content
|
||||
|
||||
// Look for HTML styling tags and store information about them
|
||||
if strings.Contains(entry.Content, "<") && strings.Contains(entry.Content, ">") {
|
||||
// Extract and store HTML styling info
|
||||
if strings.Contains(entry.Content, "<i>") || strings.Contains(entry.Content, "<I>") {
|
||||
subtitleEntry.Styles["italic"] = "true"
|
||||
}
|
||||
if strings.Contains(entry.Content, "<b>") || strings.Contains(entry.Content, "<B>") {
|
||||
subtitleEntry.Styles["bold"] = "true"
|
||||
}
|
||||
if strings.Contains(entry.Content, "<u>") || strings.Contains(entry.Content, "<U>") {
|
||||
subtitleEntry.Styles["underline"] = "true"
|
||||
}
|
||||
|
||||
subtitleEntry.FormatData["has_html_tags"] = true
|
||||
}
|
||||
|
||||
subtitle.Entries = append(subtitle.Entries, subtitleEntry)
|
||||
}
|
||||
|
||||
return subtitle, nil
|
||||
}
|
||||
|
||||
// ConvertFromSubtitle converts our intermediate Subtitle representation to SRT format
|
||||
func ConvertFromSubtitle(subtitle model.Subtitle, filePath string) error {
|
||||
var entries []model.SRTEntry
|
||||
|
||||
// Convert intermediate representation to SRT entries
|
||||
for i, subtitleEntry := range subtitle.Entries {
|
||||
entry := model.SRTEntry{
|
||||
Number: i + 1, // Ensure sequential numbering
|
||||
StartTime: subtitleEntry.StartTime,
|
||||
EndTime: subtitleEntry.EndTime,
|
||||
Content: subtitleEntry.Text,
|
||||
}
|
||||
|
||||
// Use index from original entry if available
|
||||
if subtitleEntry.Index > 0 {
|
||||
entry.Number = subtitleEntry.Index
|
||||
}
|
||||
|
||||
// Apply any styling stored in the entry if needed
|
||||
// Note: SRT only supports basic HTML tags, so we convert style attributes back to HTML
|
||||
content := entry.Content
|
||||
if _, ok := subtitleEntry.Styles["italic"]; ok && subtitleEntry.Styles["italic"] == "true" {
|
||||
if !strings.Contains(content, "<i>") {
|
||||
content = "<i>" + content + "</i>"
|
||||
}
|
||||
}
|
||||
if _, ok := subtitleEntry.Styles["bold"]; ok && subtitleEntry.Styles["bold"] == "true" {
|
||||
if !strings.Contains(content, "<b>") {
|
||||
content = "<b>" + content + "</b>"
|
||||
}
|
||||
}
|
||||
if _, ok := subtitleEntry.Styles["underline"]; ok && subtitleEntry.Styles["underline"] == "true" {
|
||||
if !strings.Contains(content, "<u>") {
|
||||
content = "<u>" + content + "</u>"
|
||||
}
|
||||
}
|
||||
|
||||
// Only update content if we applied styling
|
||||
if content != entry.Content {
|
||||
entry.Content = content
|
||||
}
|
||||
|
||||
entries = append(entries, entry)
|
||||
}
|
||||
|
||||
return Generate(entries, filePath)
|
||||
}
|
||||
|
|
30
internal/format/txt/txt.go
Normal file
30
internal/format/txt/txt.go
Normal file
|
@ -0,0 +1,30 @@
|
|||
package txt
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"os"
|
||||
|
||||
"sub-cli/internal/model"
|
||||
)
|
||||
|
||||
// GenerateFromSubtitle converts our intermediate Subtitle to plain text format
|
||||
func GenerateFromSubtitle(subtitle model.Subtitle, filePath string) error {
|
||||
file, err := os.Create(filePath)
|
||||
if err != nil {
|
||||
return fmt.Errorf("error creating TXT file: %w", err)
|
||||
}
|
||||
defer file.Close()
|
||||
|
||||
// Write title if available
|
||||
if subtitle.Title != "" {
|
||||
fmt.Fprintln(file, subtitle.Title)
|
||||
fmt.Fprintln(file)
|
||||
}
|
||||
|
||||
// Write content without timestamps
|
||||
for _, entry := range subtitle.Entries {
|
||||
fmt.Fprintln(file, entry.Text)
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
393
internal/format/vtt/vtt.go
Normal file
393
internal/format/vtt/vtt.go
Normal file
|
@ -0,0 +1,393 @@
|
|||
package vtt
|
||||
|
||||
import (
|
||||
"bufio"
|
||||
"fmt"
|
||||
"os"
|
||||
"regexp"
|
||||
"strconv"
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
"sub-cli/internal/model"
|
||||
)
|
||||
|
||||
// VTTHeader is the signature that the first line of a WebVTT file starts with.
const VTTHeader = "WEBVTT"
|
||||
|
||||
// Parse parses a WebVTT file into our intermediate Subtitle representation
|
||||
func Parse(filePath string) (model.Subtitle, error) {
|
||||
subtitle := model.NewSubtitle()
|
||||
subtitle.Format = "vtt"
|
||||
|
||||
file, err := os.Open(filePath)
|
||||
if err != nil {
|
||||
return subtitle, fmt.Errorf("error opening VTT file: %w", err)
|
||||
}
|
||||
defer file.Close()
|
||||
|
||||
scanner := bufio.NewScanner(file)
|
||||
|
||||
// Check header
|
||||
if !scanner.Scan() {
|
||||
return subtitle, fmt.Errorf("empty VTT file")
|
||||
}
|
||||
|
||||
header := strings.TrimSpace(scanner.Text())
|
||||
if !strings.HasPrefix(header, VTTHeader) {
|
||||
return subtitle, fmt.Errorf("invalid VTT file: missing WEBVTT header")
|
||||
}
|
||||
|
||||
// Get metadata from header
|
||||
if strings.Contains(header, " - ") {
|
||||
subtitle.Title = strings.TrimSpace(strings.TrimPrefix(header, VTTHeader+" - "))
|
||||
}
|
||||
|
||||
// Process file content
|
||||
var currentEntry model.SubtitleEntry
|
||||
var inCue bool
|
||||
var inStyle bool
|
||||
var styleBuffer strings.Builder
|
||||
var cueTextBuffer strings.Builder
|
||||
|
||||
lineNum := 1
|
||||
for scanner.Scan() {
|
||||
lineNum++
|
||||
line := scanner.Text()
|
||||
|
||||
// Skip empty lines
|
||||
if strings.TrimSpace(line) == "" {
|
||||
if inCue {
|
||||
// End of a cue
|
||||
currentEntry.Text = cueTextBuffer.String()
|
||||
subtitle.Entries = append(subtitle.Entries, currentEntry)
|
||||
currentEntry = model.NewSubtitleEntry()
|
||||
cueTextBuffer.Reset()
|
||||
inCue = false
|
||||
}
|
||||
continue
|
||||
}
|
||||
|
||||
// Check for style blocks
|
||||
if strings.HasPrefix(line, "STYLE") {
|
||||
inStyle = true
|
||||
continue
|
||||
}
|
||||
|
||||
if inStyle {
|
||||
if line == "" {
|
||||
inStyle = false
|
||||
subtitle.Styles["css"] = styleBuffer.String()
|
||||
styleBuffer.Reset()
|
||||
} else {
|
||||
styleBuffer.WriteString(line)
|
||||
styleBuffer.WriteString("\n")
|
||||
}
|
||||
continue
|
||||
}
|
||||
|
||||
// Check for NOTE comments
|
||||
if strings.HasPrefix(line, "NOTE") {
|
||||
comment := strings.TrimSpace(strings.TrimPrefix(line, "NOTE"))
|
||||
subtitle.Comments = append(subtitle.Comments, comment)
|
||||
continue
|
||||
}
|
||||
|
||||
// Check for REGION definitions
|
||||
if strings.HasPrefix(line, "REGION") {
|
||||
parts := strings.Split(strings.TrimPrefix(line, "REGION"), ":")
|
||||
if len(parts) >= 2 {
|
||||
regionID := strings.TrimSpace(parts[0])
|
||||
region := model.NewSubtitleRegion(regionID)
|
||||
|
||||
settings := strings.Split(parts[1], " ")
|
||||
for _, setting := range settings {
|
||||
keyValue := strings.Split(setting, "=")
|
||||
if len(keyValue) == 2 {
|
||||
region.Settings[strings.TrimSpace(keyValue[0])] = strings.TrimSpace(keyValue[1])
|
||||
}
|
||||
}
|
||||
|
||||
subtitle.Regions = append(subtitle.Regions, region)
|
||||
}
|
||||
continue
|
||||
}
|
||||
|
||||
// Check for timestamp lines
|
||||
if strings.Contains(line, "-->") {
|
||||
inCue = true
|
||||
|
||||
// Parse timestamps
|
||||
timestamps := strings.Split(line, "-->")
|
||||
if len(timestamps) != 2 {
|
||||
return subtitle, fmt.Errorf("invalid timestamp format at line %d: %s", lineNum, line)
|
||||
}
|
||||
|
||||
startTimeStr := strings.TrimSpace(timestamps[0])
|
||||
|
||||
endTimeAndSettings := strings.TrimSpace(timestamps[1])
|
||||
endTimeStr := endTimeAndSettings
|
||||
settings := ""
|
||||
|
||||
// Check for cue settings after end timestamp
|
||||
if spaceIndex := strings.IndexByte(endTimeAndSettings, ' '); spaceIndex != -1 {
|
||||
endTimeStr = endTimeAndSettings[:spaceIndex]
|
||||
settings = endTimeAndSettings[spaceIndex+1:]
|
||||
}
|
||||
|
||||
// Set timestamps
|
||||
currentEntry.StartTime = parseVTTTimestamp(startTimeStr)
|
||||
currentEntry.EndTime = parseVTTTimestamp(endTimeStr)
|
||||
|
||||
// Parse cue settings
|
||||
if settings != "" {
|
||||
settingPairs := strings.Split(settings, " ")
|
||||
for _, pair := range settingPairs {
|
||||
if pair == "" {
|
||||
continue
|
||||
}
|
||||
|
||||
if strings.Contains(pair, ":") {
|
||||
parts := strings.Split(pair, ":")
|
||||
if len(parts) == 2 {
|
||||
currentEntry.Styles[strings.TrimSpace(parts[0])] = strings.TrimSpace(parts[1])
|
||||
}
|
||||
} else {
|
||||
// Handle non-key-value settings if any
|
||||
currentEntry.FormatData["setting_"+pair] = true
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
cueTextBuffer.Reset()
|
||||
continue
|
||||
}
|
||||
|
||||
// Check if we have identifier before timestamp
|
||||
if !inCue && currentEntry.Index == 0 && !strings.Contains(line, "-->") {
|
||||
// This might be a cue identifier
|
||||
if _, err := strconv.Atoi(line); err == nil {
|
||||
// It's likely a numeric identifier
|
||||
num, _ := strconv.Atoi(line)
|
||||
currentEntry.Index = num
|
||||
} else {
|
||||
// It's a string identifier, store it in metadata
|
||||
currentEntry.Metadata["identifier"] = line
|
||||
currentEntry.Index = len(subtitle.Entries) + 1
|
||||
}
|
||||
continue
|
||||
}
|
||||
|
||||
// If we're in a cue, add this line to the text
|
||||
if inCue {
|
||||
if cueTextBuffer.Len() > 0 {
|
||||
cueTextBuffer.WriteString("\n")
|
||||
}
|
||||
cueTextBuffer.WriteString(line)
|
||||
}
|
||||
}
|
||||
|
||||
// Don't forget the last entry
|
||||
if inCue && cueTextBuffer.Len() > 0 {
|
||||
currentEntry.Text = cueTextBuffer.String()
|
||||
subtitle.Entries = append(subtitle.Entries, currentEntry)
|
||||
}
|
||||
|
||||
// Process cue text to extract styling
|
||||
processVTTCueTextStyling(&subtitle)
|
||||
|
||||
if err := scanner.Err(); err != nil {
|
||||
return subtitle, fmt.Errorf("error reading VTT file: %w", err)
|
||||
}
|
||||
|
||||
return subtitle, nil
|
||||
}
|
||||
|
||||
// parseVTTTimestamp parses a VTT timestamp string into our Timestamp model
|
||||
func parseVTTTimestamp(timeStr string) model.Timestamp {
|
||||
// VTT timestamps format: 00:00:00.000
|
||||
re := regexp.MustCompile(`(\d+):(\d+):(\d+)\.(\d+)|\d+:(\d+)\.(\d+)`)
|
||||
matches := re.FindStringSubmatch(timeStr)
|
||||
|
||||
var hours, minutes, seconds, milliseconds int
|
||||
|
||||
if len(matches) >= 5 && matches[1] != "" {
|
||||
// Full format: 00:00:00.000
|
||||
hours, _ = strconv.Atoi(matches[1])
|
||||
minutes, _ = strconv.Atoi(matches[2])
|
||||
seconds, _ = strconv.Atoi(matches[3])
|
||||
|
||||
msStr := matches[4]
|
||||
// Ensure milliseconds are treated correctly
|
||||
switch len(msStr) {
|
||||
case 1:
|
||||
milliseconds, _ = strconv.Atoi(msStr + "00")
|
||||
case 2:
|
||||
milliseconds, _ = strconv.Atoi(msStr + "0")
|
||||
case 3:
|
||||
milliseconds, _ = strconv.Atoi(msStr)
|
||||
default:
|
||||
if len(msStr) > 3 {
|
||||
milliseconds, _ = strconv.Atoi(msStr[:3])
|
||||
}
|
||||
}
|
||||
} else if len(matches) >= 7 && matches[5] != "" {
|
||||
// Short format: 00:00.000
|
||||
minutes, _ = strconv.Atoi(matches[5])
|
||||
seconds, _ = strconv.Atoi(matches[6])
|
||||
|
||||
msStr := matches[7]
|
||||
// Ensure milliseconds are treated correctly
|
||||
switch len(msStr) {
|
||||
case 1:
|
||||
milliseconds, _ = strconv.Atoi(msStr + "00")
|
||||
case 2:
|
||||
milliseconds, _ = strconv.Atoi(msStr + "0")
|
||||
case 3:
|
||||
milliseconds, _ = strconv.Atoi(msStr)
|
||||
default:
|
||||
if len(msStr) > 3 {
|
||||
milliseconds, _ = strconv.Atoi(msStr[:3])
|
||||
}
|
||||
}
|
||||
} else {
|
||||
// Try another approach with time.Parse
|
||||
layout := "15:04:05.000"
|
||||
t, err := time.Parse(layout, timeStr)
|
||||
if err == nil {
|
||||
hours = t.Hour()
|
||||
minutes = t.Minute()
|
||||
seconds = t.Second()
|
||||
milliseconds = t.Nanosecond() / 1000000
|
||||
}
|
||||
}
|
||||
|
||||
return model.Timestamp{
|
||||
Hours: hours,
|
||||
Minutes: minutes,
|
||||
Seconds: seconds,
|
||||
Milliseconds: milliseconds,
|
||||
}
|
||||
}
|
||||
|
||||
// processVTTCueTextStyling processes the cue text to extract styling tags
|
||||
func processVTTCueTextStyling(subtitle *model.Subtitle) {
|
||||
for i, entry := range subtitle.Entries {
|
||||
// Look for basic HTML tags in the text and extract them to styling attributes
|
||||
text := entry.Text
|
||||
|
||||
// Process <b>, <i>, <u>, etc. tags to collect styling information
|
||||
// For simplicity, we'll just note that styling exists, but we won't modify the text
|
||||
if strings.Contains(text, "<") && strings.Contains(text, ">") {
|
||||
entry.FormatData["has_html_tags"] = true
|
||||
}
|
||||
|
||||
// Update the entry
|
||||
subtitle.Entries[i] = entry
|
||||
}
|
||||
}
|
||||
|
||||
// Generate generates a WebVTT file from our intermediate Subtitle representation
|
||||
func Generate(subtitle model.Subtitle, filePath string) error {
|
||||
file, err := os.Create(filePath)
|
||||
if err != nil {
|
||||
return fmt.Errorf("error creating VTT file: %w", err)
|
||||
}
|
||||
defer file.Close()
|
||||
|
||||
// Write header
|
||||
if subtitle.Title != "" {
|
||||
fmt.Fprintf(file, "%s - %s\n\n", VTTHeader, subtitle.Title)
|
||||
} else {
|
||||
fmt.Fprintf(file, "%s\n\n", VTTHeader)
|
||||
}
|
||||
|
||||
// Write styles if any
|
||||
if cssStyle, ok := subtitle.Styles["css"]; ok && cssStyle != "" {
|
||||
fmt.Fprintln(file, "STYLE")
|
||||
fmt.Fprintln(file, cssStyle)
|
||||
fmt.Fprintln(file)
|
||||
}
|
||||
|
||||
// Write regions if any
|
||||
for _, region := range subtitle.Regions {
|
||||
fmt.Fprintf(file, "REGION %s:", region.ID)
|
||||
for key, value := range region.Settings {
|
||||
fmt.Fprintf(file, " %s=%s", key, value)
|
||||
}
|
||||
fmt.Fprintln(file)
|
||||
}
|
||||
|
||||
// Write comments if any
|
||||
for _, comment := range subtitle.Comments {
|
||||
fmt.Fprintf(file, "NOTE %s\n", comment)
|
||||
}
|
||||
if len(subtitle.Comments) > 0 {
|
||||
fmt.Fprintln(file)
|
||||
}
|
||||
|
||||
// Write cues
|
||||
for i, entry := range subtitle.Entries {
|
||||
// Write identifier if available
|
||||
if identifier, ok := entry.Metadata["identifier"]; ok && identifier != "" {
|
||||
fmt.Fprintln(file, identifier)
|
||||
} else if entry.Index > 0 {
|
||||
fmt.Fprintln(file, entry.Index)
|
||||
} else {
|
||||
fmt.Fprintln(file, i+1)
|
||||
}
|
||||
|
||||
// Write timestamps and settings
|
||||
fmt.Fprintf(file, "%s --> %s", formatVTTTimestamp(entry.StartTime), formatVTTTimestamp(entry.EndTime))
|
||||
|
||||
// Add cue settings
|
||||
for key, value := range entry.Styles {
|
||||
fmt.Fprintf(file, " %s:%s", key, value)
|
||||
}
|
||||
fmt.Fprintln(file)
|
||||
|
||||
// Write cue text
|
||||
fmt.Fprintln(file, entry.Text)
|
||||
fmt.Fprintln(file)
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
// formatVTTTimestamp formats a Timestamp struct as a VTT timestamp string
|
||||
func formatVTTTimestamp(ts model.Timestamp) string {
|
||||
return fmt.Sprintf("%02d:%02d:%02d.%03d",
|
||||
ts.Hours,
|
||||
ts.Minutes,
|
||||
ts.Seconds,
|
||||
ts.Milliseconds)
|
||||
}
|
||||
|
||||
// Format standardizes and formats a VTT file
|
||||
func Format(filePath string) error {
|
||||
// Parse the file
|
||||
subtitle, err := Parse(filePath)
|
||||
if err != nil {
|
||||
return fmt.Errorf("error parsing VTT file: %w", err)
|
||||
}
|
||||
|
||||
// Standardize entry numbering
|
||||
for i := range subtitle.Entries {
|
||||
subtitle.Entries[i].Index = i + 1
|
||||
}
|
||||
|
||||
// Write back the formatted content
|
||||
return Generate(subtitle, filePath)
|
||||
}
|
||||
|
||||
// ConvertToSubtitle converts VTT entries to our intermediate Subtitle structure
|
||||
func ConvertToSubtitle(filePath string) (model.Subtitle, error) {
|
||||
return Parse(filePath)
|
||||
}
|
||||
|
||||
// ConvertFromSubtitle converts our intermediate Subtitle to VTT format
|
||||
func ConvertFromSubtitle(subtitle model.Subtitle, filePath string) error {
|
||||
return Generate(subtitle, filePath)
|
||||
}
|
|
@ -22,3 +22,63 @@ type SRTEntry struct {
|
|||
EndTime Timestamp
|
||||
Content string
|
||||
}
|
||||
|
||||
// SubtitleEntry represents a single timed cue in the format-independent
// intermediate representation used when converting between subtitle formats.
//
// The map fields are nil in the zero value; use NewSubtitleEntry to obtain an
// entry whose maps can be written to.
type SubtitleEntry struct {
	Index      int                    // Sequential index/number
	StartTime  Timestamp              // Start time
	EndTime    Timestamp              // End time
	Text       string                 // The subtitle text content
	Styles     map[string]string      // Styling information (e.g., VTT's align, position)
	Classes    []string               // CSS classes (for VTT)
	Metadata   map[string]string      // Additional metadata
	FormatData map[string]interface{} // Format-specific data that doesn't fit elsewhere
}
|
||||
|
||||
// Subtitle is the format-independent intermediate representation of a whole
// subtitle file, used as the hub for conversions between formats.
//
// The map fields are nil in the zero value; use NewSubtitle to obtain a value
// whose maps can be written to.
type Subtitle struct {
	Title      string                 // Optional title
	Metadata   map[string]string      // Global metadata (e.g., LRC's ti, ar, al)
	Entries    []SubtitleEntry        // Subtitle entries
	Format     string                 // Source format
	Styles     map[string]string      // Global styles (e.g., VTT STYLE blocks)
	Comments   []string               // Comments/notes (for VTT)
	Regions    []SubtitleRegion       // Region definitions (for VTT)
	FormatData map[string]interface{} // Format-specific data that doesn't fit elsewhere
}
|
||||
|
||||
// SubtitleRegion represents a region definition (mainly for VTT).
//
// Settings is nil in the zero value; use NewSubtitleRegion to obtain a region
// whose settings can be written to.
type SubtitleRegion struct {
	ID       string            // Region identifier
	Settings map[string]string // Region settings as key/value pairs
}
|
||||
|
||||
// Creates a new empty Subtitle
|
||||
func NewSubtitle() Subtitle {
|
||||
return Subtitle{
|
||||
Metadata: make(map[string]string),
|
||||
Entries: []SubtitleEntry{},
|
||||
Styles: make(map[string]string),
|
||||
Comments: []string{},
|
||||
Regions: []SubtitleRegion{},
|
||||
FormatData: make(map[string]interface{}),
|
||||
}
|
||||
}
|
||||
|
||||
// Creates a new empty SubtitleEntry
|
||||
func NewSubtitleEntry() SubtitleEntry {
|
||||
return SubtitleEntry{
|
||||
Styles: make(map[string]string),
|
||||
Classes: []string{},
|
||||
Metadata: make(map[string]string),
|
||||
FormatData: make(map[string]interface{}),
|
||||
}
|
||||
}
|
||||
|
||||
// Creates a new SubtitleRegion
|
||||
func NewSubtitleRegion(id string) SubtitleRegion {
|
||||
return SubtitleRegion{
|
||||
ID: id,
|
||||
Settings: make(map[string]string),
|
||||
}
|
||||
}
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue