feat: vtt converting

2025-04-23 10:44:08 +08:00 · 2025-04-23 10:44:08 +08:00 · ba66894e42
commit ba66894e42
parent ba2e477dc0
7 changed files with 693 additions and 107 deletions
--- a/internal/config/constants.go
+++ b/internal/config/constants.go
@ -25,4 +25,5 @@ const ConvertUsage = `Usage: sub-cli convert <source> <target>
  Target format is determined by file extension. Supported formats:
    .txt	Plain text format (No meta/timeline tags, only support as target format)
    .srt	SubRip Subtitle format
-    .lrc	LRC format`
+    .lrc	LRC format
+    .vtt	WebVTT format`
--- a/internal/converter/converter.go
+++ b/internal/converter/converter.go
@ -3,12 +3,13 @@ package converter
 import (
 	"errors"
 	"fmt"
-	"os"
 	"path/filepath"
 	"strings"

 	"sub-cli/internal/format/lrc"
 	"sub-cli/internal/format/srt"
+	"sub-cli/internal/format/txt"
+	"sub-cli/internal/format/vtt"
 	"sub-cli/internal/model"
 )

@ -20,118 +21,47 @@ func Convert(sourceFile, targetFile string) error {
 	sourceFmt := strings.TrimPrefix(filepath.Ext(sourceFile), ".")
 	targetFmt := strings.TrimPrefix(filepath.Ext(targetFile), ".")

-	switch sourceFmt {
+	// TXT only supports being a target format
+	if sourceFmt == "txt" {
+		return fmt.Errorf("%w: txt is only supported as a target format", ErrUnsupportedFormat)
+	}
+
+	// Convert source to intermediate representation
+	subtitle, err := convertToIntermediate(sourceFile, sourceFmt)
+	if err != nil {
+		return err
+	}
+
+	// Convert from intermediate representation to target format
+	return convertFromIntermediate(subtitle, targetFile, targetFmt)
+}
+
+// convertToIntermediate parses sourceFile with the parser selected by sourceFormat ("lrc", "srt" or "vtt", matched case-sensitively) and returns the resulting intermediate model.Subtitle; any other format yields an empty Subtitle and an error wrapping ErrUnsupportedFormat.
+func convertToIntermediate(sourceFile, sourceFormat string) (model.Subtitle, error) {
+	switch sourceFormat {
 	case "lrc":
-		return convertFromLRC(sourceFile, targetFile, targetFmt)
+		return lrc.ConvertToSubtitle(sourceFile)
 	case "srt":
-		return convertFromSRT(sourceFile, targetFile, targetFmt)
+		return srt.ConvertToSubtitle(sourceFile)
+	case "vtt":
+		return vtt.ConvertToSubtitle(sourceFile)
 	default:
-		return fmt.Errorf("%w: %s", ErrUnsupportedFormat, sourceFmt)
+		return model.Subtitle{}, fmt.Errorf("%w: %s", ErrUnsupportedFormat, sourceFormat)
 	}
 }

-// convertFromLRC converts an LRC file to another format
-func convertFromLRC(sourceFile, targetFile, targetFmt string) error {
-	sourceLyrics, err := lrc.Parse(sourceFile)
-	if err != nil {
-		return fmt.Errorf("error parsing source LRC file: %w", err)
-	}
-
-	switch targetFmt {
+// convertFromIntermediate writes subtitle to targetFile using the generator selected by targetFormat ("lrc", "srt", "vtt" or "txt", matched case-sensitively); any other format yields an error wrapping ErrUnsupportedFormat and nothing is written by this function.
+func convertFromIntermediate(subtitle model.Subtitle, targetFile, targetFormat string) error {
+	switch targetFormat {
+	case "lrc":
+		return lrc.ConvertFromSubtitle(subtitle, targetFile)
+	case "srt":
+		return srt.ConvertFromSubtitle(subtitle, targetFile)
+	case "vtt":
+		return vtt.ConvertFromSubtitle(subtitle, targetFile)
 	case "txt":
-		return lrcToTxt(sourceLyrics, targetFile)
-	case "srt":
-		return lrcToSRT(sourceLyrics, targetFile)
-	case "lrc":
-		return lrc.Generate(sourceLyrics, targetFile)
+		return txt.GenerateFromSubtitle(subtitle, targetFile)
 	default:
-		return fmt.Errorf("%w: %s", ErrUnsupportedFormat, targetFmt)
+		return fmt.Errorf("%w: %s", ErrUnsupportedFormat, targetFormat)
 	}
 }
-
-// convertFromSRT converts an SRT file to another format
-func convertFromSRT(sourceFile, targetFile, targetFmt string) error {
-	entries, err := srt.Parse(sourceFile)
-	if err != nil {
-		return fmt.Errorf("error parsing source SRT file: %w", err)
-	}
-
-	switch targetFmt {
-	case "txt":
-		return srtToTxt(entries, targetFile)
-	case "lrc":
-		lyrics := srt.ConvertToLyrics(entries)
-		return lrc.Generate(lyrics, targetFile)
-	case "srt":
-		return srt.Generate(entries, targetFile)
-	default:
-		return fmt.Errorf("%w: %s", ErrUnsupportedFormat, targetFmt)
-	}
-}
-
-// lrcToTxt converts LRC lyrics to a plain text file
-func lrcToTxt(lyrics model.Lyrics, targetFile string) error {
-	file, err := os.Create(targetFile)
-	if err != nil {
-		return fmt.Errorf("error creating target file: %w", err)
-	}
-	defer file.Close()
-
-	for _, content := range lyrics.Content {
-		if _, err := fmt.Fprintln(file, content); err != nil {
-			return err
-		}
-	}
-
-	return nil
-}
-
-// lrcToSRT converts LRC lyrics to an SRT file
-func lrcToSRT(lyrics model.Lyrics, targetFile string) error {
-	var entries []model.SRTEntry
-
-	for i, content := range lyrics.Content {
-		if i >= len(lyrics.Timeline) {
-			break
-		}
-
-		startTime := lyrics.Timeline[i]
-		endTime := startTime
-		
-		// If there's a next timeline entry, use it for end time
-		// Otherwise add a few seconds to the start time
-		if i+1 < len(lyrics.Timeline) {
-			endTime = lyrics.Timeline[i+1]
-		} else {
-			endTime.Seconds += 3
-		}
-
-		entry := model.SRTEntry{
-			Number:    i + 1,
-			StartTime: startTime,
-			EndTime:   endTime,
-			Content:   content,
-		}
-
-		entries = append(entries, entry)
-	}
-
-	return srt.Generate(entries, targetFile)
-}
-
-// srtToTxt converts SRT entries to a plain text file
-func srtToTxt(entries []model.SRTEntry, targetFile string) error {
-	file, err := os.Create(targetFile)
-	if err != nil {
-		return fmt.Errorf("error creating target file: %w", err)
-	}
-	defer file.Close()
-
-	for _, entry := range entries {
-		if _, err := fmt.Fprintln(file, entry.Content); err != nil {
-			return err
-		}
-	}
-
-	return nil
-}
--- a/internal/format/lrc/lrc.go
+++ b/internal/format/lrc/lrc.go
@ -180,3 +180,87 @@ func Format(filePath string) error {
 	
 	return Generate(lyrics, filePath)
 }
+
+// ConvertToSubtitle parses the LRC file at filePath and maps it onto the
+// intermediate Subtitle model.
+//
+// All LRC metadata tags are copied into Subtitle.Metadata, and the "ti" tag,
+// when present, also becomes Subtitle.Title. Each lyric line becomes one entry
+// numbered from 1 that starts at its own timestamp and ends at the timestamp of
+// the following line; the last line ends three seconds after it starts. Lyric
+// lines that have no matching timestamp are dropped. Errors from Parse are
+// returned unchanged.
+func ConvertToSubtitle(filePath string) (model.Subtitle, error) {
+	lyrics, err := Parse(filePath)
+	if err != nil {
+		return model.Subtitle{}, err
+	}
+
+	result := model.NewSubtitle()
+	result.Format = "lrc"
+
+	for k, v := range lyrics.Metadata {
+		result.Metadata[k] = v
+	}
+	if ti, found := lyrics.Metadata["ti"]; found {
+		result.Title = ti
+	}
+
+	// Only lines that have a timestamp can be converted.
+	count := len(lyrics.Content)
+	if len(lyrics.Timeline) < count {
+		count = len(lyrics.Timeline)
+	}
+
+	for idx := 0; idx < count; idx++ {
+		cue := model.NewSubtitleEntry()
+		cue.Index = idx + 1
+		cue.Text = lyrics.Content[idx]
+		cue.StartTime = lyrics.Timeline[idx]
+
+		if next := idx + 1; next < len(lyrics.Timeline) {
+			// The cue lasts until the next line starts.
+			cue.EndTime = lyrics.Timeline[next]
+		} else {
+			// Last line: default to start + 3 seconds, carrying any overflow
+			// into minutes and then hours.
+			h, m, s := cue.StartTime.Hours, cue.StartTime.Minutes, cue.StartTime.Seconds+3
+			if s >= 60 {
+				s -= 60
+				m++
+			}
+			if m >= 60 {
+				m -= 60
+				h++
+			}
+			cue.EndTime = model.Timestamp{
+				Hours:        h,
+				Minutes:      m,
+				Seconds:      s,
+				Milliseconds: cue.StartTime.Milliseconds,
+			}
+		}
+
+		result.Entries = append(result.Entries, cue)
+	}
+
+	return result, nil
+}
+
+// ConvertFromSubtitle writes subtitle to filePath as an LRC file.
+//
+// The subtitle metadata is copied into the LRC tags; the subtitle title fills
+// the "ti" tag only when that tag is missing or empty. Each entry contributes
+// its start time and text; end times are not carried over. The actual file
+// output, and any error, comes from Generate.
+func ConvertFromSubtitle(subtitle model.Subtitle, filePath string) error {
+	tags := make(map[string]string)
+	for k, v := range subtitle.Metadata {
+		tags[k] = v
+	}
+	if tags["ti"] == "" && subtitle.Title != "" {
+		tags["ti"] = subtitle.Title
+	}
+
+	lyrics := model.Lyrics{Metadata: tags}
+	for i := range subtitle.Entries {
+		lyrics.Timeline = append(lyrics.Timeline, subtitle.Entries[i].StartTime)
+		lyrics.Content = append(lyrics.Content, subtitle.Entries[i].Text)
+	}
+
+	return Generate(lyrics, filePath)
+}
--- a/internal/format/srt/srt.go
+++ b/internal/format/srt/srt.go
@ -152,3 +152,91 @@ func ConvertToLyrics(entries []model.SRTEntry) model.Lyrics {

 	return lyrics
 }
+
+// ConvertToSubtitle parses the SRT file at filePath and maps every entry onto
+// the intermediate Subtitle model.
+//
+// Number, start time, end time and content are copied one to one. When the
+// content contains both "<" and ">", the entry is flagged with
+// FormatData["has_html_tags"], and an opening <i>, <b> or <u> tag (lower or
+// upper case) additionally sets the "italic", "bold" or "underline" style to
+// "true". The text itself is left untouched. Parse failures are wrapped and
+// returned together with an empty Subtitle.
+func ConvertToSubtitle(filePath string) (model.Subtitle, error) {
+	parsed, err := Parse(filePath)
+	if err != nil {
+		return model.Subtitle{}, fmt.Errorf("error parsing SRT file: %w", err)
+	}
+
+	// Opening tags that map onto a style flag.
+	markers := []struct {
+		lower, upper, style string
+	}{
+		{"<i>", "<I>", "italic"},
+		{"<b>", "<B>", "bold"},
+		{"<u>", "<U>", "underline"},
+	}
+
+	result := model.NewSubtitle()
+	result.Format = "srt"
+
+	for _, src := range parsed {
+		cue := model.NewSubtitleEntry()
+		cue.Index = src.Number
+		cue.StartTime = src.StartTime
+		cue.EndTime = src.EndTime
+		cue.Text = src.Content
+
+		if strings.Contains(src.Content, "<") && strings.Contains(src.Content, ">") {
+			for _, mk := range markers {
+				if strings.Contains(src.Content, mk.lower) || strings.Contains(src.Content, mk.upper) {
+					cue.Styles[mk.style] = "true"
+				}
+			}
+			cue.FormatData["has_html_tags"] = true
+		}
+
+		result.Entries = append(result.Entries, cue)
+	}
+
+	return result, nil
+}
+
+// ConvertFromSubtitle converts our intermediate Subtitle representation to SRT format
+func ConvertFromSubtitle(subtitle model.Subtitle, filePath string) error {
+	var entries []model.SRTEntry
+	
+	// Convert intermediate representation to SRT entries
+	for i, subtitleEntry := range subtitle.Entries {
+		entry := model.SRTEntry{
+			Number:    i + 1, // Ensure sequential numbering
+			StartTime: subtitleEntry.StartTime,
+			EndTime:   subtitleEntry.EndTime,
+			Content:   subtitleEntry.Text,
+		}
+		
+		// Use index from original entry if available
+		if subtitleEntry.Index > 0 {
+			entry.Number = subtitleEntry.Index
+		}
+		
+		// Apply any styling stored in the entry if needed
+		// Note: SRT only supports basic HTML tags, so we convert style attributes back to HTML
+		content := entry.Content
+		if _, ok := subtitleEntry.Styles["italic"]; ok && subtitleEntry.Styles["italic"] == "true" {
+			if !strings.Contains(content, "<i>") {
+				content = "<i>" + content + "</i>"
+			}
+		}
+		if _, ok := subtitleEntry.Styles["bold"]; ok && subtitleEntry.Styles["bold"] == "true" {
+			if !strings.Contains(content, "<b>") {
+				content = "<b>" + content + "</b>"
+			}
+		}
+		if _, ok := subtitleEntry.Styles["underline"]; ok && subtitleEntry.Styles["underline"] == "true" {
+			if !strings.Contains(content, "<u>") {
+				content = "<u>" + content + "</u>"
+			}
+		}
+		
+		// Only update content if we applied styling
+		if content != entry.Content {
+			entry.Content = content
+		}
+		
+		entries = append(entries, entry)
+	}
+	
+	return Generate(entries, filePath)
+}
--- a/internal/format/txt/txt.go
+++ b/internal/format/txt/txt.go
@ -0,0 +1,30 @@
+package txt
+
+import (
+	"fmt"
+	"os"
+
+	"sub-cli/internal/model"
+)
+
+// GenerateFromSubtitle writes subtitle to filePath as plain text.
+//
+// The output starts with the title followed by a blank line when a title is
+// set, then contains the text of every entry on its own line. Timestamps,
+// styles and metadata are not written. An existing file at filePath is
+// truncated.
+//
+// It returns an error when the file cannot be created, when any write fails,
+// or when closing the file fails.
+func GenerateFromSubtitle(subtitle model.Subtitle, filePath string) (err error) {
+	file, err := os.Create(filePath)
+	if err != nil {
+		return fmt.Errorf("error creating TXT file: %w", err)
+	}
+	defer func() {
+		// A failed Close can mean buffered data never reached the disk, so it
+		// is reported unless an earlier error is already being returned.
+		if closeErr := file.Close(); closeErr != nil && err == nil {
+			err = fmt.Errorf("error closing TXT file: %w", closeErr)
+		}
+	}()
+
+	// Write title if available
+	if subtitle.Title != "" {
+		if _, err := fmt.Fprintf(file, "%s\n\n", subtitle.Title); err != nil {
+			return fmt.Errorf("error writing TXT file: %w", err)
+		}
+	}
+
+	// Write content without timestamps
+	for _, entry := range subtitle.Entries {
+		if _, err := fmt.Fprintln(file, entry.Text); err != nil {
+			return fmt.Errorf("error writing TXT file: %w", err)
+		}
+	}
+
+	return nil
+}
--- a/internal/format/vtt/vtt.go
+++ b/internal/format/vtt/vtt.go
@ -0,0 +1,393 @@
+package vtt
+
+import (
+	"bufio"
+	"fmt"
+	"os"
+	"regexp"
+	"sort"
+	"strconv"
+	"strings"
+	"time"
+
+	"sub-cli/internal/model"
+)
+
+// Constants for the WebVTT format.
+const (
+	VTTHeader = "WEBVTT" // Signature the first line of a file must start with; Parse rejects files without it and Generate writes it.
+)
+
+// Parse reads the WebVTT file at filePath into the intermediate Subtitle
+// representation.
+//
+// The first line must start with the WEBVTT signature (a leading UTF-8 byte
+// order mark is tolerated); a header of the form "WEBVTT - title" sets
+// Subtitle.Title. The remaining lines are processed block by block, with blank
+// lines acting as block separators:
+//   - STYLE blocks are collected into Subtitle.Styles["css"].
+//   - NOTE blocks are collected into Subtitle.Comments, one element per block.
+//   - Single-line "REGION id: key=value ..." definitions become Subtitle.Regions.
+//   - Anything else is a cue: an optional identifier line, a timing line
+//     containing "-->" with optional cue settings, and the cue text.
+//
+// STYLE, NOTE and REGION are only recognised outside of a cue, so cue text that
+// happens to start with one of these words is kept as text.
+//
+// It returns an error when the file cannot be opened or read, when it is
+// empty, when the signature is missing, or when a timing line contains more
+// than one "-->". Timestamps that cannot be parsed are stored as zero values.
+func Parse(filePath string) (model.Subtitle, error) {
+	subtitle := model.NewSubtitle()
+	subtitle.Format = "vtt"
+
+	file, err := os.Open(filePath)
+	if err != nil {
+		return subtitle, fmt.Errorf("error opening VTT file: %w", err)
+	}
+	defer file.Close()
+
+	scanner := bufio.NewScanner(file)
+
+	// Check header
+	if !scanner.Scan() {
+		// Distinguish a read failure from a genuinely empty file.
+		if err := scanner.Err(); err != nil {
+			return subtitle, fmt.Errorf("error reading VTT file: %w", err)
+		}
+		return subtitle, fmt.Errorf("empty VTT file")
+	}
+
+	header := strings.TrimSpace(strings.TrimPrefix(scanner.Text(), "\uFEFF"))
+	if !strings.HasPrefix(header, VTTHeader) {
+		return subtitle, fmt.Errorf("invalid VTT file: missing WEBVTT header")
+	}
+
+	// Only a header of the exact form "WEBVTT - title" carries a title.
+	const titlePrefix = VTTHeader + " - "
+	if strings.HasPrefix(header, titlePrefix) {
+		subtitle.Title = strings.TrimSpace(strings.TrimPrefix(header, titlePrefix))
+	}
+
+	// Start from a fully initialised entry: the zero value has nil maps, and
+	// storing cue settings or an identifier in them would panic.
+	currentEntry := model.NewSubtitleEntry()
+	var (
+		inCue, inStyle, inNote     bool
+		styleBuffer, cueTextBuffer strings.Builder
+	)
+
+	lineNum := 1
+	for scanner.Scan() {
+		lineNum++
+		line := scanner.Text()
+
+		// A blank line terminates whichever block is currently open.
+		if strings.TrimSpace(line) == "" {
+			switch {
+			case inStyle:
+				subtitle.Styles["css"] += styleBuffer.String()
+				styleBuffer.Reset()
+				inStyle = false
+			case inNote:
+				inNote = false
+			case inCue:
+				currentEntry.Text = cueTextBuffer.String()
+				subtitle.Entries = append(subtitle.Entries, currentEntry)
+				currentEntry = model.NewSubtitleEntry()
+				cueTextBuffer.Reset()
+				inCue = false
+			}
+			continue
+		}
+
+		// Body of a STYLE block
+		if inStyle {
+			styleBuffer.WriteString(line)
+			styleBuffer.WriteString("\n")
+			continue
+		}
+
+		// Continuation line of a multi-line NOTE block. A new NOTE line or a
+		// timing line ends the note even without a separating blank line.
+		if inNote && !strings.HasPrefix(line, "NOTE") && !strings.Contains(line, "-->") {
+			last := len(subtitle.Comments) - 1
+			if subtitle.Comments[last] == "" {
+				subtitle.Comments[last] = strings.TrimSpace(line)
+			} else {
+				subtitle.Comments[last] += "\n" + line
+			}
+			continue
+		}
+
+		if !inCue {
+			// Check for style blocks
+			if strings.HasPrefix(line, "STYLE") {
+				inStyle = true
+				inNote = false
+				continue
+			}
+
+			// Check for NOTE comments
+			if strings.HasPrefix(line, "NOTE") {
+				comment := strings.TrimSpace(strings.TrimPrefix(line, "NOTE"))
+				subtitle.Comments = append(subtitle.Comments, comment)
+				inNote = true
+				continue
+			}
+
+			// Check for REGION definitions
+			if strings.HasPrefix(line, "REGION") {
+				inNote = false
+				parts := strings.Split(strings.TrimPrefix(line, "REGION"), ":")
+				if len(parts) >= 2 {
+					region := model.NewSubtitleRegion(strings.TrimSpace(parts[0]))
+					for _, setting := range strings.Split(parts[1], " ") {
+						keyValue := strings.Split(setting, "=")
+						if len(keyValue) == 2 {
+							region.Settings[strings.TrimSpace(keyValue[0])] = strings.TrimSpace(keyValue[1])
+						}
+					}
+					subtitle.Regions = append(subtitle.Regions, region)
+				}
+				continue
+			}
+		}
+
+		// Check for timestamp lines
+		if strings.Contains(line, "-->") {
+			inCue = true
+			inNote = false
+
+			timestamps := strings.Split(line, "-->")
+			if len(timestamps) != 2 {
+				return subtitle, fmt.Errorf("invalid timestamp format at line %d: %s", lineNum, line)
+			}
+
+			startTimeStr := strings.TrimSpace(timestamps[0])
+
+			// The end timestamp may be followed by whitespace-separated cue settings.
+			endTimeStr := ""
+			var settings []string
+			if fields := strings.Fields(timestamps[1]); len(fields) > 0 {
+				endTimeStr = fields[0]
+				settings = fields[1:]
+			}
+
+			currentEntry.StartTime = parseVTTTimestamp(startTimeStr)
+			currentEntry.EndTime = parseVTTTimestamp(endTimeStr)
+
+			for _, setting := range settings {
+				parts := strings.SplitN(setting, ":", 2)
+				if len(parts) == 2 {
+					currentEntry.Styles[strings.TrimSpace(parts[0])] = strings.TrimSpace(parts[1])
+				} else {
+					// Keep settings that are not key:value pairs as flags.
+					currentEntry.FormatData["setting_"+setting] = true
+				}
+			}
+
+			cueTextBuffer.Reset()
+			continue
+		}
+
+		// A line in front of the timing line is the cue identifier.
+		if !inCue && currentEntry.Index == 0 {
+			if num, err := strconv.Atoi(line); err == nil {
+				currentEntry.Index = num
+			} else {
+				// String identifiers are kept in metadata; the index falls
+				// back to the position of the cue.
+				currentEntry.Metadata["identifier"] = line
+				currentEntry.Index = len(subtitle.Entries) + 1
+			}
+			continue
+		}
+
+		// If we're in a cue, add this line to the text
+		if inCue {
+			if cueTextBuffer.Len() > 0 {
+				cueTextBuffer.WriteString("\n")
+			}
+			cueTextBuffer.WriteString(line)
+		}
+	}
+
+	if err := scanner.Err(); err != nil {
+		return subtitle, fmt.Errorf("error reading VTT file: %w", err)
+	}
+
+	// Flush blocks that were still open when the file ended.
+	if inStyle && styleBuffer.Len() > 0 {
+		subtitle.Styles["css"] += styleBuffer.String()
+	}
+	if inCue && cueTextBuffer.Len() > 0 {
+		currentEntry.Text = cueTextBuffer.String()
+		subtitle.Entries = append(subtitle.Entries, currentEntry)
+	}
+
+	// Process cue text to extract styling
+	processVTTCueTextStyling(&subtitle)
+
+	return subtitle, nil
+}
+
+// vttTimestampPattern matches the full "hh:mm:ss.fff" form as well as the short
+// "mm:ss.fff" form. Submatches: 1 = hours (empty in the short form),
+// 2 = minutes, 3 = seconds, 4 = fractional digits.
+var vttTimestampPattern = regexp.MustCompile(`(?:(\d+):)?(\d+):(\d+)\.(\d+)`)
+
+// parseVTTTimestamp parses a VTT timestamp string into our Timestamp model.
+//
+// Both "hh:mm:ss.fff" and the hour-less "mm:ss.fff" form are accepted. The
+// fraction is normalised to milliseconds: fewer than three digits are
+// right-padded with zeros and extra digits are truncated. Input that matches
+// neither form is handed to time.Parse with the layout "15:04:05.000" as a last
+// resort; if that fails too, the zero Timestamp is returned. No error is
+// reported in any case.
+func parseVTTTimestamp(timeStr string) model.Timestamp {
+	var hours, minutes, seconds, milliseconds int
+
+	if m := vttTimestampPattern.FindStringSubmatch(timeStr); m != nil {
+		// The pattern admits digits only, so Atoi can fail solely on overflow;
+		// such a field is left at zero.
+		if m[1] != "" {
+			hours, _ = strconv.Atoi(m[1])
+		}
+		minutes, _ = strconv.Atoi(m[2])
+		seconds, _ = strconv.Atoi(m[3])
+		milliseconds = vttFractionToMillis(m[4])
+	} else if t, err := time.Parse("15:04:05.000", timeStr); err == nil {
+		hours = t.Hour()
+		minutes = t.Minute()
+		seconds = t.Second()
+		milliseconds = t.Nanosecond() / 1000000
+	}
+
+	return model.Timestamp{
+		Hours:        hours,
+		Minutes:      minutes,
+		Seconds:      seconds,
+		Milliseconds: milliseconds,
+	}
+}
+
+// vttFractionToMillis converts the digits after the decimal point of a
+// timestamp into milliseconds ("5" -> 500, "05" -> 50, "1234" -> 123).
+func vttFractionToMillis(frac string) int {
+	switch {
+	case len(frac) > 3:
+		frac = frac[:3]
+	case len(frac) < 3:
+		frac += strings.Repeat("0", 3-len(frac))
+	}
+	ms, _ := strconv.Atoi(frac) // at most three digits, cannot fail
+	return ms
+}
+
+// processVTTCueTextStyling flags every entry whose text appears to contain
+// markup.
+//
+// An entry whose text contains both "<" and ">" gets
+// FormatData["has_html_tags"] set to true; the text itself is not modified and
+// no individual tags are interpreted. Entries are updated in place, and a nil
+// FormatData map is allocated on demand so that entries not created through
+// model.NewSubtitleEntry cannot cause a panic.
+func processVTTCueTextStyling(subtitle *model.Subtitle) {
+	for i := range subtitle.Entries {
+		entry := &subtitle.Entries[i]
+
+		if !strings.Contains(entry.Text, "<") || !strings.Contains(entry.Text, ">") {
+			continue
+		}
+
+		if entry.FormatData == nil {
+			entry.FormatData = make(map[string]interface{})
+		}
+		entry.FormatData["has_html_tags"] = true
+	}
+}
+
+// Generate writes subtitle to filePath as a WebVTT file, truncating any
+// existing file.
+//
+// The output consists of, in order: the WEBVTT header (with " - title" when a
+// title is set), one STYLE block when Styles["css"] is non-empty, the REGION
+// definitions, one NOTE block per comment, and the cues. Every cue is written
+// as an identifier line (the "identifier" metadata value, else a positive
+// Index, else the 1-based position), a timing line with the cue settings taken
+// from entry.Styles, and the text, followed by a blank line.
+//
+// Settings are written in sorted key order so the output is reproducible. The
+// "italic", "bold" and "underline" style keys are skipped: they are markers
+// recorded by the SRT converter, not WebVTT cue settings, and the tags they
+// describe are still present in the cue text.
+//
+// It returns an error when the file cannot be created, written or closed.
+func Generate(subtitle model.Subtitle, filePath string) error {
+	file, err := os.Create(filePath)
+	if err != nil {
+		return fmt.Errorf("error creating VTT file: %w", err)
+	}
+
+	// bufio.Writer remembers the first write error and reports it from Flush,
+	// so the individual Fprint calls below need no checks of their own.
+	w := bufio.NewWriter(file)
+
+	// Write header
+	if subtitle.Title != "" {
+		fmt.Fprintf(w, "%s - %s\n\n", VTTHeader, subtitle.Title)
+	} else {
+		fmt.Fprintf(w, "%s\n\n", VTTHeader)
+	}
+
+	// Write styles if any. Trailing newlines are trimmed so the block is
+	// followed by exactly one blank line.
+	if cssStyle := strings.TrimRight(subtitle.Styles["css"], "\n"); cssStyle != "" {
+		fmt.Fprintf(w, "STYLE\n%s\n\n", cssStyle)
+	}
+
+	// Write regions if any
+	for _, region := range subtitle.Regions {
+		fmt.Fprintf(w, "REGION %s:", region.ID)
+		for _, key := range sortedVTTKeys(region.Settings) {
+			fmt.Fprintf(w, " %s=%s", key, region.Settings[key])
+		}
+		fmt.Fprintln(w)
+	}
+	if len(subtitle.Regions) > 0 {
+		fmt.Fprintln(w)
+	}
+
+	// Write comments if any. Each NOTE is terminated by a blank line so that
+	// consecutive comments stay separate blocks.
+	for _, comment := range subtitle.Comments {
+		fmt.Fprintf(w, "NOTE %s\n\n", comment)
+	}
+
+	// Write cues
+	for i, entry := range subtitle.Entries {
+		// Write identifier
+		if identifier := entry.Metadata["identifier"]; identifier != "" {
+			fmt.Fprintln(w, identifier)
+		} else if entry.Index > 0 {
+			fmt.Fprintln(w, entry.Index)
+		} else {
+			fmt.Fprintln(w, i+1)
+		}
+
+		// Write timestamps and settings
+		fmt.Fprintf(w, "%s --> %s", formatVTTTimestamp(entry.StartTime), formatVTTTimestamp(entry.EndTime))
+		for _, key := range sortedVTTKeys(entry.Styles) {
+			switch key {
+			case "italic", "bold", "underline":
+				// SRT style markers, not cue settings.
+				continue
+			}
+			fmt.Fprintf(w, " %s:%s", key, entry.Styles[key])
+		}
+		fmt.Fprintln(w)
+
+		// Write cue text
+		fmt.Fprintln(w, entry.Text)
+		fmt.Fprintln(w)
+	}
+
+	if err := w.Flush(); err != nil {
+		file.Close() // the write error is the one worth reporting
+		return fmt.Errorf("error writing VTT file: %w", err)
+	}
+	if err := file.Close(); err != nil {
+		return fmt.Errorf("error closing VTT file: %w", err)
+	}
+
+	return nil
+}
+
+// sortedVTTKeys returns the keys of m in ascending order.
+func sortedVTTKeys(m map[string]string) []string {
+	keys := make([]string, 0, len(m))
+	for key := range m {
+		keys = append(keys, key)
+	}
+	sort.Strings(keys)
+	return keys
+}
+
+// formatVTTTimestamp formats a Timestamp struct as a VTT timestamp string
+func formatVTTTimestamp(ts model.Timestamp) string {
+	return fmt.Sprintf("%02d:%02d:%02d.%03d", 
+		ts.Hours, 
+		ts.Minutes, 
+		ts.Seconds, 
+		ts.Milliseconds)
+}
+
+// Format rewrites the VTT file at filePath in normalised form: the file is
+// parsed, every entry is renumbered sequentially starting at 1, and the result
+// is written back to the same path with Generate. A parse failure is wrapped
+// and returned before anything is written; errors from Generate are returned
+// as they are.
+func Format(filePath string) error {
+	parsed, parseErr := Parse(filePath)
+	if parseErr != nil {
+		return fmt.Errorf("error parsing VTT file: %w", parseErr)
+	}
+
+	// Renumber in file order, discarding whatever numbering the file had.
+	for pos := 0; pos < len(parsed.Entries); pos++ {
+		parsed.Entries[pos].Index = pos + 1
+	}
+
+	return Generate(parsed, filePath)
+}
+
+// ConvertToSubtitle loads the VTT file at filePath into the intermediate
+// Subtitle structure. It delegates entirely to Parse and exists so that the
+// package offers the same conversion entry points as the other format packages.
+func ConvertToSubtitle(filePath string) (model.Subtitle, error) {
+	subtitle, err := Parse(filePath)
+	return subtitle, err
+}
+
+// ConvertFromSubtitle writes the intermediate subtitle to filePath in VTT
+// format. It delegates entirely to Generate and returns its error unchanged.
+func ConvertFromSubtitle(subtitle model.Subtitle, filePath string) error {
+	err := Generate(subtitle, filePath)
+	return err
+}
--- a/internal/model/model.go
+++ b/internal/model/model.go
@ -22,3 +22,63 @@ type SRTEntry struct {
 	EndTime   Timestamp
 	Content   string
 }
+
+// SubtitleEntry is a single timed entry of the format-neutral intermediate
+// representation that conversions go through.
+type SubtitleEntry struct {
+	// Index is the sequential index/number of the entry.
+	Index int
+	// StartTime is the time at which the entry starts.
+	StartTime Timestamp
+	// EndTime is the time at which the entry ends.
+	EndTime Timestamp
+	// Text is the subtitle text content.
+	Text string
+	// Styles holds styling information (e.g., VTT's align, position).
+	Styles map[string]string
+	// Classes lists CSS classes (for VTT).
+	Classes []string
+	// Metadata holds additional metadata.
+	Metadata map[string]string
+	// FormatData holds format-specific data that doesn't fit elsewhere.
+	FormatData map[string]interface{}
+}
+
+// Subtitle is the format-neutral intermediate document that source formats are
+// converted into and target formats are generated from.
+type Subtitle struct {
+	// Title is an optional title.
+	Title string
+	// Metadata holds global metadata (e.g., LRC's ti, ar, al).
+	Metadata map[string]string
+	// Entries are the subtitle entries.
+	Entries []SubtitleEntry
+	// Format names the source format.
+	Format string
+	// Styles holds global styles (e.g., VTT STYLE blocks).
+	Styles map[string]string
+	// Comments holds comments/notes (for VTT).
+	Comments []string
+	// Regions holds region definitions (for VTT).
+	Regions []SubtitleRegion
+	// FormatData holds format-specific data that doesn't fit elsewhere.
+	FormatData map[string]interface{}
+}
+
+// SubtitleRegion describes a named region definition (mainly for VTT).
+type SubtitleRegion struct {
+	// ID identifies the region.
+	ID string
+	// Settings maps each region setting name to its value.
+	Settings map[string]string
+}
+
+// Creates a new empty Subtitle
+func NewSubtitle() Subtitle {
+	return Subtitle{
+		Metadata:   make(map[string]string),
+		Entries:    []SubtitleEntry{},
+		Styles:     make(map[string]string),
+		Comments:   []string{},
+		Regions:    []SubtitleRegion{},
+		FormatData: make(map[string]interface{}),
+	}
+}
+
+// Creates a new empty SubtitleEntry
+func NewSubtitleEntry() SubtitleEntry {
+	return SubtitleEntry{
+		Styles:     make(map[string]string),
+		Classes:    []string{},
+		Metadata:   make(map[string]string),
+		FormatData: make(map[string]interface{}),
+	}
+}
+
+// NewSubtitleRegion returns a SubtitleRegion with the given id and an
+// allocated, empty Settings map.
+func NewSubtitleRegion(id string) SubtitleRegion {
+	region := SubtitleRegion{ID: id}
+	region.Settings = map[string]string{}
+	return region
+}