// Package vtt reads and writes WebVTT subtitle files using the project's
// intermediate model.Subtitle representation.
package vtt

import (
	"bufio"
	"fmt"
	"os"
	"regexp"
	"sort"
	"strconv"
	"strings"
	"time"

	"sub-cli/internal/model"
)

// Constants for VTT format
const (
	VTTHeader = "WEBVTT"
)

// vttTimestampPattern matches either the full "HH:MM:SS.fff" form (groups
// 1-4) or the short "MM:SS.fff" form (groups 5-7). It is compiled once at
// package scope instead of on every parseVTTTimestamp call.
var vttTimestampPattern = regexp.MustCompile(`(\d+):(\d+):(\d+)\.(\d+)|(\d+):(\d+)\.(\d+)`)

// Parse parses a WebVTT file into our intermediate Subtitle representation.
//
// The first line must start with the WEBVTT signature (an optional UTF-8 byte
// order mark is tolerated). STYLE blocks are concatenated into
// subtitle.Styles["css"], NOTE lines are collected into subtitle.Comments,
// REGION lines are skipped, and every timing line ("start --> end [settings]")
// starts a new cue. Cues whose text is empty are not emitted. Entries without
// a numeric identifier are numbered by their 1-based position.
//
// It returns an error when the file cannot be opened or read, when it is
// empty, when the header is missing, or when a timing line is malformed.
func Parse(filePath string) (model.Subtitle, error) {
	subtitle := model.NewSubtitle()
	subtitle.Format = "vtt"

	// Ensure maps are initialized
	if subtitle.Styles == nil {
		subtitle.Styles = make(map[string]string)
	}

	file, err := os.Open(filePath)
	if err != nil {
		return subtitle, fmt.Errorf("error opening VTT file: %w", err)
	}
	defer file.Close()

	scanner := bufio.NewScanner(file)

	// First line must be WEBVTT
	if !scanner.Scan() {
		// Distinguish a genuine read failure from an empty file.
		if err := scanner.Err(); err != nil {
			return subtitle, fmt.Errorf("error reading VTT file: %w", err)
		}
		return subtitle, fmt.Errorf("empty VTT file")
	}

	// A leading byte order mark would otherwise defeat the prefix check.
	header := strings.TrimPrefix(scanner.Text(), "\ufeff")
	if !strings.HasPrefix(header, VTTHeader) {
		return subtitle, fmt.Errorf("invalid VTT file, missing WEBVTT header")
	}

	// Get metadata from header ("WEBVTT - Title")
	if strings.HasPrefix(header, VTTHeader+" - ") {
		subtitle.Title = strings.TrimSpace(strings.TrimPrefix(header, VTTHeader+" - "))
	}

	var (
		currentEntry  model.SubtitleEntry
		inCue         bool
		inStyle       bool
		styleBuffer   strings.Builder
		cueTextBuffer strings.Builder
		prevLine      string // candidate cue identifier: last non-blank line seen outside a cue
	)
	lineNum := 1 // the header was line 1

	// flushCue stores the cue being built (if it has any text) and resets the
	// cue state so the next block starts clean.
	flushCue := func() {
		if inCue && cueTextBuffer.Len() > 0 {
			currentEntry.Text = strings.TrimSpace(cueTextBuffer.String())
			subtitle.Entries = append(subtitle.Entries, currentEntry)
		}
		inCue = false
		cueTextBuffer.Reset()
		currentEntry = model.SubtitleEntry{}
	}

	// flushStyle appends the collected STYLE block to the stylesheet text.
	flushStyle := func() {
		if styleBuffer.Len() > 0 {
			subtitle.Styles["css"] += styleBuffer.String()
		}
		styleBuffer.Reset()
		inStyle = false
	}

	for scanner.Scan() {
		lineNum++
		line := scanner.Text()
		blank := strings.TrimSpace(line) == ""

		// Inside a STYLE block everything up to the next blank line is CSS.
		if inStyle {
			if blank {
				flushStyle()
			} else {
				styleBuffer.WriteString(line)
				styleBuffer.WriteString("\n")
			}
			continue
		}

		// A blank line terminates whatever block we are in.
		if blank {
			if inCue {
				flushCue()
			}
			prevLine = ""
			continue
		}

		// Block keywords are only recognised between cues, so cue text that
		// happens to start with one of them is kept as text.
		if !inCue {
			switch {
			case strings.HasPrefix(line, "STYLE"):
				inStyle = true
				prevLine = ""
				continue
			case strings.HasPrefix(line, "NOTE"):
				comment := strings.TrimSpace(strings.TrimPrefix(line, "NOTE"))
				subtitle.Comments = append(subtitle.Comments, comment)
				prevLine = ""
				continue
			case strings.HasPrefix(line, "REGION"):
				// Region definitions are currently not interpreted.
				prevLine = ""
				continue
			}
		}

		// Check for cue timing line
		if strings.Contains(line, " --> ") {
			// Remember whether a cue was already open BEFORE starting the new
			// one: the identifier lookup below depends on it.
			wasInCue := inCue
			if wasInCue {
				// No blank line separated the cues; keep the previous one.
				flushCue()
			}

			currentEntry = model.NewSubtitleEntry()
			inCue = true

			// Use the previous line as cue identifier if it's a number
			if !wasInCue && prevLine != "" {
				if index, err := strconv.Atoi(strings.TrimSpace(prevLine)); err == nil {
					currentEntry.Index = index
				}
			}

			timestamps := strings.Split(line, " --> ")
			if len(timestamps) != 2 {
				return subtitle, fmt.Errorf("invalid timestamp format at line %d: %s", lineNum, line)
			}

			// The end timestamp may be followed by whitespace-separated cue settings.
			startTimeStr := strings.TrimSpace(timestamps[0])
			endTimeStr := ""
			var settings []string
			if fields := strings.Fields(timestamps[1]); len(fields) > 0 {
				endTimeStr = fields[0]
				settings = fields[1:]
			}

			currentEntry.StartTime = parseVTTTimestamp(startTimeStr)
			currentEntry.EndTime = parseVTTTimestamp(endTimeStr)

			currentEntry.Styles = make(map[string]string)
			currentEntry.FormatData = make(map[string]interface{})

			for _, pair := range settings {
				if strings.Contains(pair, ":") {
					parts := strings.Split(pair, ":")
					if len(parts) == 2 {
						currentEntry.Styles[strings.TrimSpace(parts[0])] = strings.TrimSpace(parts[1])
					}
					continue
				}
				// Handle non-key-value settings if any
				currentEntry.FormatData["setting_"+pair] = true
			}

			cueTextBuffer.Reset()
			prevLine = ""
			continue
		}

		// If we're in a cue, add the line to the text buffer
		if inCue {
			if cueTextBuffer.Len() > 0 {
				cueTextBuffer.WriteString("\n")
			}
			cueTextBuffer.WriteString(line)
		}

		prevLine = line
	}

	if err := scanner.Err(); err != nil {
		return subtitle, fmt.Errorf("error reading VTT file: %w", err)
	}

	// The file may end without a trailing blank line.
	if inStyle {
		flushStyle()
	}
	if inCue {
		flushCue()
	}

	// Ensure all entries have an index and initialized maps
	for i := range subtitle.Entries {
		if subtitle.Entries[i].Index == 0 {
			subtitle.Entries[i].Index = i + 1
		}
		if subtitle.Entries[i].Styles == nil {
			subtitle.Entries[i].Styles = make(map[string]string)
		}
		if subtitle.Entries[i].FormatData == nil {
			subtitle.Entries[i].FormatData = make(map[string]interface{})
		}
	}

	// Process cue text to extract styling
	processVTTCueTextStyling(&subtitle)

	return subtitle, nil
}

// vttFractionToMillis converts the digits after the decimal point of a
// timestamp into milliseconds: shorter fractions are right-padded with zeros
// ("5" -> 500) and longer ones are truncated to three digits.
func vttFractionToMillis(frac string) int {
	switch {
	case len(frac) > 3:
		frac = frac[:3]
	case len(frac) < 3:
		frac += strings.Repeat("0", 3-len(frac))
	}
	// frac is digits-only and at most three characters, so Atoi cannot fail.
	ms, _ := strconv.Atoi(frac)
	return ms
}

// parseVTTTimestamp parses a VTT timestamp string into our Timestamp model.
//
// Both the full "HH:MM:SS.fff" form and the short "MM:SS.fff" form are
// accepted. Input matching neither is given one more chance through
// time.Parse with the layout "15:04:05.000"; if that fails as well the zero
// Timestamp is returned.
func parseVTTTimestamp(timeStr string) model.Timestamp {
	var hours, minutes, seconds, milliseconds int

	// The captured groups are digits-only, so Atoi can only fail on overflow;
	// those errors are deliberately ignored, as before.
	matches := vttTimestampPattern.FindStringSubmatch(timeStr)
	switch {
	case matches != nil && matches[1] != "":
		// Full format: 00:00:00.000
		hours, _ = strconv.Atoi(matches[1])
		minutes, _ = strconv.Atoi(matches[2])
		seconds, _ = strconv.Atoi(matches[3])
		milliseconds = vttFractionToMillis(matches[4])
	case matches != nil && matches[5] != "":
		// Short format: 00:00.000
		minutes, _ = strconv.Atoi(matches[5])
		seconds, _ = strconv.Atoi(matches[6])
		milliseconds = vttFractionToMillis(matches[7])
	default:
		// Try another approach with time.Parse
		if t, err := time.Parse("15:04:05.000", timeStr); err == nil {
			hours = t.Hour()
			minutes = t.Minute()
			seconds = t.Second()
			milliseconds = t.Nanosecond() / 1000000
		}
	}

	return model.Timestamp{
		Hours:        hours,
		Minutes:      minutes,
		Seconds:      seconds,
		Milliseconds: milliseconds,
	}
}

// processVTTCueTextStyling flags entries whose text contains markup.
//
// The text itself is left untouched; an entry whose text contains both "<"
// and ">" (for example <b>, <i> or <u> tags) gets
// FormatData["has_html_tags"] set to true.
func processVTTCueTextStyling(subtitle *model.Subtitle) {
	for i := range subtitle.Entries {
		entry := &subtitle.Entries[i]
		if !strings.Contains(entry.Text, "<") || !strings.Contains(entry.Text, ">") {
			continue
		}
		// Writing to a nil map panics, so create it on demand.
		if entry.FormatData == nil {
			entry.FormatData = make(map[string]interface{})
		}
		entry.FormatData["has_html_tags"] = true
	}
}

// Generate generates a WebVTT file from our intermediate Subtitle
// representation and writes it to filePath, replacing any existing file.
//
// The output consists of the header (with the title, if any), the stylesheet
// stored under Styles["css"], region definitions, NOTE comments and finally
// the cues. Cue settings are written in sorted key order so the output is
// reproducible. An error is returned if the file cannot be created, written
// or closed.
func Generate(subtitle model.Subtitle, filePath string) (err error) {
	file, err := os.Create(filePath)
	if err != nil {
		return fmt.Errorf("error creating VTT file: %w", err)
	}
	defer func() {
		// A failed close can mean lost data; report it unless an earlier
		// error is already being returned.
		if cerr := file.Close(); cerr != nil && err == nil {
			err = fmt.Errorf("error closing VTT file: %w", cerr)
		}
	}()

	// bufio.Writer remembers the first write error, so it is enough to check
	// the result of Flush at the end.
	w := bufio.NewWriter(file)

	// Write header
	if subtitle.Title != "" {
		fmt.Fprintf(w, "%s - %s\n\n", VTTHeader, subtitle.Title)
	} else {
		fmt.Fprintf(w, "%s\n\n", VTTHeader)
	}

	// Write styles if any
	if cssStyle, ok := subtitle.Styles["css"]; ok && cssStyle != "" {
		fmt.Fprintln(w, "STYLE")
		// Trailing newlines are trimmed so the block ends with exactly one
		// blank line.
		fmt.Fprintln(w, strings.TrimRight(cssStyle, "\n"))
		fmt.Fprintln(w)
	}

	// Write regions if any
	for _, region := range subtitle.Regions {
		fmt.Fprintf(w, "REGION %s:", region.ID)
		// NOTE(review): settings are emitted in map iteration order, which Go
		// leaves unspecified.
		for key, value := range region.Settings {
			fmt.Fprintf(w, " %s=%s", key, value)
		}
		fmt.Fprintln(w)
	}

	// Write comments if any
	for _, comment := range subtitle.Comments {
		fmt.Fprintf(w, "NOTE %s\n", comment)
	}
	if len(subtitle.Comments) > 0 {
		fmt.Fprintln(w)
	}

	// Write cues
	for i, entry := range subtitle.Entries {
		// Identifier: explicit metadata first, then the index, then the position.
		if identifier, ok := entry.Metadata["identifier"]; ok && identifier != "" {
			fmt.Fprintln(w, identifier)
		} else if entry.Index > 0 {
			fmt.Fprintln(w, entry.Index)
		} else {
			fmt.Fprintln(w, i+1)
		}

		// Write timestamps and settings
		fmt.Fprintf(w, "%s --> %s", formatVTTTimestamp(entry.StartTime), formatVTTTimestamp(entry.EndTime))

		// Sort the setting names: ranging over the map directly would emit
		// them in a different order on every run.
		keys := make([]string, 0, len(entry.Styles))
		for key := range entry.Styles {
			keys = append(keys, key)
		}
		sort.Strings(keys)
		for _, key := range keys {
			fmt.Fprintf(w, " %s:%s", key, entry.Styles[key])
		}
		fmt.Fprintln(w)

		// Write cue text followed by the blank line that ends the cue
		fmt.Fprintln(w, entry.Text)
		fmt.Fprintln(w)
	}

	if err := w.Flush(); err != nil {
		return fmt.Errorf("error writing VTT file: %w", err)
	}
	return nil
}

// formatVTTTimestamp formats a Timestamp struct as a VTT timestamp string of
// the form "HH:MM:SS.mmm", zero-padding every component.
func formatVTTTimestamp(ts model.Timestamp) string {
	return fmt.Sprintf("%02d:%02d:%02d.%03d",
		ts.Hours, ts.Minutes, ts.Seconds, ts.Milliseconds)
}

// Format standardizes and formats a VTT file in place: the file is parsed,
// its entries are renumbered sequentially starting at 1, and the result is
// written back to the same path.
func Format(filePath string) error {
	subtitle, err := Parse(filePath)
	if err != nil {
		return fmt.Errorf("error parsing VTT file: %w", err)
	}

	// Standardize entry numbering
	for i := range subtitle.Entries {
		subtitle.Entries[i].Index = i + 1
	}

	// Write back the formatted content
	return Generate(subtitle, filePath)
}

// ConvertToSubtitle converts a VTT file to our intermediate Subtitle
// structure. It is equivalent to Parse.
func ConvertToSubtitle(filePath string) (model.Subtitle, error) {
	return Parse(filePath)
}

// ConvertFromSubtitle writes our intermediate Subtitle to filePath in VTT
// format. It is equivalent to Generate.
func ConvertFromSubtitle(subtitle model.Subtitle, filePath string) error {
	return Generate(subtitle, filePath)
}