feat: add tests
This commit is contained in:
parent
44c7e9bee5
commit
bb87f058f0
17 changed files with 4436 additions and 80 deletions
|
@ -21,6 +21,11 @@ const (
|
|||
func Parse(filePath string) (model.Subtitle, error) {
|
||||
subtitle := model.NewSubtitle()
|
||||
subtitle.Format = "vtt"
|
||||
|
||||
// Ensure maps are initialized
|
||||
if subtitle.Styles == nil {
|
||||
subtitle.Styles = make(map[string]string)
|
||||
}
|
||||
|
||||
file, err := os.Open(filePath)
|
||||
if err != nil {
|
||||
|
@ -29,15 +34,15 @@ func Parse(filePath string) (model.Subtitle, error) {
|
|||
defer file.Close()
|
||||
|
||||
scanner := bufio.NewScanner(file)
|
||||
|
||||
// Check header
|
||||
|
||||
// First line must be WEBVTT
|
||||
if !scanner.Scan() {
|
||||
return subtitle, fmt.Errorf("empty VTT file")
|
||||
}
|
||||
|
||||
header := strings.TrimSpace(scanner.Text())
|
||||
|
||||
header := scanner.Text()
|
||||
if !strings.HasPrefix(header, VTTHeader) {
|
||||
return subtitle, fmt.Errorf("invalid VTT file: missing WEBVTT header")
|
||||
return subtitle, fmt.Errorf("invalid VTT file, missing WEBVTT header")
|
||||
}
|
||||
|
||||
// Get metadata from header
|
||||
|
@ -52,24 +57,13 @@ func Parse(filePath string) (model.Subtitle, error) {
|
|||
var styleBuffer strings.Builder
|
||||
var cueTextBuffer strings.Builder
|
||||
|
||||
lineNum := 1
|
||||
lineNum := 0
|
||||
prevLine := ""
|
||||
|
||||
for scanner.Scan() {
|
||||
lineNum++
|
||||
line := scanner.Text()
|
||||
|
||||
// Skip empty lines
|
||||
if strings.TrimSpace(line) == "" {
|
||||
if inCue {
|
||||
// End of a cue
|
||||
currentEntry.Text = cueTextBuffer.String()
|
||||
subtitle.Entries = append(subtitle.Entries, currentEntry)
|
||||
currentEntry = model.NewSubtitleEntry()
|
||||
cueTextBuffer.Reset()
|
||||
inCue = false
|
||||
}
|
||||
continue
|
||||
}
|
||||
|
||||
// Check for style blocks
|
||||
if strings.HasPrefix(line, "STYLE") {
|
||||
inStyle = true
|
||||
|
@ -77,7 +71,7 @@ func Parse(filePath string) (model.Subtitle, error) {
|
|||
}
|
||||
|
||||
if inStyle {
|
||||
if line == "" {
|
||||
if strings.TrimSpace(line) == "" {
|
||||
inStyle = false
|
||||
subtitle.Styles["css"] = styleBuffer.String()
|
||||
styleBuffer.Reset()
|
||||
|
@ -88,6 +82,19 @@ func Parse(filePath string) (model.Subtitle, error) {
|
|||
continue
|
||||
}
|
||||
|
||||
// Skip empty lines, but handle end of cue
|
||||
if strings.TrimSpace(line) == "" {
|
||||
if inCue && cueTextBuffer.Len() > 0 {
|
||||
// End of a cue
|
||||
currentEntry.Text = strings.TrimSpace(cueTextBuffer.String())
|
||||
subtitle.Entries = append(subtitle.Entries, currentEntry)
|
||||
inCue = false
|
||||
cueTextBuffer.Reset()
|
||||
currentEntry = model.SubtitleEntry{} // Reset to zero value
|
||||
}
|
||||
continue
|
||||
}
|
||||
|
||||
// Check for NOTE comments
|
||||
if strings.HasPrefix(line, "NOTE") {
|
||||
comment := strings.TrimSpace(strings.TrimPrefix(line, "NOTE"))
|
||||
|
@ -97,42 +104,44 @@ func Parse(filePath string) (model.Subtitle, error) {
|
|||
|
||||
// Check for REGION definitions
|
||||
if strings.HasPrefix(line, "REGION") {
|
||||
parts := strings.Split(strings.TrimPrefix(line, "REGION"), ":")
|
||||
if len(parts) >= 2 {
|
||||
regionID := strings.TrimSpace(parts[0])
|
||||
region := model.NewSubtitleRegion(regionID)
|
||||
|
||||
settings := strings.Split(parts[1], " ")
|
||||
for _, setting := range settings {
|
||||
keyValue := strings.Split(setting, "=")
|
||||
if len(keyValue) == 2 {
|
||||
region.Settings[strings.TrimSpace(keyValue[0])] = strings.TrimSpace(keyValue[1])
|
||||
}
|
||||
}
|
||||
|
||||
subtitle.Regions = append(subtitle.Regions, region)
|
||||
}
|
||||
// Process region definitions if needed
|
||||
continue
|
||||
}
|
||||
|
||||
// Check for timestamp lines
|
||||
if strings.Contains(line, "-->") {
|
||||
// Check for cue timing line
|
||||
if strings.Contains(line, " --> ") {
|
||||
inCue = true
|
||||
|
||||
// If we already have a populated currentEntry, save it
|
||||
if currentEntry.Text != "" {
|
||||
subtitle.Entries = append(subtitle.Entries, currentEntry)
|
||||
cueTextBuffer.Reset()
|
||||
}
|
||||
|
||||
// Start a new entry
|
||||
currentEntry = model.NewSubtitleEntry()
|
||||
|
||||
// Use the previous line as cue identifier if it's a number
|
||||
if prevLine != "" && !inCue {
|
||||
if index, err := strconv.Atoi(strings.TrimSpace(prevLine)); err == nil {
|
||||
currentEntry.Index = index
|
||||
}
|
||||
}
|
||||
|
||||
// Parse timestamps
|
||||
timestamps := strings.Split(line, "-->")
|
||||
timestamps := strings.Split(line, " --> ")
|
||||
if len(timestamps) != 2 {
|
||||
return subtitle, fmt.Errorf("invalid timestamp format at line %d: %s", lineNum, line)
|
||||
}
|
||||
|
||||
startTimeStr := strings.TrimSpace(timestamps[0])
|
||||
|
||||
endTimeAndSettings := strings.TrimSpace(timestamps[1])
|
||||
|
||||
// Extract cue settings if any
|
||||
endTimeStr := endTimeAndSettings
|
||||
settings := ""
|
||||
|
||||
// Check for cue settings after end timestamp
|
||||
if spaceIndex := strings.IndexByte(endTimeAndSettings, ' '); spaceIndex != -1 {
|
||||
if spaceIndex := strings.IndexByte(endTimeAndSettings, ' '); spaceIndex > 0 {
|
||||
endTimeStr = endTimeAndSettings[:spaceIndex]
|
||||
settings = endTimeAndSettings[spaceIndex+1:]
|
||||
}
|
||||
|
@ -141,6 +150,10 @@ func Parse(filePath string) (model.Subtitle, error) {
|
|||
currentEntry.StartTime = parseVTTTimestamp(startTimeStr)
|
||||
currentEntry.EndTime = parseVTTTimestamp(endTimeStr)
|
||||
|
||||
// Initialize the styles map
|
||||
currentEntry.Styles = make(map[string]string)
|
||||
currentEntry.FormatData = make(map[string]interface{})
|
||||
|
||||
// Parse cue settings
|
||||
if settings != "" {
|
||||
settingPairs := strings.Split(settings, " ")
|
||||
|
@ -165,42 +178,46 @@ func Parse(filePath string) (model.Subtitle, error) {
|
|||
continue
|
||||
}
|
||||
|
||||
// Check if we have identifier before timestamp
|
||||
if !inCue && currentEntry.Index == 0 && !strings.Contains(line, "-->") {
|
||||
// This might be a cue identifier
|
||||
if _, err := strconv.Atoi(line); err == nil {
|
||||
// It's likely a numeric identifier
|
||||
num, _ := strconv.Atoi(line)
|
||||
currentEntry.Index = num
|
||||
} else {
|
||||
// It's a string identifier, store it in metadata
|
||||
currentEntry.Metadata["identifier"] = line
|
||||
currentEntry.Index = len(subtitle.Entries) + 1
|
||||
}
|
||||
continue
|
||||
}
|
||||
|
||||
// If we're in a cue, add this line to the text
|
||||
// If we're in a cue, add the line to the text buffer
|
||||
if inCue {
|
||||
if cueTextBuffer.Len() > 0 {
|
||||
cueTextBuffer.WriteString("\n")
|
||||
}
|
||||
cueTextBuffer.WriteString(line)
|
||||
}
|
||||
|
||||
prevLine = line
|
||||
}
|
||||
|
||||
// Don't forget the last entry
|
||||
if inCue && cueTextBuffer.Len() > 0 {
|
||||
currentEntry.Text = cueTextBuffer.String()
|
||||
currentEntry.Text = strings.TrimSpace(cueTextBuffer.String())
|
||||
subtitle.Entries = append(subtitle.Entries, currentEntry)
|
||||
}
|
||||
|
||||
// Process cue text to extract styling
|
||||
processVTTCueTextStyling(&subtitle)
|
||||
|
||||
|
||||
// Ensure all entries have sequential indices if they don't already
|
||||
for i := range subtitle.Entries {
|
||||
if subtitle.Entries[i].Index == 0 {
|
||||
subtitle.Entries[i].Index = i + 1
|
||||
}
|
||||
|
||||
// Ensure styles map is initialized for all entries
|
||||
if subtitle.Entries[i].Styles == nil {
|
||||
subtitle.Entries[i].Styles = make(map[string]string)
|
||||
}
|
||||
|
||||
// Ensure formatData map is initialized for all entries
|
||||
if subtitle.Entries[i].FormatData == nil {
|
||||
subtitle.Entries[i].FormatData = make(map[string]interface{})
|
||||
}
|
||||
}
|
||||
|
||||
if err := scanner.Err(); err != nil {
|
||||
return subtitle, fmt.Errorf("error reading VTT file: %w", err)
|
||||
}
|
||||
|
||||
// Process cue text to extract styling
|
||||
processVTTCueTextStyling(&subtitle)
|
||||
|
||||
return subtitle, nil
|
||||
}
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue