symphonia_core/
formats.rs

1// Symphonia
2// Copyright (c) 2019-2022 The Project Symphonia Developers.
3//
4// This Source Code Form is subject to the terms of the Mozilla Public
5// License, v. 2.0. If a copy of the MPL was not distributed with this
6// file, You can obtain one at https://mozilla.org/MPL/2.0/.
7
//! The `formats` module provides the traits and support structures necessary to implement media
//! demuxers.
10
11use crate::codecs::CodecParameters;
12use crate::errors::Result;
13use crate::io::{BufReader, MediaSourceStream};
14use crate::meta::{Metadata, Tag};
15use crate::units::{Time, TimeStamp};
16
pub mod prelude {
    //! The `formats` module prelude.
    //!
    //! Re-exports `Duration`, `TimeBase`, and `TimeStamp` from `crate::units` alongside the core
    //! types declared in the parent module.

    pub use crate::units::{Duration, TimeBase, TimeStamp};

    pub use super::{Cue, FormatOptions, FormatReader, Packet, SeekMode, SeekTo, SeekedTo, Track};
}
24
/// `SeekTo` specifies a position to seek to.
///
/// The position is given either as a `Time`, or as a `TimeStamp` relative to a specific track.
pub enum SeekTo {
    /// Seek to a `Time` in regular time units.
    Time {
        /// The `Time` to seek to.
        time: Time,
        /// If `Some`, specifies which track's timestamp should be returned after the seek. If
        /// `None`, then the default track's timestamp is returned. If the container does not have
        /// a default track, then the first track's timestamp is returned.
        track_id: Option<u32>,
    },
    /// Seek to a track's `TimeStamp` in that track's timebase units.
    TimeStamp {
        /// The `TimeStamp` to seek to.
        ts: TimeStamp,
        /// Specifies which track `ts` is relative to.
        track_id: u32,
    },
}
44
/// `SeekedTo` is the result of a seek.
///
/// It reports both the timestamp that was asked for and the timestamp that was actually reached.
#[derive(Copy, Clone, Debug)]
pub struct SeekedTo {
    /// The track the seek was relative to.
    pub track_id: u32,
    /// The `TimeStamp` required for the requested seek.
    pub required_ts: TimeStamp,
    /// The `TimeStamp` that was seeked to.
    pub actual_ts: TimeStamp,
}
55
/// `SeekMode` selects the precision of a seek.
#[derive(Copy, Clone, Debug, Eq, PartialEq)]
pub enum SeekMode {
    /// Coarse seek mode is a best-effort attempt to seek to the requested position. The actual
    /// position seeked to may be before or after the requested position. Coarse seeking is an
    /// optional performance enhancement. If a `FormatReader` does not support this mode, an
    /// accurate seek will be performed instead.
    Coarse,
    /// Accurate (aka sample-accurate) seek mode will always seek to a position before the
    /// requested position.
    Accurate,
}
68
/// `FormatOptions` is the set of options shared by all demuxers.
///
/// Use `FormatOptions::default()` to obtain the default value of every option.
#[derive(Copy, Clone, Debug)]
pub struct FormatOptions {
    /// When a `FormatReader` needs a seek index and the container does not supply one, build the
    /// whole index at instantiation rather than progressively. Default: `false`.
    pub prebuild_seek_index: bool,
    /// When a seek index has to be built, the interval, in seconds of decoded content, between
    /// consecutive index entries. Default: `20`.
    ///
    /// Note: This is a CPU vs. memory trade-off. A high value increases the amount of IO required
    /// during a seek, whereas a low value requires more memory. The default is a good compromise
    /// for casual playback of music, podcasts, movies, etc. Highly-interactive applications should
    /// decrease this value.
    pub seek_index_fill_rate: u16,
    /// Enable support for gapless playback. Default: `false`.
    ///
    /// When enabled, the reader provides trim information in packets that decoders may use to
    /// trim any encoder delay or padding.
    ///
    /// When enabled, this option also alters the value and interpretation of timestamps and
    /// durations such that they are relative to the non-trimmed region.
    pub enable_gapless: bool,
}

impl Default for FormatOptions {
    /// Returns the default options: no prebuilt seek index, a seek index fill rate of 20 seconds,
    /// and gapless playback disabled.
    fn default() -> Self {
        Self { enable_gapless: false, prebuild_seek_index: false, seek_index_fill_rate: 20 }
    }
}
102
/// A `Cue` is a designated point of time within a media stream.
///
/// A `Cue` may be a mapping from either a source track, a chapter, cuesheet, or a timestamp
/// depending on the source media. A `Cue`'s duration is the difference between the `Cue`'s
/// timestamp and the next. Each `Cue` may contain an optional index of points relative to the `Cue`
/// that never exceed the timestamp of the next `Cue`. A `Cue` may also have associated `Tag`s.
#[derive(Clone, Debug)]
pub struct Cue {
    /// A unique index for the `Cue`.
    pub index: u32,
    /// The starting timestamp in number of frames from the start of the stream.
    pub start_ts: u64,
    /// A list of `Tag`s associated with the `Cue`.
    pub tags: Vec<Tag>,
    /// A list of `CuePoint`s that are contained within this `Cue`. These points are children of
    /// the `Cue` since the `Cue` itself is an implicit `CuePoint`.
    pub points: Vec<CuePoint>,
}
121
/// A `CuePoint` is a point, represented as a frame offset, within a `Cue`.
///
/// A `CuePoint` provides more precise indexing within a parent `Cue`. Additional `Tag`s may be
/// associated with a `CuePoint`.
#[derive(Clone, Debug)]
pub struct CuePoint {
    /// The offset of the first frame in the `CuePoint` relative to the start of the parent `Cue`.
    pub start_offset_ts: u64,
    /// A list of `Tag`s associated with the `CuePoint`.
    pub tags: Vec<Tag>,
}
133
134/// A `Track` is an independently coded media bitstream. A media format may contain multiple tracks
135/// in one container. Each of those tracks are represented by one `Track`.
136#[derive(Clone, Debug)]
137pub struct Track {
138    /// A unique identifier for the track.
139    pub id: u32,
140    /// The codec parameters for the track.
141    pub codec_params: CodecParameters,
142    /// The language of the track. May be unknown.
143    pub language: Option<String>,
144}
145
146impl Track {
147    pub fn new(id: u32, codec_params: CodecParameters) -> Self {
148        Track { id, codec_params, language: None }
149    }
150}
151
/// A `FormatReader` is a container demuxer. It provides methods to probe a media container for
/// information and access the tracks encapsulated in the container.
///
/// Most, if not all, media containers contain metadata, then a number of packetized, and
/// interleaved codec bitstreams. These bitstreams are usually referred to as tracks. Generally,
/// the encapsulated bitstreams are independently encoded using some codec. The allowed codecs for a
/// container are defined in the specification of the container format.
///
/// While demuxing, packets are read one-by-one and may be discarded or decoded at the choice of
/// the caller. The contents of a packet is undefined: it may be a frame of video, a millisecond
/// of audio, or a subtitle, but a packet will never contain data from two different bitstreams.
/// Therefore the caller can be selective in what track(s) should be decoded and consumed.
///
/// `FormatReader` provides an Iterator-like interface over packets for easy consumption and
/// filtering. Seeking will invalidate the state of any `Decoder` processing packets from the
/// `FormatReader`; such decoders should be reset after a successful seek operation.
pub trait FormatReader: Send + Sync {
    /// Attempt to instantiate a `FormatReader` using the provided `FormatOptions` and
    /// `MediaSourceStream`. The reader will probe the container to verify format support, determine
    /// the number of tracks, and read any initial metadata.
    fn try_new(source: MediaSourceStream, options: &FormatOptions) -> Result<Self>
    where
        Self: Sized;

    /// Gets a list of all `Cue`s.
    fn cues(&self) -> &[Cue];

    /// Gets the metadata revision log.
    fn metadata(&mut self) -> Metadata<'_>;

    /// Seek, as precisely as possible depending on the mode, to the `Time` or track `TimeStamp`
    /// requested. Returns the requested and actual `TimeStamps` seeked to, as well as the `Track`.
    ///
    /// After a seek, all `Decoder`s consuming packets from this reader should be reset.
    ///
    /// Note: The `FormatReader` by itself cannot seek to an exact audio frame, it is only capable
    /// of seeking to the nearest `Packet`. Therefore, to seek to an exact frame, a `Decoder` must
    /// decode packets until the requested position is reached. When using the accurate `SeekMode`,
    /// the seeked position will always be before the requested position. If the coarse `SeekMode`
    /// is used, then the seek position may be after the requested position. Coarse seeking is an
    /// optional performance enhancement, therefore, a coarse seek may sometimes be an accurate
    /// seek.
    fn seek(&mut self, mode: SeekMode, to: SeekTo) -> Result<SeekedTo>;

    /// Gets a list of tracks in the container.
    fn tracks(&self) -> &[Track];

    /// Gets the default track. If the `FormatReader` has a method of determining the default track,
    /// this function should return it. Otherwise, the first track is returned. If no tracks are
    /// present then `None` is returned.
    ///
    /// The provided implementation returns the first entry of `tracks()`.
    fn default_track(&self) -> Option<&Track> {
        self.tracks().first()
    }

    /// Get the next packet from the container.
    ///
    /// If `ResetRequired` is returned, then the track list must be re-examined and all `Decoder`s
    /// re-created. All other errors are unrecoverable.
    fn next_packet(&mut self) -> Result<Packet>;

    /// Destroys the `FormatReader` and returns the underlying media source stream.
    fn into_inner(self: Box<Self>) -> MediaSourceStream;
}
215
216/// A `Packet` contains a discrete amount of encoded data for a single codec bitstream. The exact
217/// amount of data is bounded, but not defined, and is dependant on the container and/or the
218/// encapsulated codec.
219#[derive(Clone)]
220pub struct Packet {
221    /// The track id.
222    track_id: u32,
223    /// The timestamp of the packet. When gapless support is enabled, this timestamp is relative to
224    /// the end of the encoder delay.
225    ///
226    /// This timestamp is in `TimeBase` units.
227    pub ts: u64,
228    /// The duration of the packet. When gapless support is enabled, the duration does not include
229    /// the encoder delay or padding.
230    ///
231    /// The duration is in `TimeBase` units.
232    pub dur: u64,
233    /// When gapless support is enabled, this is the number of decoded frames that should be trimmed
234    /// from the start of the packet to remove the encoder delay. Must be 0 in all other cases.
235    pub trim_start: u32,
236    /// When gapless support is enabled, this is the number of decoded frames that should be trimmed
237    /// from the end of the packet to remove the encoder padding. Must be 0 in all other cases.
238    pub trim_end: u32,
239    /// The packet buffer.
240    pub data: Box<[u8]>,
241}
242
243impl Packet {
244    /// Create a new `Packet` from a slice.
245    pub fn new_from_slice(track_id: u32, ts: u64, dur: u64, buf: &[u8]) -> Self {
246        Packet { track_id, ts, dur, trim_start: 0, trim_end: 0, data: Box::from(buf) }
247    }
248
249    /// Create a new `Packet` from a boxed slice.
250    pub fn new_from_boxed_slice(track_id: u32, ts: u64, dur: u64, data: Box<[u8]>) -> Self {
251        Packet { track_id, ts, dur, trim_start: 0, trim_end: 0, data }
252    }
253
254    /// Create a new `Packet` with trimming information from a slice.
255    pub fn new_trimmed_from_slice(
256        track_id: u32,
257        ts: u64,
258        dur: u64,
259        trim_start: u32,
260        trim_end: u32,
261        buf: &[u8],
262    ) -> Self {
263        Packet { track_id, ts, dur, trim_start, trim_end, data: Box::from(buf) }
264    }
265
266    /// Create a new `Packet` with trimming information from a boxed slice.
267    pub fn new_trimmed_from_boxed_slice(
268        track_id: u32,
269        ts: u64,
270        dur: u64,
271        trim_start: u32,
272        trim_end: u32,
273        data: Box<[u8]>,
274    ) -> Self {
275        Packet { track_id, ts, dur, trim_start, trim_end, data }
276    }
277
278    /// The track identifier of the track this packet belongs to.
279    pub fn track_id(&self) -> u32 {
280        self.track_id
281    }
282
283    /// Get the timestamp of the packet in `TimeBase` units.
284    ///
285    /// If gapless support is enabled, then this timestamp is relative to the end of the encoder
286    /// delay.
287    pub fn ts(&self) -> u64 {
288        self.ts
289    }
290
291    /// Get the duration of the packet in `TimeBase` units.
292    ///
293    /// If gapless support is enabled, then this is the duration after the encoder delay and padding
294    /// is trimmed.
295    pub fn dur(&self) -> u64 {
296        self.dur
297    }
298
299    /// Get the duration of the packet in `TimeBase` units if no decoded frames are trimmed.
300    ///
301    /// If gapless support is disabled, then this is the same as the duration.
302    pub fn block_dur(&self) -> u64 {
303        self.dur + u64::from(self.trim_start) + u64::from(self.trim_end)
304    }
305
306    /// Get the number of frames to trim from the start of the decoded packet.
307    pub fn trim_start(&self) -> u32 {
308        self.trim_start
309    }
310
311    /// Get the number of frames to trim from the end of the decoded packet.
312    pub fn trim_end(&self) -> u32 {
313        self.trim_end
314    }
315
316    /// Get an immutable slice to the packet buffer.
317    pub fn buf(&self) -> &[u8] {
318        &self.data
319    }
320
321    /// Get a `BufStream` to read the packet data buffer sequentially.
322    pub fn as_buf_reader(&self) -> BufReader<'_> {
323        BufReader::new(&self.data)
324    }
325}
326
327pub mod util {
328    //! Helper utilities for implementing `FormatReader`s.
329
330    use super::Packet;
331
332    /// A `SeekPoint` is a mapping between a sample or frame number to byte offset within a media
333    /// stream.
334    #[derive(Copy, Clone, Debug, PartialEq, Eq)]
335    pub struct SeekPoint {
336        /// The frame or sample timestamp of the `SeekPoint`.
337        pub frame_ts: u64,
338        /// The byte offset of the `SeekPoint`s timestamp relative to a format-specific location.
339        pub byte_offset: u64,
340        /// The number of frames the `SeekPoint` covers.
341        pub n_frames: u32,
342    }
343
344    impl SeekPoint {
345        fn new(frame_ts: u64, byte_offset: u64, n_frames: u32) -> Self {
346            SeekPoint { frame_ts, byte_offset, n_frames }
347        }
348    }
349
350    /// A `SeekIndex` stores `SeekPoint`s (generally a sample or frame number to byte offset) within
351    /// a media stream and provides methods to efficiently search for the nearest `SeekPoint`(s)
352    /// given a timestamp.
353    ///
354    /// A `SeekIndex` does not require complete coverage of the entire media stream. However, the
355    /// better the coverage, the smaller the manual search range the `SeekIndex` will return.
356    #[derive(Default)]
357    pub struct SeekIndex {
358        points: Vec<SeekPoint>,
359    }
360
361    /// `SeekSearchResult` is the return value for a search on a `SeekIndex`. It returns a range of
362    /// `SeekPoint`s a `FormatReader` should search to find the desired timestamp. Ranges are
363    /// lower-bound inclusive, and upper-bound exclusive.
364    #[derive(Copy, Clone, Debug, PartialEq, Eq)]
365    pub enum SeekSearchResult {
366        /// The `SeekIndex` is empty so the desired timestamp could not be found. The entire stream
367        /// should be searched for the desired timestamp.
368        Stream,
369        /// The desired timestamp can be found before, the `SeekPoint`. The stream should be
370        /// searched for the desired timestamp from the start of the stream up-to, but not
371        /// including, the `SeekPoint`.
372        Upper(SeekPoint),
373        /// The desired timestamp can be found at, or after, the `SeekPoint`. The stream should be
374        /// searched for the desired timestamp starting at the provided `SeekPoint` up-to the end of
375        /// the stream.
376        Lower(SeekPoint),
377        /// The desired timestamp can be found within the range. The stream should be searched for
378        /// the desired starting at the first `SeekPoint` up-to, but not-including, the second
379        /// `SeekPoint`.
380        Range(SeekPoint, SeekPoint),
381    }
382
383    impl SeekIndex {
384        /// Create an empty `SeekIndex`
385        pub fn new() -> SeekIndex {
386            SeekIndex { points: Vec::new() }
387        }
388
389        /// Insert a `SeekPoint` into the index.
390        pub fn insert(&mut self, ts: u64, byte_offset: u64, n_frames: u32) {
391            // Create the seek point.
392            let seek_point = SeekPoint::new(ts, byte_offset, n_frames);
393
394            // Get the timestamp of the last entry in the index.
395            let (last_ts, last_offset) =
396                self.points.last().map_or((u64::MAX, u64::MAX), |p| (p.frame_ts, p.byte_offset));
397
398            // If the seek point has a timestamp greater-than and byte offset greater-than or equal to
399            // the last entry in the index, then simply append it to the index.
400            if ts > last_ts && byte_offset >= last_offset {
401                self.points.push(seek_point)
402            }
403            else if ts < last_ts {
404                // If the seek point has a timestamp less-than the last entry in the index, then the
405                // insertion point must be found. This case should rarely occur.
406                let i = self
407                    .points
408                    .partition_point(|p| ts > p.frame_ts && byte_offset >= p.byte_offset);
409
410                // Insert if the point found or if the points are empty
411                if i < self.points.len() || i == 0 {
412                    self.points.insert(i, seek_point);
413                }
414            }
415        }
416
417        /// Search the index to find a bounded range of bytes wherein the specified frame timestamp
418        /// will be contained. If the index is empty, this function simply returns a result
419        /// indicating the entire stream should be searched manually.
420        pub fn search(&self, frame_ts: u64) -> SeekSearchResult {
421            // The index must contain atleast one SeekPoint to return a useful result.
422            if !self.points.is_empty() {
423                let mut lower = 0;
424                let mut upper = self.points.len() - 1;
425
426                // If the desired timestamp is less than the first SeekPoint within the index,
427                // indicate that the stream should be searched from the beginning.
428                if frame_ts < self.points[lower].frame_ts {
429                    return SeekSearchResult::Upper(self.points[lower]);
430                }
431                // If the desired timestamp is greater than or equal to the last SeekPoint within
432                // the index, indicate that the stream should be searched from the last SeekPoint.
433                else if frame_ts >= self.points[upper].frame_ts {
434                    return SeekSearchResult::Lower(self.points[upper]);
435                }
436
437                // Desired timestamp is between the lower and upper indicies. Perform a binary
438                // search to find a range of SeekPoints containing the desired timestamp. The binary
439                // search exits when either two adjacent SeekPoints or a single SeekPoint is found.
440                while upper - lower > 1 {
441                    let mid = (lower + upper) / 2;
442                    let mid_ts = self.points[mid].frame_ts;
443
444                    if frame_ts < mid_ts {
445                        upper = mid;
446                    }
447                    else {
448                        lower = mid;
449                    }
450                }
451
452                return SeekSearchResult::Range(self.points[lower], self.points[upper]);
453            }
454
455            // The index is empty, the stream must be searched manually.
456            SeekSearchResult::Stream
457        }
458    }
459
460    /// Given a `Packet`, the encoder delay in frames, and the number of non-delay or padding
461    /// frames, adjust the packet's timestamp and duration, and populate the trim information.
462    pub fn trim_packet(packet: &mut Packet, delay: u32, num_frames: Option<u64>) {
463        packet.trim_start = if packet.ts < u64::from(delay) {
464            let trim = (u64::from(delay) - packet.ts).min(packet.dur);
465            packet.ts = 0;
466            packet.dur -= trim;
467            trim as u32
468        }
469        else {
470            packet.ts -= u64::from(delay);
471            0
472        };
473
474        if let Some(num_frames) = num_frames {
475            packet.trim_end = if packet.ts + packet.dur > num_frames {
476                let trim = (packet.ts + packet.dur - num_frames).min(packet.dur);
477                packet.dur -= trim;
478                trim as u32
479            }
480            else {
481                0
482            };
483        }
484    }
485
486    #[cfg(test)]
487    mod tests {
488        use super::{SeekIndex, SeekPoint, SeekSearchResult};
489
490        #[test]
491        fn verify_seek_index_search() {
492            let mut index = SeekIndex::new();
493            // Normal index insert
494            index.insert(479232, 706812, 1152);
495            index.insert(959616, 1421536, 1152);
496            index.insert(1919232, 2833241, 1152);
497            index.insert(2399616, 3546987, 1152);
498            index.insert(2880000, 4259455, 1152);
499
500            // Search for point lower than the first entry
501            assert_eq!(
502                index.search(0),
503                SeekSearchResult::Upper(SeekPoint::new(479232, 706812, 1152))
504            );
505
506            // Search for point higher than last entry
507            assert_eq!(
508                index.search(3000000),
509                SeekSearchResult::Lower(SeekPoint::new(2880000, 4259455, 1152))
510            );
511
512            // Search for point that has equal timestamp with some index
513            assert_eq!(
514                index.search(959616),
515                SeekSearchResult::Range(
516                    SeekPoint::new(959616, 1421536, 1152),
517                    SeekPoint::new(1919232, 2833241, 1152)
518                )
519            );
520
521            // Index insert out of order
522            index.insert(1440000, 2132419, 1152);
523
524            // Search for point that have out of order index when inserting
525            assert_eq!(
526                index.search(1000000),
527                SeekSearchResult::Range(
528                    SeekPoint::new(959616, 1421536, 1152),
529                    SeekPoint::new(1440000, 2132419, 1152)
530                )
531            );
532
533            // Index insert with byte_offset less than last entry
534            index.insert(3359232, 0, 0);
535
536            // Search for ignored point because byte_offset less than last entry
537            assert_eq!(
538                index.search(3359232),
539                SeekSearchResult::Lower(SeekPoint::new(2880000, 4259455, 1152))
540            );
541        }
542    }
543}