#if 0
# -----------------------------------------------------------------------
# mkvinfo.py - Matroska Streaming Video Files
# -----------------------------------------------------------------------
# $Id: mkvinfo.py 309 2004-04-18 17:55:26Z dischi $
#
# $Log$
# Revision 1.3 2004/04/18 17:55:26 dischi
# update, including subtitle support
#
# Revision 1.2 2004/03/21 08:57:31 dischi
# major bugfix
#
# Revision 1.1 2004/01/31 12:24:15 dischi
# add basic matroska info
#
# -----------------------------------------------------------------------
# MMPython - Media Metadata for Python
# Copyright (C) 2003 Thomas Schueppel, Dirk Meyer
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of MER-
# CHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
# Public License for more details.
#
# You should have received a copy of the GNU General Public License along
# with this program; if not, write to the Free Software Foundation, Inc.,
# 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
#
# -----------------------------------------------------------------------
#endif
from mmpython import mediainfo
import mmpython
import struct
import re
import stat
import os
import math
from types import *
from struct import *
from string import *
# Short alias for mediainfo._debug; every diagnostic message emitted by this
# module goes through it.
_print = mediainfo._debug
# Main IDs for the Matroska streams
# Values carried by the TrackType element
MATROSKA_VIDEO_TRACK = 0x01
MATROSKA_AUDIO_TRACK = 0x02
MATROSKA_SUBTITLES_TRACK = 0x11

# Top-level / structural element IDs
MATROSKA_HEADER_ID = 0x1A45DFA3
MATROSKA_TRACKS_ID = 0x1654AE6B
MATROSKA_SEGMENT_ID = 0x18538067
MATROSKA_SEGMENT_INFO_ID = 0x1549A966
MATROSKA_CLUSTER_ID = 0x1F43B675
MATROSKA_VOID_ID = 0xEC
# MATROSKA_CRC_ID and MATROSKA_CRC32_ID name the same element; both names
# are kept so existing users of either one continue to work.
MATROSKA_CRC_ID = 0xBF
MATROSKA_CRC32_ID = 0xBF

# Segment information element IDs.
# MATROSKA_TIMECODESCALE_ID used to be assigned a second time with the value
# 0x4489, which is the Duration ID; that made every timecode-scale lookup
# return the Duration element.  It is now defined exactly once.
MATROSKA_TIMECODESCALE_ID = 0x2AD7B1
MATROSKA_DURATION_ID = 0x4489
MATROSKA_MUXING_APP_ID = 0x4D80
MATROSKA_WRITING_APP_ID = 0x5741

# Track entry element IDs
MATROSKA_TRACK_TYPE_ID = 0x83
MATROSKA_TRACK_LANGUAGE_ID = 0x22B59C
MATROSKA_CODEC_ID = 0x86
MATROSKA_CODEC_NAME_ID = 0x258688
MATROSKA_FRAME_DURATION_ID = 0x23E383
MATROSKA_TRACK_UID_ID = 0x73C5
MATROSKA_TRACK_NUMBER_ID = 0xD7

# Video settings element IDs
MATROSKA_VIDEO_SETTINGS_ID = 0xE0
MATROSKA_VID_WIDTH_ID = 0xB0
MATROSKA_VID_HEIGHT_ID = 0xBA

# Audio settings element IDs
MATROSKA_AUDIO_SETTINGS_ID = 0xE1
MATROSKA_AUDIO_SAMPLERATE_ID = 0xB5
MATROSKA_AUDIO_CHANNELS_ID = 0x9F
# This class handles a single EBML entity as described in the Matroska/EBML specification
class EbmlEntity:
    """One EBML element (ID, length, payload) parsed from the start of a buffer.

    The constructor transparently skips any leading CRC-32 elements; the
    number of bytes skipped that way is available from get_crc_len().

    After a successful parse the following attributes are set:
      valid        1 when an element was decoded, 0 otherwise
      entity_id    element ID, marker bits included (e.g. 0x1A45DFA3)
      entity_str   the raw bytes of the ID
      id_len       number of bytes used by the ID
      len_size     number of bytes used by the length field
      entity_len   number of payload bytes actually available in the buffer
      entity_data  the payload bytes
      value        payload read as a big-endian unsigned integer when it is
                   1 to 8 bytes long, 0 otherwise
    """

    def __init__(self, inbuf):
        """Parse the first non-CRC element found in inbuf.

        Raises ValueError when inbuf is empty, truncated or does not start
        with a decodable element.
        """
        self.crc_len = 0
        self.build_entity(inbuf)
        # Step over CRC-32 elements until a real element (or garbage) is hit.
        while self.valid and self.entity_id == MATROSKA_CRC32_ID:
            skipped = self.get_total_len()
            self.crc_len += skipped
            inbuf = inbuf[skipped:]
            self.build_entity(inbuf)
        if not self.valid:
            raise ValueError("EBML entity not found, bad file format")

    def _octets(self, data):
        """Return the bytes of data as a tuple of integers.

        struct is used instead of ord() so that both byte strings indexed
        as characters and byte strings indexed as integers are accepted.
        """
        return struct.unpack('%dB' % len(data), data)

    def build_entity(self, inbuf):
        """Decode ID, length, payload and numeric value from inbuf.

        On failure self.valid is set to 0 and every other attribute is
        reset to an empty/zero value so the getters stay usable.
        """
        self.compute_id(inbuf)
        length = None
        self.len_size = 0
        if self.id_len:
            length = self.compute_len(inbuf[self.id_len:])
        if length is None:
            self.valid = 0
            self.entity_len = 0
            self.entity_data = inbuf[0:0]
            self.value = 0
            _print("EBML entity not found, bad file format")
            return
        self.valid = 1
        start = self.id_len + self.len_size
        if length < 0:
            # Unknown size: the element runs to the end of the buffer.
            self.entity_data = inbuf[start:]
        else:
            self.entity_data = inbuf[start:start + length]
        # The declared length may exceed what was read from the file (the
        # segment usually spans the whole file), so report what is really
        # there; callers use this to walk the buffer.
        self.entity_len = len(self.entity_data)
        # Short payloads may be unsigned integers, so decode them eagerly.
        self.value = 0
        if 1 <= self.entity_len <= 8:
            number = 0
            for octet in self._octets(self.entity_data):
                number = (number << 8) | octet
            self.value = number

    def compute_id(self, inbuf):
        """Decode the element ID (1 to 4 bytes) at the start of inbuf.

        Sets id_len, entity_id and entity_str.  id_len is 0 (and entity_id
        is 0) when the first byte carries no valid width marker or the
        buffer is too short to hold the whole ID.
        """
        self.id_len = 0
        self.entity_id = 0
        head = self._octets(inbuf[:4])
        if head:
            first = head[0]
            for width, marker in ((1, 0x80), (2, 0x40), (3, 0x20), (4, 0x10)):
                if first & marker:
                    if len(head) >= width:
                        ident = 0
                        for octet in head[:width]:
                            ident = (ident << 8) | octet
                        self.id_len = width
                        self.entity_id = ident
                    break
        self.entity_str = inbuf[0:self.id_len]
        return

    def compute_len(self, inbuf):
        """Decode the variable-width length field at the start of inbuf.

        Sets len_size to the width of the field (1 to 8 bytes) and returns
        the payload length with the width marker bit removed.  Returns -1
        when every value bit is set, which is treated as "size unknown",
        and None (with len_size 0) when the field is missing, truncated or
        starts with a zero byte.
        """
        self.len_size = 0
        head = self._octets(inbuf[:8])
        if not head or head[0] == 0:
            return None
        first = head[0]
        # The position of the highest set bit gives the field width.
        width = 1
        marker = 0x80
        while not first & marker:
            marker >>= 1
            width += 1
        if len(head) < width:
            return None
        self.len_size = width
        length = first & (marker - 1)
        all_ones = (length == marker - 1)
        for octet in head[1:width]:
            length = (length << 8) | octet
            all_ones = all_ones and octet == 0xFF
        if all_ones:
            return -1
        return length

    def get_crc_len(self):
        """Return the number of bytes of CRC-32 elements skipped before this one."""
        return self.crc_len

    def get_value(self):
        """Return the payload decoded as an unsigned integer (0 if not decodable)."""
        return self.value

    def get_data(self):
        """Return the raw payload bytes."""
        return self.entity_data

    def get_id(self):
        """Return the numeric element ID."""
        return self.entity_id

    def get_str_id(self):
        """Return the raw bytes of the element ID."""
        return self.entity_str

    def get_len(self):
        """Return the number of payload bytes available."""
        return self.entity_len

    def get_total_len(self):
        """Return the size of ID + length field + payload, in bytes."""
        return self.entity_len + self.id_len + self.len_size
# This is the main Matroska object
class MkvInfo(mediainfo.AVInfo):
    """Matroska metadata read from the first 80000 bytes of a file.

    When the buffer starts with the EBML header, valid is set to 1 and
    mime/type are filled in.  If a segment follows, its duration is stored
    in self.length (whole minutes) and one VideoInfo / AudioInfo / language
    entry is appended to self.video / self.audio / self.subtitles for each
    track entry of the matching type.
    """

    # ID of a single track entry inside the Tracks element.
    _TRACK_ENTRY_ID = 0xAE
    # Matroska default timecode scale: nanoseconds per duration tick.
    _DEFAULT_TIMECODE_SCALE = 1000000
    # Everything a failed element parse is known to raise.
    _PARSE_ERRORS = (ValueError, IndexError, AttributeError, TypeError,
                     struct.error)

    def __init__(self, file):
        """Parse file, an object whose read(n) returns the leading bytes."""
        mediainfo.AVInfo.__init__(self)
        self.samplerate = 1
        self.valid = 0
        head = file.read(80000)
        if not head:
            # Regular file end
            return
        # Check the Matroska header
        try:
            header = EbmlEntity(head)
        except self._PARSE_ERRORS:
            return
        if header.get_id() != MATROSKA_HEADER_ID:
            return
        _print("HEADER ID found %08X" % header.get_id())
        self.valid = 1
        self.mime = 'application/mkv'
        self.type = 'Matroska'
        # Now get the segment
        try:
            segment = EbmlEntity(head[header.get_total_len():])
        except self._PARSE_ERRORS:
            _print("SEGMENT ID not found")
            return
        if segment.get_id() != MATROSKA_SEGMENT_ID:
            _print("SEGMENT ID not found %08X" % segment.get_id())
            return
        _print("SEGMENT ID found %08X" % segment.get_id())
        segtab = self.process_one_level(segment)
        self._read_length(segtab)
        _print("Searching for id : %X" % MATROSKA_TRACKS_ID)
        tracks = segtab.get(MATROSKA_TRACKS_ID)
        if tracks is None:
            _print("TRACKS ID not found !!")
        else:
            self.process_tracks(tracks)

    def _decode_float(self, data):
        """Return data decoded as a big-endian 4- or 8-byte float, else None."""
        if len(data) == 4:
            return struct.unpack('!f', data)[0]
        if len(data) == 8:
            return struct.unpack('!d', data)[0]
        return None

    def _children(self, item):
        """Return the list of elements directly contained in item.

        Parsing stops quietly at the first element that cannot be decoded,
        which is what happens when the read buffer ends mid-element.
        """
        buf = item.get_data()
        limit = min(item.get_len(), len(buf))
        children = []
        indice = 0
        while indice < limit:
            try:
                elem = EbmlEntity(buf[indice:])
            except self._PARSE_ERRORS:
                break
            step = elem.get_total_len() + elem.get_crc_len()
            if step <= 0:
                break
            children.append(elem)
            indice += step
        return children

    def _read_length(self, segtab):
        """Set self.length (minutes) from the segment information, if present."""
        info = segtab.get(MATROSKA_SEGMENT_INFO_ID)
        if info is None:
            return
        seginfotab = self.process_one_level(info)
        duration_elem = seginfotab.get(MATROSKA_DURATION_ID)
        if duration_elem is None:
            return
        ticks = self._decode_float(duration_elem.get_data())
        if ticks is None:
            return
        scale = self._DEFAULT_TIMECODE_SCALE
        scale_elem = seginfotab.get(MATROSKA_TIMECODESCALE_ID)
        # A scale is only trusted when it comes from an element of its own;
        # if the lookup hands back the duration element, the payload is a
        # float and must not be read as the integer scale.
        if scale_elem is not None and scale_elem is not duration_elem:
            if scale_elem.get_value() > 0:
                scale = scale_elem.get_value()
        # Duration ticks * nanoseconds per tick -> seconds
        seconds = ticks * float(scale) / 1e9
        try:
            # Express the time in minutes
            self.length = int(seconds / 60)
        except (OverflowError, ValueError):
            # NaN or infinite duration: leave length untouched
            pass

    def process_tracks(self, tracks):
        """Register every track entry found inside the Tracks element."""
        for trackelem in self._children(tracks):
            _print("ELEMENT %X found" % trackelem.get_id())
            if trackelem.get_id() != self._TRACK_ENTRY_ID:
                # Void padding and other non-track children are skipped.
                continue
            try:
                self.process_one_track(trackelem)
            except self._PARSE_ERRORS:
                # A damaged track must not hide the remaining ones.
                _print("Unable to parse track entry")

    def process_one_level(self, item):
        """Return a dict mapping element ID to element for item's children.

        When an ID occurs more than once, the last occurrence wins.
        """
        tabelem = {}
        for elem in self._children(item):
            tabelem[elem.get_id()] = elem
        return tabelem

    def process_one_track(self, track):
        """Append the information of one track entry to the matching list."""
        # Process all the items at the track level
        tabelem = self.process_one_level(track)
        type_elem = tabelem.get(MATROSKA_TRACK_TYPE_ID)
        if type_elem is None:
            _print("Track without track type, ignored")
            return
        mytype = type_elem.get_value()
        _print("Track type found with UID %d" % mytype)
        if mytype == MATROSKA_VIDEO_TRACK:
            _print("VIDEO TRACK found !!")
            self.video.append(self._video_info(tabelem))
        elif mytype == MATROSKA_AUDIO_TRACK:
            _print("AUDIO TRACK found !!")
            self.audio.append(self._audio_info(tabelem))
        elif mytype == MATROSKA_SUBTITLES_TRACK:
            self.subtitles.append(self._subtitle_language(tabelem))

    def _video_info(self, tabelem):
        """Build a VideoInfo (codec, fps, width, height) from a track's elements."""
        vi = mediainfo.VideoInfo()
        codec = tabelem.get(MATROSKA_CODEC_ID)
        if codec is None:
            vi.codec = 'Unknown'
        else:
            vi.codec = codec.get_data()
        # The frame duration is stored in nanoseconds.
        vi.fps = 0
        frame = tabelem.get(MATROSKA_FRAME_DURATION_ID)
        if frame is not None and frame.get_value() > 0:
            vi.fps = 1e9 / frame.get_value()
        settings = tabelem.get(MATROSKA_VIDEO_SETTINGS_ID)
        if settings is None:
            _print("No other info about video track !!!")
        else:
            vidtab = self.process_one_level(settings)
            width = vidtab.get(MATROSKA_VID_WIDTH_ID)
            if width is not None:
                vi.width = width.get_value()
            height = vidtab.get(MATROSKA_VID_HEIGHT_ID)
            if height is not None:
                vi.height = height.get_value()
        return vi

    def _audio_info(self, tabelem):
        """Build an AudioInfo (language, codec, samplerate, channels)."""
        ai = mediainfo.AudioInfo()
        lang = tabelem.get(MATROSKA_TRACK_LANGUAGE_ID)
        if lang is None:
            language = 'en'
        else:
            language = lang.get_data()
        ai.language = language
        ai['language'] = language
        codec = tabelem.get(MATROSKA_CODEC_ID)
        if codec is None:
            ai.codec = "Unknown"
        else:
            ai.codec = codec.get_data()
        settings = tabelem.get(MATROSKA_AUDIO_SETTINGS_ID)
        if settings is None:
            _print("No other info about audio track !!!")
        else:
            audtab = self.process_one_level(settings)
            rate = audtab.get(MATROSKA_AUDIO_SAMPLERATE_ID)
            if rate is not None:
                # The sampling frequency is stored as a float, so it has to
                # be decoded from the raw payload.
                samplerate = self._decode_float(rate.get_data())
                if samplerate is not None:
                    ai.samplerate = samplerate
            channels = audtab.get(MATROSKA_AUDIO_CHANNELS_ID)
            if channels is not None:
                ai.channels = channels.get_value()
        return ai

    def _subtitle_language(self, tabelem):
        """Return the language of a subtitle track ("en" when not stored)."""
        lang = tabelem.get(MATROSKA_TRACK_LANGUAGE_ID)
        if lang is None:
            return "en"  # By default
        _print("Subtitle language found : %s" % lang.get_data())
        return lang.get_data()
# Register MkvInfo with mmpython as the TYPE_AV handler for 'application/mkv';
# ('mkv', 'mka') are presumably the file extensions it is selected for.
mmpython.registertype( 'application/mkv', ('mkv', 'mka',), mediainfo.TYPE_AV, MkvInfo )
|