MP3 Parser 範例

以下是使用 Python 2 撰寫的 MP3 以及 ID3 Parser 範例。作者為 zonble。
#!/usr/bin/env python
# encoding: utf-8

import os
import sys
import struct

class MP3Parser(object):
    '''
    Validate the MPEG audio frames of an MP3 file.

    Only MPEG version 1, layer 3 frames sampled at 44.1 KHz are
    accepted. An ID3v2 tag, when present, is skipped before the frames
    are walked. All entry points are classmethods, so no instance is
    needed.
    '''

    # Bitrates in bits per second for MPEG-1 Layer III, indexed by the
    # 4-bit bitrate field of the frame header. Index 0 ("free format")
    # and index 15 (invalid) carry no usable bitrate and are stored as 0.
    _BITRATES = (0, 32000, 40000, 48000, 56000, 64000, 80000, 96000,
                 112000, 128000, 160000, 192000, 224000, 256000, 320000, 0)

    # The only sample rate this parser accepts, in Hz.
    _SAMPLE_RATE = 44100

    # An ID3v2 header and the optional ID3v2.4 footer are 10 bytes each.
    _ID3_HEADER_SIZE = 10
    _ID3_FOOTER_SIZE = 10

    # ID3v2 header flag bits (layout %abcd0000).
    _ID3_FLAG_UNSYNCHRONIZED = 0x80
    _ID3_FLAG_EXPERIMENTAL = 0x20
    _ID3_FLAG_FOOTER = 0x10

    class _Header(object):
        '''
        Represent the ID3 header in a tag.
        '''
        def __init__(self):
            self.majorVersion = 0
            self.revision = 0
            self.flags = 0
            # Size of the tag body in bytes, excluding header and footer.
            self.size = 0
            self.bUnsynchronized = False
            self.bExperimental = False
            self.bFooter = False

        def __str__(self):
            return str(self.__dict__)

    @classmethod
    def _getSyncSafeInt(cls, bytes):
        '''
        Decode a 4-byte ID3 "synchsafe" integer (7 significant bits per
        byte, most significant byte first).

        :param bytes: exactly four byte values, given either as a
            sequence of integers or as a string of four characters.
        :returns: the decoded integer.
        :rtype: int
        '''
        assert len(bytes) == 4
        if isinstance(bytes, str):
            # A text string yields characters, not integers, when indexed.
            bytes = [ord(c) for c in bytes]
        return (bytes[0] << 21) + (bytes[1] << 14) + (bytes[2] << 7) + bytes[3]

    @classmethod
    def _parseID3Header(cls, content, tagStart):
        '''
        Build a _Header from the 10 header bytes starting at tagStart.
        '''
        raw = bytes(content[tagStart:tagStart + cls._ID3_HEADER_SIZE])
        fields = struct.unpack("!3sBBBBBBB", raw)
        header = cls._Header()
        header.majorVersion = fields[1]
        header.revision = fields[2]
        header.flags = fields[3]
        header.size = cls._getSyncSafeInt(fields[4:8])
        header.bUnsynchronized = bool(header.flags & cls._ID3_FLAG_UNSYNCHRONIZED)
        header.bExperimental = bool(header.flags & cls._ID3_FLAG_EXPERIMENTAL)
        header.bFooter = bool(header.flags & cls._ID3_FLAG_FOOTER)
        return header

    @classmethod
    def _skipID3Tag(cls, content, offset):
        '''
        Return the index of the first byte after the ID3v2 tag, or
        offset unchanged when no complete tag header is found.
        '''
        tagStart = content.find(b"ID3", offset)
        if tagStart < 0 or tagStart + cls._ID3_HEADER_SIZE > len(content):
            # No tag: start looking for frames where the caller asked,
            # instead of at the position where the search gave up.
            return offset
        header = cls._parseID3Header(content, tagStart)
        tagLength = cls._ID3_HEADER_SIZE + header.size
        if header.bFooter:
            tagLength += cls._ID3_FOOTER_SIZE
        return tagStart + tagLength

    @classmethod
    def isAllFramesValid(cls, inInputFilePath):
        '''
        Check if all frames in an MP3 file are valid.

        If we encounter a frame that is not MPEG version 1, layer 3,
        sample rate 44.1 KHz, or whose bitrate field is free-format or
        invalid, we return False. We also return False when no frame
        is found at all.

        Bytes that do not start a frame are skipped while searching
        for the next frame sync; skipping them is tolerated and does
        not by itself fail the check.

        Otherwise, this function will return True.

        :param inInputFilePath: the mp3 file path to check.
        :type inInputFilePath: str
        :returns: whether all frames are valid.
        :rtype: bool

        '''
        content = cls.loadFile(inInputFilePath)
        return cls.isAllFramesInDataValid(content)

    @classmethod
    def isAllFramesInDataValid(cls, content, offset = 0, shouldCheckID3Tag = True):
        '''
        Check if all frames are valid in bytes.

        :param content: the raw file data; indexing it must yield
            integers (for example a bytearray).
        :param offset: offset into content.
        :type offset: integer
        :param shouldCheckID3Tag: whether we should look for an ID3
            tag and skip it before scanning for frames. When no tag is
            found, scanning starts at offset.
        :type shouldCheckID3Tag: bool
        :returns: True when at least one frame was found and every
            frame found is supported; False otherwise.
        :rtype: bool

        '''
        size = len(content)
        if offset > size:
            return False

        i = offset
        if shouldCheckID3Tag:
            i = cls._skipID3Tag(content, offset)

        foundFirstFrame = False
        # A frame header is 4 bytes: AAAAAAAA AAABBCCD EEEEFFGH IIJJKLMM
        # Only the first three bytes are inspected here.
        while i + 2 < size:
            second = content[i + 1]
            # Frame sync (A): eleven set bits.
            if content[i] != 0xFF or (second & 0xE0) != 0xE0:
                # Not a frame start; keep searching byte by byte.
                i += 1
                continue

            foundFirstFrame = True
            third = content[i + 2]
            audioVersion = (second >> 3) & 0x03      # B
            layer = (second >> 1) & 0x03             # C
            sampleRateIndex = (third >> 2) & 0x03    # F
            if (audioVersion != 0x03 or
                    layer != 0x01 or
                    sampleRateIndex != 0x00):
                # we only support MPEG version 1, layer 3, sample rate
                # 44.1 KHz
                print("Unsupported MPEG audio version.")
                return False

            bitrate = cls._BITRATES[third >> 4]      # E
            if bitrate == 0:
                # Free-format or invalid bitrate: the frame length is
                # unknown, and advancing by zero would never terminate.
                return False

            padding = (third >> 1) & 0x01            # G
            # The optional 16-bit CRC is stored inside the frame, so it
            # does not add to the frame length.
            i += 144 * bitrate // cls._SAMPLE_RATE + padding

        return foundFirstFrame

    @classmethod
    def loadFile(cls, inInputFilePath):
        '''
        Load file into bytearray

        :param inInputFilePath: path of the input file.
        :type inInputFilePath: str
        :returns: the whole file content.
        :rtype: bytearray
        '''
        # The context manager closes the file even if read() raises.
        with open(inInputFilePath, 'rb') as inputFile:
            return bytearray(inputFile.read())

def printUsage():
    '''
    Print the command line usage of this script to standard output.

    The script name shown in the text is taken from sys.argv[0].
    '''
    # Parenthesized single-argument print behaves the same on Python 2
    # and Python 3.
    print('''Usage: python %s [MP3 file]
Example: python %s 1311434.mp3
It will parse all frames in mp3 file and return parse result as bool.''' % (sys.argv[0], sys.argv[0]))

if __name__ == '__main__':
    # Command line entry point: expects exactly one argument, the path
    # of the MP3 file, and prints the validation result (True/False).
    if len(sys.argv) != 2:
        printUsage()
        # Non-zero status so callers can tell a usage error from success.
        sys.exit(1)
    filename = str(sys.argv[1])
    # isfile() also rejects directories, which could not be opened.
    if not os.path.isfile(filename):
        print('File not found: %s' % (filename))
        printUsage()
        sys.exit(1)
    result = MP3Parser.isAllFramesValid(filename)
    print(result)