我在使用 Mutagen 进行字符编码时遇到问题。
我将其dict[key]
转换为 Unicode,但我收到的只是错误。有问题的字符是U+00E9
or é
,但我打印的是├⌐
. 我假设 Mutagen 的默认字符集是 UTF-8,但有没有办法解决这个问题?
输出:
Winter Wonderland.mp3
Album : Christmas
Album Artist: Michael Bublé
Artist : Michael Bublé
Composer : None
Disk : None
Encoded By : None
Genre : Christmas
Title : Winter Wonderland
Track : 17/19
Year : 2011
代码:
#!/usr/bin/env python
import os
import re
from mutagen.mp3 import MP3
first_cap_re = re.compile('(.)([A-Z][a-z]+)')
all_cap_re = re.compile('([a-z0-9])([A-Z])')
def convertCamelCase2Underscore(name):
s1 = first_cap_re.sub(r'\1_\2', name)
return all_cap_re.sub(r'\1_\2', s1).lower()
def convertCamelCase2CapitalizedWords(name):
return ' '.join([x.capitalize() for x in convertCamelCase2Underscore(name).split('_')])
def safeValue(dict, key):
return None if key not in dict else dict[key]
class Track:
def __init__(self, path):
audio = MP3(path)
self.title = safeValue(audio, 'TIT2')
self.artist = safeValue(audio, 'TPE1')
self.albumArtist = safeValue(audio, 'TPE2')
self.album = safeValue(audio, 'TALB')
self.genre = safeValue(audio, 'TCON')
self.year = safeValue(audio, 'TDRL')
self.encodedBy = safeValue(audio, 'TENC')
self.composer = safeValue(audio, 'TXXX:TCM')
self.track = safeValue(audio, 'TRCK')
self.disk = safeValue(audio, 'TXXX:TPA')
def __repr__(self):
ret = ''
fields = self.__dict__
for k, v in sorted(self.__dict__.iteritems()):
ret += '{:12s}: {:s}\n'.format(convertCamelCase2CapitalizedWords(k), v)
return ret
files = os.listdir('.')
for filename in files:
print filename
print Track(filename)