1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
#!/bin/python3
'''
id3-unicodify fixes non-ASCII ID3 tags that were wrongly stored as Latin-1
by misconfigured software. For usage information run with -h/--help.
Copyright (C) 2018 dkr <https://tarxjf.info> <dkr _at_ tarxjf.info>
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
'''
import argparse
import os
from mutagen.easyid3 import EasyID3
from mutagen import MutagenError
from bs4 import UnicodeDammit
def tagToString(tag_value, encoding=None):
    """Join the parts of a tag value and encode them for detection.

    Args:
        tag_value: Iterable of str fragments making up one tag value; they
            are concatenated without a separator.
        encoding: Codec used to turn the text back into bytes. Defaults to
            the module-level ``source_enc`` setting.

    Returns:
        The joined text encoded with ``encoding`` as ``bytes``, or the
        joined text itself as ``str`` when it contains characters that the
        codec cannot represent.
    """
    if encoding is None:
        encoding = source_enc
    # Join once, outside the try, so that a one-shot iterable is not
    # already exhausted when the fallback branch needs the text.
    text = ''.join(tag_value)
    try:
        return text.encode(encoding)
    except UnicodeEncodeError:
        # Text that does not fit the codec cannot have been mis-decoded
        # from it, so it is assumed to be proper Unicode and passed along.
        return text
def valueReplace(f):
    """Rewrite every tag in ``f`` whose detected encoding warrants a fix.

    Each value is passed through tagToString() and unicodify(). Values for
    which no encoding was detected, or which were detected as ASCII or
    Latin-1, are left untouched. Any other value is announced on stdout
    and replaced in ``f`` with its Unicode conversion.

    Args:
        f: Mapping of tag names to values, modified in place. Values are
            presumably lists of str, as they are joined with ''.join().
    """
    unchanged_encodings = ("ascii", "iso-8859-1", "latin-1")
    for key, value in f.items():
        detected = unicodify(tagToString(value))
        encoding = detected['source_enc']
        if encoding is None or encoding in unchanged_encodings:
            continue
        converted = detected['uni_markup']
        print('{} "{}" looks like {}. It will become "{}"'.format(
            key, ''.join(value), encoding, converted))
        f[key] = converted
def openFile(filename):
    """Open ``filename`` with EasyID3 and return the resulting tag object.

    A confirmation line naming the file is printed once it has loaded.
    """
    tags = EasyID3(filename)
    print("Loaded file: {}.".format(filename))
    return tags
def travDir(workingdir):
    """Return the paths of all files found beneath ``workingdir``.

    The tree is traversed with os.walk(), so files in nested
    subdirectories are included. Each path is the containing directory,
    as reported by the walk, joined with the file name. Directories
    themselves are not listed.
    """
    found = []
    for parent, _subdirs, names in os.walk(workingdir):
        found.extend(os.path.join(parent, name) for name in names)
    return found
def unicodify(string):
    """Detect the encoding of a tag value and convert it to Unicode.

    Args:
        string: Tag value to analyse, as produced by tagToString().

    Returns:
        A dict with two keys: 'source_enc', the ``original_encoding``
        reported by UnicodeDammit, and 'uni_markup', its
        ``unicode_markup`` conversion of the input.
    """
    # ``suspects`` holds the raw -c/--codepage value: None when the option
    # was not given, otherwise a comma-separated string. Split it so that
    # every name reaches UnicodeDammit as a separate candidate encoding
    # instead of one bogus name containing commas.
    if suspects is None:
        candidates = []
    elif isinstance(suspects, str):
        candidates = [name.strip() for name in suspects.split(',')
                      if name.strip()]
    else:
        # Already a collection of encoding names; use it as given.
        candidates = list(suspects)
    detector = UnicodeDammit(string, candidates)
    return {'source_enc': detector.original_encoding,
            'uni_markup': detector.unicode_markup}
# Command-line interface: exactly one of -d/--dir or -f/--file is required.
parser = argparse.ArgumentParser(description='')
source = parser.add_mutually_exclusive_group(required=True)
source.add_argument('-d', '--dir', help='Source directory path')
source.add_argument('-f', '--file', help='Source file')
parser.add_argument('-c', '--codepage',
                    help='Suspected encodings to aid detection. '
                         'Comma-separated list.')
parser.add_argument('--dryrun', help='Do not actually write changes to file',
                    action='store_true')
args = parser.parse_args()

# Module-level settings read by tagToString() and unicodify().
source_enc = 'latin-1'  # Default scenario
suspects = args.codepage

print(__doc__)
if args.dryrun:
    print("This is a dry run. No changes will be written.")
else:
    print("Changes will be written.")

# A single file is handled as a one-element list so that both modes share
# the same saving and error handling. Previously the -f/--file path never
# saved its changes and let MutagenError escape as a traceback.
if args.file is not None:
    filelist = [args.file]
else:
    filelist = travDir(args.dir)

for f in filelist:
    try:
        audiofile = openFile(f)
        valueReplace(audiofile)
        print("================")
        if not args.dryrun:
            audiofile.save()
    except MutagenError:
        print("Skipping file {}. No ID3 tags or incompatible format."
              .format(f))
        print("================")
|