405 lines
12 KiB
Python
405 lines
12 KiB
Python
from enum import Enum
|
||
from typing import List, Dict, Tuple
|
||
|
||
from mutagen import id3
|
||
import datetime
|
||
|
||
|
||
class Mapping(Enum):
    """
    Frame identifiers that belong to the id3 standard.

    References:
    https://web.archive.org/web/20220830091059/https://id3.org/id3v2.4.0-frames
    https://id3lib.sourceforge.net/id3/id3v2com-00.html
    https://mutagen-specs.readthedocs.io/en/latest/id3/id3v2.4.0-frames.html

    Several names share one frame id (e.g. TRACKNUMBER / TOTALTRACKS).
    Because this is an Enum, every later name with an already used value
    is an alias of the first member declared with that value.
    """

    # --- text frames -----------------------------------------------------
    TITLE = "TIT2"
    ISRC = "TSRC"
    LENGTH = "TLEN"  # in milliseconds
    # 'Date' frame: numeric string in DDMM format with the recording date,
    # always four characters long.
    DATE = "TDAT"
    # 'Time' frame: numeric string in HHMM format with the recording time,
    # always four characters long.
    TIME = "TIME"
    YEAR = "TYER"
    TRACKNUMBER = "TRCK"
    # Lives in the same frame as TRACKNUMBER, separated by '/': e.g. '4/9'.
    TOTALTRACKS = "TRCK"
    TITLESORTORDER = "TSOT"
    ENCODING_SETTINGS = "TSSE"
    SUBTITLE = "TIT3"
    SET_SUBTITLE = "TSST"
    RELEASE_DATE = "TDRL"
    RECORDING_DATES = "TXXX"
    PUBLISHER_URL = "WPUB"
    PUBLISHER = "TPUB"
    RATING = "POPM"
    DISCNUMBER = "TPOS"
    MOVEMENT_COUNT = "MVIN"
    TOTALDISCS = "TPOS"
    ORIGINAL_RELEASE_DATE = "TDOR"
    ORIGINAL_ARTIST = "TOPE"
    ORIGINAL_ALBUM = "TOAL"
    MEDIA_TYPE = "TMED"
    LYRICIST = "TEXT"
    WRITER = "TEXT"
    ARTIST = "TPE1"
    LANGUAGE = "TLAN"  # https://en.wikipedia.org/wiki/ISO_639-2
    ITUNESCOMPILATION = "TCMP"
    REMIXED_BY = "TPE4"
    RADIO_STATION_OWNER = "TRSO"
    RADIO_STATION = "TRSN"
    INITIAL_KEY = "TKEY"
    OWNER = "TOWN"
    ENCODED_BY = "TENC"
    COPYRIGHT = "TCOP"
    GENRE = "TCON"
    GROUPING = "TIT1"
    CONDUCTOR = "TPE3"
    COMPOSERSORTORDER = "TSOC"
    COMPOSER = "TCOM"
    BPM = "TBPM"
    ALBUM_ARTIST = "TPE2"
    BAND = "TPE2"
    ARTISTSORTORDER = "TSOP"
    ALBUM = "TALB"
    ALBUMSORTORDER = "TSOA"
    ALBUMARTISTSORTORDER = "TSO2"
    TAGGING_TIME = "TDTG"

    # --- url frames ------------------------------------------------------
    SOURCE_WEBPAGE_URL = "WOAS"
    FILE_WEBPAGE_URL = "WOAF"
    INTERNET_RADIO_WEBPAGE_URL = "WORS"
    ARTIST_WEBPAGE_URL = "WOAR"
    COPYRIGHT_URL = "WCOP"
    COMMERCIAL_INFORMATION_URL = "WCOM"
    PAYMEMT_URL = "WPAY"

    # --- movement frames -------------------------------------------------
    MOVEMENT_INDEX = "MVIN"
    MOVEMENT_NAME = "MVNM"

    # --- lyrics / comments -----------------------------------------------
    UNSYNCED_LYRICS = "USLT"
    COMMENT = "COMM"

    @classmethod
    def get_text_instance(cls, key: str, value: str):
        """Build the mutagen frame registered under `key` with `value` as its text (encoding 3)."""
        frame_class = id3.Frames[key]
        return frame_class(encoding=3, text=value)

    @classmethod
    def get_url_instance(cls, key: str, url: str):
        """Build the mutagen frame registered under `key` with `url` as its url (encoding 3)."""
        frame_class = id3.Frames[key]
        return frame_class(encoding=3, url=url)

    @classmethod
    def get_mutagen_instance(cls, attribute, value):
        """
        Turn a Mapping member and its value into a mutagen frame object.

        The first letter of the frame id decides the frame kind:
        'T' is handled as a text frame, 'W' as a url frame.

        :param attribute: the Mapping member whose `.value` is the frame id
        :param value: the text or url to store in the frame
        :return: the mutagen frame, or None for any other kind of frame id
        """
        frame_id = attribute.value
        prefix = frame_id[0]

        if prefix == 'T':
            return cls.get_text_instance(frame_id, value)
        elif prefix == "W":
            return cls.get_url_instance(frame_id, value)

        # neither a text nor a url frame: nothing is built for it
        return None
|
||
|
||
|
||
class ID3Timestamp:
    """
    A point in time with reduced precision, rendered in the ID3v2.4 timestamp format.

    Every component (year, month, day, hour, minute, second) is optional.
    The given values are kept on the attributes of the same name (None if
    missing) and the `has_*` flags tell which ones were given.

    `date_obj` is a naive `datetime.datetime` built from the components; a
    missing component is replaced by the placeholder 1 there. Hashing and all
    comparisons work on `date_obj`, so a missing component compares like 1.
    """

    # strftime format for each number of known leading components
    # (index 0: nothing known ... index 6: everything down to the second)
    _FORMATS_BY_PRECISION = (
        "",
        "%Y",
        "%Y-%m",
        "%Y-%m-%d",
        "%Y-%m-%dT%H",
        "%Y-%m-%dT%H:%M",
        "%Y-%m-%dT%H:%M:%S",
    )

    def __init__(
            self,
            year: int = None,
            month: int = None,
            day: int = None,
            hour: int = None,
            minute: int = None,
            second: int = None
    ):
        """
        :param year: year, or None if unknown
        :param month: month, or None if unknown
        :param day: day of month, or None if unknown
        :param hour: hour, or None if unknown
        :param minute: minute, or None if unknown
        :param second: second, or None if unknown
        :raises ValueError: if datetime.datetime rejects the given components
        """
        self.year = year
        self.month = month
        self.day = day
        self.hour = hour
        self.minute = minute
        self.second = second

        self.has_year = year is not None
        self.has_month = month is not None
        self.has_day = day is not None
        self.has_hour = hour is not None
        self.has_minute = minute is not None
        self.has_second = second is not None

        # datetime needs a concrete value for every component,
        # so 1 is used as placeholder for everything that is missing
        self.date_obj = datetime.datetime(
            year=year if self.has_year else 1,
            month=month if self.has_month else 1,
            day=day if self.has_day else 1,
            hour=hour if self.has_hour else 1,
            minute=minute if self.has_minute else 1,
            second=second if self.has_second else 1
        )

    def __hash__(self):
        return hash(self.date_obj)

    # The comparison operators return NotImplemented for foreign types, so
    # python can try the reflected operation or raise a proper TypeError
    # instead of failing with an AttributeError on `other.date_obj`.

    def __lt__(self, other):
        if not isinstance(other, ID3Timestamp):
            return NotImplemented
        return self.date_obj < other.date_obj

    def __le__(self, other):
        if not isinstance(other, ID3Timestamp):
            return NotImplemented
        return self.date_obj <= other.date_obj

    def __gt__(self, other):
        if not isinstance(other, ID3Timestamp):
            return NotImplemented
        return self.date_obj > other.date_obj

    def __ge__(self, other):
        if not isinstance(other, ID3Timestamp):
            return NotImplemented
        return self.date_obj >= other.date_obj

    def __eq__(self, other):
        if not isinstance(other, ID3Timestamp):
            # `==` against a foreign type still evaluates to False
            return NotImplemented
        return self.date_obj == other.date_obj

    def get_time_format(self) -> str:
        """
        https://mutagen-specs.readthedocs.io/en/latest/id3/id3v2.4.0-structure.html

        The timestamp fields are based on a subset of ISO 8601. When being as precise as possible the format of a
        time string is
         - yyyy-MM-ddTHH:mm:ss
         - %Y-%m-%dT%H:%M:%S
        but the precision may be reduced by removing as many time indicators as wanted. Hence valid timestamps are
         - yyyy
         - yyyy-MM
         - yyyy-MM-dd
         - yyyy-MM-ddTHH
         - yyyy-MM-ddTHH:mm
         - yyyy-MM-ddTHH:mm:ss
        All time stamps are UTC. For durations, use the slash character as described in 8601,
        and for multiple non-contiguous dates, use multiple strings, if allowed by the frame definition.

        Only the unbroken run of known components, starting at the year, is
        used: a timestamp with year and day but no month has the format "%Y".

        :return: the strftime format matching the precision of this timestamp,
                 or an empty string if not even the year is known
        """
        known_flags = (
            self.has_year,
            self.has_month,
            self.has_day,
            self.has_hour,
            self.has_minute,
            self.has_second,
        )

        precision = 0
        for is_known in known_flags:
            if not is_known:
                break
            precision += 1

        return self._FORMATS_BY_PRECISION[precision]

    def get_timestamp(self) -> str:
        """
        :return: this timestamp rendered with the format of `get_time_format`
        """
        return self.date_obj.strftime(self.get_time_format())

    def get_timestamp_w_format(self) -> Tuple[str, str]:
        """
        :return: a tuple of (strftime format, rendered timestamp)
        """
        time_format = self.get_time_format()
        return time_format, self.date_obj.strftime(time_format)

    @classmethod
    def _from_datetime(cls, date_obj: datetime.datetime):
        """Create a timestamp with full precision from the components of a datetime."""
        return cls(
            year=date_obj.year,
            month=date_obj.month,
            day=date_obj.day,
            hour=date_obj.hour,
            minute=date_obj.minute,
            second=date_obj.second
        )

    @classmethod
    def fromtimestamp(cls, utc_timestamp: int):
        """
        Create a timestamp with full precision from a POSIX timestamp.

        The components are taken in UTC, as id3 timestamps are defined to be
        UTC, so the result doesn't depend on the time zone of the machine.

        :param utc_timestamp: seconds since the epoch
        """
        date_obj = datetime.datetime.fromtimestamp(utc_timestamp, tz=datetime.timezone.utc)
        return cls._from_datetime(date_obj)

    @classmethod
    def strptime(cls, time_stamp: str, format: str):
        """
        Parse `time_stamp` with `format`, and keep only the components the
        format actually contains. The directives that are recognized are:

        day: "%d"
        month: "%b", "%B", "%m"
        year: "%y", "%Y"
        hour: "%H", "%I"
        minute: "%M"
        second: "%S"

        :raises ValueError: if `time_stamp` doesn't match `format`
        """
        date_obj = datetime.datetime.strptime(time_stamp, format)

        def parsed(value: int, *directives: str):
            # a component only counts as known if the format asked for it
            if any(directive in format for directive in directives):
                return value
            return None

        return cls(
            year=parsed(date_obj.year, "%y", "%Y"),
            month=parsed(date_obj.month, "%b", "%B", "%m"),
            day=parsed(date_obj.day, "%d"),
            hour=parsed(date_obj.hour, "%H", "%I"),
            minute=parsed(date_obj.minute, "%M"),
            second=parsed(date_obj.second, "%S")
        )

    @classmethod
    def now(cls):
        """
        Create a timestamp with full precision of the current time in UTC,
        as id3 timestamps are defined to be UTC.
        """
        return cls._from_datetime(datetime.datetime.now(datetime.timezone.utc))

    def strftime(self, format: str) -> str:
        """Render `date_obj` with an arbitrary strftime format."""
        return self.date_obj.strftime(format)

    def __str__(self) -> str:
        return self.timestamp

    def __repr__(self) -> str:
        return self.timestamp

    timestamp: str = property(fget=get_timestamp)
    timeformat: str = property(fget=get_time_format)
|
||
|
||
|
||
class Metadata:
    """
    Collects the values of id3 frames and renders them for mutagen.

    The values are stored per frame in `id3_dict`. The mutagen object of a
    frame is not stored, but generated on demand from the stored values.
    """

    # it's a null byte for the later concatenation of text frames
    NULL_BYTE: str = "\x00"
    # the key is an enum member from Mapping
    # the value is a list with each value of that frame
    # (annotation is a string, so it isn't evaluated at class creation)
    id3_dict: "Dict[Mapping, list]"

    def __init__(self, id3_dict: "Dict[Mapping, list]" = None) -> None:
        """
        :param id3_dict: optional initial values, added with `add_metadata_dict`
        """
        self.id3_dict = dict()
        if id3_dict is not None:
            self.add_metadata_dict(id3_dict)

    def __setitem__(self, frame: "Mapping", value_list: list, override_existing: bool = True):
        """
        Store values for a frame. None and empty strings are dropped;
        if nothing is left afterwards, the stored values stay untouched.

        :param frame: the frame to set the values for
        :param value_list: the list of values
        :param override_existing: True replaces the stored values,
                                  False appends to them
        :raises ValueError: if `value_list` is not a list
        """
        if not isinstance(value_list, list):
            raise ValueError(f"can only set attribute to list, not {type(value_list)}")

        # no set membership test here: that would require hashable values
        new_val = [
            i for i in value_list
            if i is not None and not (isinstance(i, str) and i == '')
        ]

        if not new_val:
            return

        if override_existing or frame not in self.id3_dict:
            self.id3_dict[frame] = new_val
            return

        self.id3_dict[frame].extend(new_val)

    def __getitem__(self, key):
        """
        :return: the list of values stored for `key`, or None if there are none
        """
        return self.id3_dict.get(key)

    def delete_field(self, key: "Mapping"):
        """
        Remove a frame with all of its values.

        :return: the removed list of values, or None if nothing was stored
        """
        return self.id3_dict.pop(key, None)

    def add_metadata_dict(self, metadata_dict: dict, override_existing: bool = True):
        """
        Set every frame of `metadata_dict` (frame -> list of values) on this
        object, see `__setitem__` for the meaning of `override_existing`.
        """
        for field_enum, value in metadata_dict.items():
            self.__setitem__(field_enum, value, override_existing=override_existing)

    def merge(self, other, override_existing: bool = False):
        """
        adds the values of another metadata obj to this one

        :param other: another Metadata object
        :param override_existing: by default the values of `other` are
                                  appended to the values already stored
        """
        self.add_metadata_dict(other.id3_dict, override_existing=override_existing)

    def merge_many(self, many_other):
        """
        adds the values of many other metadata objects to this one
        """
        for other in many_other:
            self.merge(other)

    def get_id3_value(self, field):
        """
        Render the values stored for a frame into the single value of the frame.

        Integers are converted to strings and ID3Timestamp objects to their
        timestamp, afterwards duplicates are removed (the first occurrence
        wins). The stored list itself is not modified.

        Version 2.4 of the specification prescribes that all text fields
        (the fields that start with a T, except for TXXX) can contain multiple
        values separated by a null character. Thus for those frames the values
        are concatenated with a null byte, else the first value is taken.

        :param field: the frame, its `.value` has to be the frame id
        :return: the rendered value, or None if nothing is stored for `field`
        """
        if field not in self.id3_dict:
            return None

        list_data = []
        for element in self.id3_dict[field]:
            if isinstance(element, str):
                rendered = element
            elif type(element) is int:
                rendered = str(element)
            elif isinstance(element, ID3Timestamp):
                rendered = element.timestamp
            else:
                rendered = element

            # deduplicate after the conversion, so 1 and "1" count as the same
            if rendered not in list_data:
                list_data.append(rendered)

        frame_id = field.value.upper()
        if frame_id[0] == "T" and frame_id != "TXXX":
            return self.NULL_BYTE.join(list_data)

        return list_data[0]

    def get_mutagen_object(self, field):
        """
        :return: what `Mapping.get_mutagen_instance` builds from the rendered
                 value of `field`; that is None for frames that are neither
                 text nor url frames
        """
        return Mapping.get_mutagen_instance(field, self.get_id3_value(field))

    def __str__(self) -> str:
        return "\n".join(f"{key} - {str(value)}" for key, value in self.id3_dict.items())

    def __iter__(self):
        """
        returns a generator, you can iterate through,
        to directly tag a file with id3 container.

        Iterating sets the tagging time of this object to the current time.
        Frames for which no mutagen object can be built are skipped.
        """
        # set the tagging timestamp to the current time
        self.__setitem__(Mapping.TAGGING_TIME, [ID3Timestamp.now()])

        for field in self.id3_dict:
            mutagen_object = self.get_mutagen_object(field)
            if mutagen_object is None:
                # get_mutagen_instance only knows text and url frames
                continue
            yield mutagen_object
|