music-kraken-core/music_kraken/objects/metadata.py

401 lines
12 KiB
Python
Raw Permalink Normal View History

2023-01-12 22:01:19 +00:00
from enum import Enum
2023-01-13 08:44:57 +00:00
from typing import List, Dict, Tuple
2023-01-14 13:41:40 +00:00
2023-01-12 22:01:19 +00:00
from mutagen import id3
2023-01-13 13:37:15 +00:00
import datetime
2023-01-12 22:01:19 +00:00
class Mapping(Enum):
"""
These frames belong to the id3 standart
2023-01-13 13:37:15 +00:00
https://web.archive.org/web/20220830091059/https://id3.org/id3v2.4.0-frames
https://id3lib.sourceforge.net/id3/id3v2com-00.html
https://mutagen-specs.readthedocs.io/en/latest/id3/id3v2.4.0-frames.html
2023-01-12 22:01:19 +00:00
"""
# Textframes
TITLE = "TIT2"
ISRC = "TSRC"
2023-02-25 21:16:32 +00:00
LENGTH = "TLEN" # in milliseconds
2023-06-20 17:30:48 +00:00
# The 'Date' frame is a numeric string in the DDMM format containing the date for the recording. This field is always four characters long.
DATE = "TDAT"
# The 'Time' frame is a numeric string in the HHMM format containing the time for the recording. This field is always four characters long.
TIME = "TIME"
YEAR = "TYER"
2023-01-12 22:01:19 +00:00
TRACKNUMBER = "TRCK"
TOTALTRACKS = "TRCK" # Stored in the same frame with TRACKNUMBER, separated by '/': e.g. '4/9'.
TITLESORTORDER = "TSOT"
ENCODING_SETTINGS = "TSSE"
SUBTITLE = "TIT3"
SET_SUBTITLE = "TSST"
RELEASE_DATE = "TDRL"
RECORDING_DATES = "TXXX"
PUBLISHER_URL = "WPUB"
PUBLISHER = "TPUB"
RATING = "POPM"
DISCNUMBER = "TPOS"
MOVEMENT_COUNT = "MVIN"
TOTALDISCS = "TPOS"
ORIGINAL_RELEASE_DATE = "TDOR"
ORIGINAL_ARTIST = "TOPE"
ORIGINAL_ALBUM = "TOAL"
MEDIA_TYPE = "TMED"
LYRICIST = "TEXT"
WRITER = "TEXT"
ARTIST = "TPE1"
2023-01-30 22:54:21 +00:00
LANGUAGE = "TLAN" # https://en.wikipedia.org/wiki/ISO_639-2
2023-01-12 22:01:19 +00:00
ITUNESCOMPILATION = "TCMP"
REMIXED_BY = "TPE4"
RADIO_STATION_OWNER = "TRSO"
RADIO_STATION = "TRSN"
INITIAL_KEY = "TKEY"
OWNER = "TOWN"
ENCODED_BY = "TENC"
COPYRIGHT = "TCOP"
GENRE = "TCON"
GROUPING = "TIT1"
CONDUCTOR = "TPE3"
COMPOSERSORTORDER = "TSOC"
COMPOSER = "TCOM"
BPM = "TBPM"
ALBUM_ARTIST = "TPE2"
BAND = "TPE2"
ARTISTSORTORDER = "TSOP"
ALBUM = "TALB"
ALBUMSORTORDER = "TSOA"
ALBUMARTISTSORTORDER = "TSO2"
2023-01-13 13:37:15 +00:00
TAGGING_TIME = "TDTG"
2023-01-12 22:01:19 +00:00
SOURCE_WEBPAGE_URL = "WOAS"
FILE_WEBPAGE_URL = "WOAF"
INTERNET_RADIO_WEBPAGE_URL = "WORS"
ARTIST_WEBPAGE_URL = "WOAR"
COPYRIGHT_URL = "WCOP"
COMMERCIAL_INFORMATION_URL = "WCOM"
2023-01-13 11:05:44 +00:00
PAYMEMT_URL = "WPAY"
2023-01-12 22:01:19 +00:00
MOVEMENT_INDEX = "MVIN"
MOVEMENT_NAME = "MVNM"
UNSYNCED_LYRICS = "USLT"
COMMENT = "COMM"
@classmethod
def get_text_instance(cls, key: str, value: str):
return id3.Frames[key](encoding=3, text=value)
@classmethod
def get_url_instance(cls, key: str, url: str):
return id3.Frames[key](encoding=3, url=url)
@classmethod
def get_mutagen_instance(cls, attribute, value):
key = attribute.value
if key[0] == 'T':
# a text fiel
return cls.get_text_instance(key, value)
if key[0] == "W":
# an url field
return cls.get_url_instance(key, value)
2023-01-16 11:26:56 +00:00
class ID3Timestamp:
2023-01-13 13:37:15 +00:00
def __init__(
self,
year: int = None,
month: int = None,
day: int = None,
hour: int = None,
minute: int = None,
2023-01-16 11:26:56 +00:00
second: int = None
2023-01-13 13:37:15 +00:00
):
2023-01-16 11:26:56 +00:00
self.year = year
self.month = month
self.day = day
self.hour = hour
self.minute = minute
self.second = second
2023-01-13 13:37:15 +00:00
self.has_year = year is not None
self.has_month = month is not None
self.has_day = day is not None
self.has_hour = hour is not None
self.has_minute = minute is not None
self.has_second = second is not None
if not self.has_year:
year = 1
if not self.has_month:
month = 1
if not self.has_day:
day = 1
2023-01-16 11:26:56 +00:00
if not self.has_hour:
hour = 1
if not self.has_minute:
minute = 1
if not self.has_second:
second = 1
self.date_obj = datetime.datetime(
2023-01-13 13:37:15 +00:00
year=year,
month=month,
day=day,
hour=hour,
minute=minute,
2023-01-16 11:26:56 +00:00
second=second
2023-01-13 13:37:15 +00:00
)
2023-03-31 07:47:03 +00:00
def __hash__(self):
return self.date_obj.__hash__()
2023-01-13 13:37:15 +00:00
2023-02-23 22:52:41 +00:00
def __lt__(self, other):
return self.date_obj < other.date_obj
def __le__(self, other):
return self.date_obj <= other.date_obj
def __gt__(self, other):
return self.date_obj > other.date_obj
def __ge__(self, other):
return self.date_obj >= other.date_obj
def __eq__(self, other):
if type(other) != type(self):
return False
2023-02-23 22:52:41 +00:00
return self.date_obj == other.date_obj
def get_time_format(self) -> str:
2023-01-13 13:37:15 +00:00
"""
https://mutagen-specs.readthedocs.io/en/latest/id3/id3v2.4.0-structure.html
The timestamp fields are based on a subset of ISO 8601. When being as precise as possible the format of a
time string is
- yyyy-MM-ddTHH:mm:ss
- (year[%Y], -, month[%m], -, day[%d], T, hour (out of 24)[%H], :, minutes[%M], :, seconds[%S])
- %Y-%m-%dT%H:%M:%S
but the precision may be reduced by removing as many time indicators as wanted. Hence valid timestamps are
- yyyy
- yyyy-MM
- yyyy-MM-dd
- yyyy-MM-ddTHH
- yyyy-MM-ddTHH:mm
- yyyy-MM-ddTHH:mm:ss
All time stamps are UTC. For durations, use the slash character as described in 8601,
and for multiple non-contiguous dates, use multiple strings, if allowed by the frame definition.
:return timestamp: as timestamp in the format of the id3 time as above described
"""
if self.has_year and self.has_month and self.has_day and self.has_hour and self.has_minute and self.has_second:
return "%Y-%m-%dT%H:%M:%S"
2023-01-13 13:37:15 +00:00
if self.has_year and self.has_month and self.has_day and self.has_hour and self.has_minute:
return "%Y-%m-%dT%H:%M"
2023-01-13 13:37:15 +00:00
if self.has_year and self.has_month and self.has_day and self.has_hour:
return "%Y-%m-%dT%H"
2023-01-13 13:37:15 +00:00
if self.has_year and self.has_month and self.has_day:
return "%Y-%m-%d"
2023-01-13 13:37:15 +00:00
if self.has_year and self.has_month:
return "%Y-%m"
2023-01-13 13:37:15 +00:00
if self.has_year:
return "%Y"
2023-01-13 13:37:15 +00:00
return ""
def get_timestamp(self) -> str:
time_format = self.get_time_format()
return self.date_obj.strftime(time_format)
2023-02-25 21:16:32 +00:00
def get_timestamp_w_format(self) -> Tuple[str, str]:
time_format = self.get_time_format()
return time_format, self.date_obj.strftime(time_format)
@classmethod
def fromtimestamp(cls, utc_timestamp: int):
date_obj = datetime.datetime.fromtimestamp(utc_timestamp)
return cls(
year=date_obj.year,
month=date_obj.month,
day=date_obj.day,
hour=date_obj.hour,
minute=date_obj.minute,
second=date_obj.second
)
2023-01-16 11:26:56 +00:00
@classmethod
def strptime(cls, time_stamp: str, format: str):
2023-01-16 11:57:04 +00:00
"""
day: "%d"
month: "%b", "%B", "%m"
year: "%y", "%Y"
hour: "%H", "%I"
minute: "%M"
second: "%S"
"""
date_obj = datetime.datetime.strptime(time_stamp, format)
day = None
if "%d" in format:
day = date_obj.day
month = None
if any([i in format for i in ("%b", "%B", "%m")]):
month = date_obj.month
year = None
if any([i in format for i in ("%y", "%Y")]):
year = date_obj.year
hour = None
if any([i in format for i in ("%H", "%I")]):
hour = date_obj.hour
minute = None
if "%M" in format:
minute = date_obj.minute
second = None
if "%S" in format:
second = date_obj.second
2023-01-16 11:26:56 +00:00
return cls(
2023-01-16 11:57:04 +00:00
year=year,
month=month,
day=day,
hour=hour,
minute=minute,
second=second
2023-01-16 11:26:56 +00:00
)
@classmethod
def now(cls):
date_obj = datetime.datetime.now()
return cls(
year=date_obj.year,
month=date_obj.month,
day=date_obj.day,
hour=date_obj.hour,
minute=date_obj.minute,
second=date_obj.second
)
def strftime(self, format: str) -> str:
return self.date_obj.strftime(format)
2023-01-13 13:37:15 +00:00
def __str__(self) -> str:
return self.timestamp
def __repr__(self) -> str:
return self.timestamp
timestamp: str = property(fget=get_timestamp)
2023-02-19 22:35:52 +00:00
timeformat: str = property(fget=get_time_format)
2023-01-13 13:37:15 +00:00
2023-03-10 08:09:35 +00:00
class Metadata:
# it's a null byte for the later concatenation of text frames
NULL_BYTE: str = "\x00"
# this is pretty self-explanatory
# the key is an enum from Mapping
# the value is a list with each value
# the mutagen object for each frame will be generated dynamically
2023-03-24 09:30:40 +00:00
id3_dict: Dict[Mapping, list]
2023-01-14 13:41:40 +00:00
2023-03-10 08:09:35 +00:00
def __init__(self, id3_dict: Dict[any, list] = None) -> None:
self.id3_dict = dict()
if id3_dict is not None:
self.add_metadata_dict(id3_dict)
2023-01-12 22:01:19 +00:00
2023-06-20 17:30:48 +00:00
def __setitem__(self, frame: Mapping, value_list: list, override_existing: bool = True):
2023-03-10 08:09:35 +00:00
if type(value_list) != list:
raise ValueError(f"can only set attribute to list, not {type(value_list)}")
2023-01-14 13:41:40 +00:00
2023-03-10 08:09:35 +00:00
new_val = [i for i in value_list if i not in {None, ''}]
2023-01-30 22:54:21 +00:00
2023-03-10 08:09:35 +00:00
if len(new_val) == 0:
return
2023-01-30 22:54:21 +00:00
2023-03-10 08:09:35 +00:00
if override_existing:
self.id3_dict[frame] = new_val
else:
if frame not in self.id3_dict:
2023-01-27 11:56:59 +00:00
self.id3_dict[frame] = new_val
2023-03-10 08:09:35 +00:00
return
2023-01-27 11:56:59 +00:00
2023-03-10 08:09:35 +00:00
self.id3_dict[frame].extend(new_val)
2023-01-27 11:56:59 +00:00
2023-03-10 08:09:35 +00:00
def __getitem__(self, key):
if key not in self.id3_dict:
return None
return self.id3_dict[key]
2023-01-27 11:56:59 +00:00
2023-03-10 08:09:35 +00:00
def delete_field(self, key: str):
if key in self.id3_dict:
return self.id3_dict.pop(key)
2023-01-30 13:41:02 +00:00
2023-03-10 08:09:35 +00:00
def add_metadata_dict(self, metadata_dict: dict, override_existing: bool = True):
for field_enum, value in metadata_dict.items():
self.__setitem__(field_enum, value, override_existing=override_existing)
2023-01-30 13:41:02 +00:00
2023-03-10 08:09:35 +00:00
def merge(self, other, override_existing: bool = False):
"""
adds the values of another metadata obj to this one
2023-01-30 22:54:21 +00:00
2023-03-10 08:09:35 +00:00
other is a value of the type MetadataAttribute.Metadata
"""
2023-01-30 22:54:21 +00:00
2023-03-10 08:09:35 +00:00
self.add_metadata_dict(other.id3_dict, override_existing=override_existing)
2023-01-27 11:56:59 +00:00
2023-03-10 08:09:35 +00:00
def merge_many(self, many_other):
"""
adds the values of many other metadata objects to this one
"""
2023-01-27 11:56:59 +00:00
2023-03-10 08:09:35 +00:00
for other in many_other:
self.merge(other)
2023-01-27 11:56:59 +00:00
2023-03-10 08:09:35 +00:00
def get_id3_value(self, field):
if field not in self.id3_dict:
return None
2023-01-27 11:56:59 +00:00
2023-03-10 08:09:35 +00:00
list_data = self.id3_dict[field]
2023-01-30 22:54:21 +00:00
2023-03-10 08:09:35 +00:00
# convert for example the time objects to timestamps
for i, element in enumerate(list_data):
# for performances sake I don't do other checks if it is already the right type
if type(element) == str:
continue
2023-01-27 11:56:59 +00:00
2023-03-10 08:09:35 +00:00
if type(element) in {int}:
list_data[i] = str(element)
2023-01-27 11:56:59 +00:00
2023-03-10 08:09:35 +00:00
if type(element) == ID3Timestamp:
list_data[i] = element.timestamp
continue
2023-01-27 11:56:59 +00:00
2023-03-10 08:09:35 +00:00
"""
Version 2.4 of the specification prescribes that all text fields (the fields that start with a T, except for TXXX) can contain multiple values separated by a null character.
Thus if above conditions are met, I concatenate the list,
else I take the first element
"""
if field.value[0].upper() == "T" and field.value.upper() != "TXXX":
return self.NULL_BYTE.join(list_data)
2023-01-27 11:56:59 +00:00
2023-03-10 08:09:35 +00:00
return list_data[0]
2023-01-27 11:56:59 +00:00
2023-03-10 08:09:35 +00:00
def get_mutagen_object(self, field):
return Mapping.get_mutagen_instance(field, self.get_id3_value(field))
2023-01-27 11:56:59 +00:00
2023-03-10 08:09:35 +00:00
def __str__(self) -> str:
rows = []
for key, value in self.id3_dict.items():
rows.append(f"{key} - {str(value)}")
return "\n".join(rows)
2023-01-27 11:56:59 +00:00
2023-03-10 08:09:35 +00:00
def __iter__(self):
2023-01-12 22:01:19 +00:00
"""
2023-03-10 08:09:35 +00:00
returns a generator, you can iterate through,
to directly tagg a file with id3 container.
2023-01-12 22:01:19 +00:00
"""
2023-03-10 08:09:35 +00:00
# set the tagging timestamp to the current time
self.__setitem__(Mapping.TAGGING_TIME, [ID3Timestamp.now()])
2023-01-12 22:01:19 +00:00
2023-03-10 08:09:35 +00:00
for field in self.id3_dict:
yield self.get_mutagen_object(field)