From ac7ab7b0ce16504b047352151c0e27b73a0acd46 Mon Sep 17 00:00:00 2001 From: Hellow2 Date: Mon, 13 Mar 2023 11:20:33 +0100 Subject: [PATCH 01/42] started a documentation --- documentation/objects.md | 36 ++++++++++++++++++++++++++++++++++++ 1 file changed, 36 insertions(+) create mode 100644 documentation/objects.md diff --git a/documentation/objects.md b/documentation/objects.md new file mode 100644 index 0000000..7b3d3c5 --- /dev/null +++ b/documentation/objects.md @@ -0,0 +1,36 @@ +# music_kraken.objects + +## DatabaseObject + +[music_kraken.objects.DatabaseObject](../src/music_kraken/objects/parents.py) + +## Collection + +[music_kraken.objects.Collection](../src/music_kraken/objects/collection.py) + +This is an object, which acts as a list. You can save instaces of a subclass of [DatabaseObject](#databaseobject). + +Then you can for example append a new Object. The difference to a normal list is, that if you have two different objects that both represent the same data, it doesn't get added, but all data gets merged into one Object instead. + +For example, you have two different Artist-Objects, where both have one source in common. The one Artist-Object already is in the Collection. The other artist object is passed in the append command. +In this case it doesn't simply add the artist object to the collection, but modifies the already existing Artist-Object, adding all attributes the new artist object has, and then discards the other object. 
+ +```python +artist_collection = Collection(element_type=Artist) + +# adds the artist to the list (len 1) +artist_collection.append(artist_1) + +# detects artist 2 has a mutual source +# thus not adding but merging (len 1) +artist_collection.append(artist_2) +``` + +Function | Explanation +---|--- +`append()` | appends an object to the collection +`extend()` | appends a list of objects to the collection +`__len__()` | gets the amount of objects in collection +`shallow_list` | gets a shallow copy of the list `_data` the objects are contained in +`sort()` | takes the same arguments than `list.sort`, and does the same +`__iter__()` | allows you to use collections e.g. a for loop From 49ad1d168981e42e54b91d9de490c1acea51e109 Mon Sep 17 00:00:00 2001 From: Hellow2 Date: Mon, 13 Mar 2023 13:46:58 +0100 Subject: [PATCH 02/42] Update objects.md --- documentation/objects.md | 21 ++++++++++++++++++++- 1 file changed, 20 insertions(+), 1 deletion(-) diff --git a/documentation/objects.md b/documentation/objects.md index 7b3d3c5..6aa27ed 100644 --- a/documentation/objects.md +++ b/documentation/objects.md @@ -4,13 +4,28 @@ [music_kraken.objects.DatabaseObject](../src/music_kraken/objects/parents.py) +This is a parent object, which most Music-Objects inherit from. It provides the **functionality** to: + +- autogenerate id's *(UUID)*, if not passed in the constructor. +- [merge](#databaseobjectmerge) the data of another instance of the same type into self. +- Check if two different instances of the same type represent the same data, using `__eq__`. + +Additionally it provides an **Interface** to: + +- define the attributes used to [merge](#databaseobjectmerge). +- define the attributes and values used to check for equal data. *(used in `__eq__` and in the merge)* +- get the id3 [metadata](#metadata). +- get all [options](#options) *(used in searching from e.g. 
the command line)* + +### DatabaseObject.merge() + ## Collection [music_kraken.objects.Collection](../src/music_kraken/objects/collection.py) This is an object, which acts as a list. You can save instaces of a subclass of [DatabaseObject](#databaseobject). -Then you can for example append a new Object. The difference to a normal list is, that if you have two different objects that both represent the same data, it doesn't get added, but all data gets merged into one Object instead. +Then you can for example append a new Object. The difference to a normal list is, that if you have two different objects that both represent the same data, it doesn't get added, but all data gets [merged](#databaseobjectmerge) into the existing Object instead. For example, you have two different Artist-Objects, where both have one source in common. The one Artist-Object already is in the Collection. The other artist object is passed in the append command. In this case it doesn't simply add the artist object to the collection, but modifies the already existing Artist-Object, adding all attributes the new artist object has, and then discards the other object. @@ -34,3 +49,7 @@ Function | Explanation `shallow_list` | gets a shallow copy of the list `_data` the objects are contained in `sort()` | takes the same arguments than `list.sort`, and does the same `__iter__()` | allows you to use collections e.g. a for loop + +## Options + +## Metadata From 85c38ea7d577bbe5f9e0ce075f13ea06f1e24872 Mon Sep 17 00:00:00 2001 From: Hellow2 Date: Mon, 13 Mar 2023 13:58:24 +0100 Subject: [PATCH 03/42] continued documentation --- README.md | 49 ++++------------------------------------ documentation/objects.md | 27 ++++++++++++++++++++++ 2 files changed, 31 insertions(+), 45 deletions(-) diff --git a/README.md b/README.md index dff4049..5213a74 100644 --- a/README.md +++ b/README.md @@ -83,6 +83,7 @@ This application is $100\%$ centered around Data. 
Thus the most important thing - explanation of the [Data Model](#data-model) - how to use the [Data Objects](#data-objects) +- further Dokumentation of *hopefully* [most relevant classes](documentation/objects.md) ```mermaid --- @@ -295,8 +296,7 @@ For those who don't want any bugs and use it as intended *(which is recommended, If you want to append for example a Song to an Album, you obviously need to check beforehand if the Song already exists in the Album, and if so, you need to merge their data in one Song object, to not loose any Information. -Fortunately I implemented all of this functionality in [objects.Collection](#collection).append(music_object). -I made a flow chart showing how it works: +This is how I solve this problem: ```mermaid --- @@ -341,9 +341,9 @@ the music_object exists exist-->|"if already exists"|merge --> return ``` -This is Implemented in [music_kraken.objects.Collection.append()](src/music_kraken/objects/collection.py). +This is Implemented in [music_kraken.objects.Collection.append()](documentation/objects.md#collection). The merging which is mentioned in the flowchart is explained in the documentation of [DatabaseObject.merge()](documentation/objects.md#databaseobjectmerge). -The indexing values are defined in the superclass [DatabaseObject](src/music_kraken/objects/parents.py) and get implemented for each Object seperately. I will just give as example its implementation for the `Song` class: +The indexing values are defined in the superclass [DatabaseObject](documentation/objects.md#databaseobject) and get implemented for each Object seperately. 
I will just give as example its implementation for the `Song` class: ```python @property @@ -355,48 +355,7 @@ def indexing_values(self) -> List[Tuple[str, object]]: *[('url', source.url) for source in self.source_collection] ] ``` - -## Classes and Objects -### music_kraken.objects - -#### Collection - -#### Song - -So as you can see, the probably most important Class is the `music_kraken.Song` class. It is used to save the song in *(duh)*. - -It has handful attributes, where half of em are self-explanatory, like `title` or `genre`. The ones like `isrc` are only relevant to you, if you know what it is, so I won't elaborate on it. - -Interesting is the `date`. It uses a custom class. More on that [here](#music_krakenid3timestamp). - -#### ID3Timestamp - -For multiple Reasons I don't use the default `datetime.datetime` class. - -The most important reason is, that you need to pass in at least year, month and day. For every other values there are default values, that are indistinguishable from values that are directly passed in. But I need optional values. The ID3 standart allows default values. Additionally `datetime.datetime` is immutable, thus I can't inherint all the methods. Sorry. - -Anyway you can create those custom objects easily. - -```python -from music_kraken import ID3Timestamp - -# returns an instance of ID3Timestamp with the current time -ID3Timestamp.now() - -# yea -ID3Timestamp(year=1986, month=3, day=1) -``` - -you can pass in the Arguments: - - year - - month - - day - - hour - - minute - - second - -:) # Old implementation diff --git a/documentation/objects.md b/documentation/objects.md index 6aa27ed..d712a85 100644 --- a/documentation/objects.md +++ b/documentation/objects.md @@ -53,3 +53,30 @@ Function | Explanation ## Options ## Metadata + +## Song + +This object inherits from [DatabaseObject](#databaseobject) and implements all its interfaces. + +It has handful attributes, where half of em are self-explanatory, like `title` or `genre`. 
The ones like `isrc` are only relevant to you, if you know what it is, so I won't elaborate on it. + +Interesting is the `date`. It uses a custom class. More on that [here](#id3timestamp). + +## ID3Timestamp + +For multiple Reasons I don't use the default `datetime.datetime` class. + +The most important reason is, that you need to pass in at least year, month and day. For all other values there are default values, that are indistinguishable from values that are directly passed in. But I need optional values. The ID3 standard allows default values. Additionally `datetime.datetime` is immutable, thus I can't inherit all the methods. Sorry. + +Anyway you can create those custom objects easily. + +```python +from music_kraken import ID3Timestamp + +# returns an instance of ID3Timestamp with the current time +ID3Timestamp.now() + +# returns an instance of ID3Timestamp with the given values +# all values are optional if unknown +ID3Timestamp(year=1986, month=3, day=1, hour=12, minute=30, second=6) +``` From 652a9cfb3b88828b2ef2c7b01d21a0194d0dc5ff Mon Sep 17 00:00:00 2001 From: Hellow2 Date: Mon, 13 Mar 2023 13:59:15 +0100 Subject: [PATCH 04/42] Update README.md --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 5213a74..b678562 100644 --- a/README.md +++ b/README.md @@ -79,7 +79,7 @@ I decided against creating a discord server, due to piracy communities get often This application is $100\%$ centered around Data. Thus the most important thing for working with musik kraken is, to understand how I structured the data. 
-## quick Overview +## Quick Overview - explanation of the [Data Model](#data-model) - how to use the [Data Objects](#data-objects) From 891f41efef605f5cfc44d8b64cccaa0a6434b0f9 Mon Sep 17 00:00:00 2001 From: Hellow2 Date: Mon, 13 Mar 2023 14:05:03 +0100 Subject: [PATCH 05/42] documented the merging --- documentation/objects.md | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/documentation/objects.md b/documentation/objects.md index d712a85..dd8de45 100644 --- a/documentation/objects.md +++ b/documentation/objects.md @@ -19,6 +19,12 @@ Additionally it provides an **Interface** to: ### DatabaseObject.merge() +To merge the data of two instances of the same type, the attributes defined in `DatabaseObject.COLLECTION_ATTRIBUTES` and `SIMPLE_ATTRIBUTES` are used. + +The simple attributes just get carried over from the other instance to the self instance. + +For the collection attributes, all elements from `other.collection` get appended to `self.collection`, but of course the append [checks whether an element already exists](#collection). + ## Collection [music_kraken.objects.Collection](../src/music_kraken/objects/collection.py) From 73e90e467e69506fd30d7b0118a3aa97a00a7a90 Mon Sep 17 00:00:00 2001 From: Hellow2 Date: Mon, 13 Mar 2023 14:14:25 +0100 Subject: [PATCH 06/42] seperated old implementation from readme --- README.md | 308 ++-------------------------- documentation/old_implementation.md | 285 +++++++++++++++++++++++++ 2 files changed, 299 insertions(+), 294 deletions(-) create mode 100644 documentation/old_implementation.md diff --git a/README.md b/README.md index b678562..db0123b 100644 --- a/README.md +++ b/README.md @@ -2,15 +2,19 @@ -1. [Installlation](#installation) -2. [Command Line Usage](#quick-guide) -3. [Contribute](#contribute) -4. [Matrix Space](#matrix-space), if you don't wanna read: **[Invite](https://matrix.to/#/#music-kraken:matrix.org)** - -5. [Library Usage / Python Interface](#programming-interface--use-as-library) -6. [About Metadata](#metadata) -7. [About the Audio](#download) -8. 
[About the Lyrics](#lyrics) +- [Music Kraken](#music-kraken) + - [Installation](#installation) + - [Notes for Python 3.9](#notes-for-python-39) + - [Notes for WSL](#notes-for-wsl) + - [Quick-Guide](#quick-guide) + - [CONTRIBUTE](#contribute) + - [Matrix Space](#matrix-space) +- [Programming Interface / Use as Library](#programming-interface--use-as-library) + - [Quick Overview](#quick-overview) + - [Data Model](#data-model) + - [Data Objects](#data-objects) + - [Creation](#creation) + - [Appending and Merging data](#appending-and-merging-data) --- @@ -84,6 +88,7 @@ This application is $100\%$ centered around Data. Thus the most important thing - explanation of the [Data Model](#data-model) - how to use the [Data Objects](#data-objects) - further Dokumentation of *hopefully* [most relevant classes](documentation/objects.md) +- the [old implementation](documentation/old_implementation.md) ```mermaid --- @@ -356,290 +361,5 @@ def indexing_values(self) -> List[Tuple[str, object]]: ] ``` - -# Old implementation - -> IF U USE THIS NOW YOU ARE DUMB *no offense thoug*. IT ISN'T FINISHED AND THE STUFF YOU CODE NOW WILL BE BROKEN TOMORROW -> SOON YOU CAN THOUGH - -If you want to use this project, or parts from it in your own projects from it, -make sure to be familiar with [Python Modules](https://docs.python.org/3/tutorial/modules.html). -Further and better documentation including code examples are yet to come, so here is the rough -module structure for now. (Should be up-to-date, but no guarantees) - -If you simply want to run the builtin minimal cli just do this: -```python -from music_kraken import cli - -cli() -``` - -### Search for Metadata - -The whole program takes the data it processes further from the cache, a sqlite database. -So before you can do anything, you will need to fill it with the songs you want to download (*or create song objects manually, but more on that later*). 
- -For now the base of everything is [musicbrainz][mb], so you need to get the musicbrainz `id` and `type`. The `id` corresponds to either - - an artist - - a release group - - a release - - a recording/track). - -To get this info, you first have to initialize a search object (`music_kraken.MetadataSearch`). - -```python -search_object = music_kraken.MetadataSearch() -``` - -Then you need an initial "text search" to get some options you can choose from. For -this you can either specify artists releases and whatever directly with one of the following functions: - -```python -# you can directly specify artist, release group, release or recording/track -multiple_options = search_object.search_from_text(artist=input("input the name of the artist: ")) -# you can specify a query see the simple integrated cli on how to use the query -multiple_options = search_object.search_from_query(query=input("input the query: ")) -``` - -Both methods return an instance of `MultipleOptions`, which can be directly converted to a string. - -```python -print(multiple_options) -``` - -After the first "*text search*" you can either again search the same way as before, -or you can further explore one of the options from the previous search. -To explore and select one options from `MultipleOptions`, simply call `MetadataSearch.choose(self, index: int)`. -The index represents the number in the previously returned instance of MultipleOptions. -The selected Option will be selected and can be downloaded in the next step. - -*Thus, this has to be done **after either search_from_text or search_from_query*** - -```python -# choosing the best matching band -multiple_options = search_object.choose(0) -# choosing the first ever release group of this band -multiple_options = search_object.choose(1) -# printing out the current options -print(multiple_options) -``` - -This process can be repeated indefinitely (until you run out of memory). -A search history is kept in the Search instance. 
You could go back to -the previous search (without any loading time) like this: - -```python -multiple_options = search_object.get_previous_options() -``` - -### Downloading Metadata / Filling up the Cache - -You can download following metadata: - - an artist (the whole discography) - - a release group - - a release - - a track/recording - -If you got an instance of `MetadataSearch`, like I elaborated [previously](#search-for-metadata), downloading every piece of metadata from the currently selected Option is really quite easy. - -```python -from music_kraken import fetch_metadata_from_search - -# this is it :) -music_kraken.fetch_metadata_from_search(search_object) -``` - -If you already know what you want to download you can skip the search instance and simply do the following. - -```python -from music_kraken import fetch_metadata - -# might change and break after I add multiple metadata sources which I will - -fetch_metadata(id_=musicbrainz_id, type=metadata_type) -``` -The option type is a string (*I'm sorry for not making it an enum I know its a bad pratice*), which can -have following values: - - 'artist' - - 'release_group' - - 'release' - - 'recording' - -**PAY ATTENTION TO TYPOS, IT'S CASE SENSITIVE** - -The musicbrainz id is just the id of the object from musicbrainz. - -After following those steps, it might take a couple seconds/minutes to execute, but then the Cache will be filled. - - -### Cache / Temporary Database - -All the data, the functions that download stuff use, can be gotten from the temporary database / cache. -The cache can be simply used like this: - -```python -music_kraken.test_db -``` - -When fetching any song data from the cache, you will get it as Song -object (music_kraken.Song). There are multiple methods -to get different sets of Songs. The names explain the methods pretty -well: - -```python -from music_kraken import cache - -# gets a single track specified by the id -cache.get_track_metadata(id: str) - -# gets a list of tracks. 
-cache.get_tracks_to_download() -cache.get_tracks_without_src() -cache.get_tracks_without_isrc() -cache.get_tracks_without_filepath() -``` - -The id always is a musicbrainz id and distinct for every track. - -### Setting the Target - -By default the music downloader doesn't know where to save the music file, if downloaded. To set those variables (the directory to save the file in and the filepath), it is enough to run one single command: - -```python -from music_kraken import set_target - -# adds file path, file directory and the genre to the database -set_target(genre="some test genre") -``` - -The concept of genres is too loose, to definitely say, this band exclusively plays this genre, or this song is this genre. This doesn't work manually, this will never work automatically. Thus, I've decided to just use the genre as category, to sort the artists and songs by. Most Music players support that. - -As a result of this decision you will have to pass the genre in this function. - -### Get Audio - -This is most likely the most useful and unique feature of this Project. If the cache is filled, you can get audio sources for the songs you only have the metadata, and download them. This works for most songs. I'd guess for about 97% (?) - -First of you will need a List of song objects `music_kraken.Song`. As [mentioned above](#cache--temporary-database), you could get a list like that from the cache. - -```python -# Here is an Example -from music_kraken import ( - cache, - fetch_sources, - fetch_audios -) - -# scanning pages, searching for a download and storing results -fetch_sources(cache.get_tracks_without_src()) - -# downloading all previously fetched sources to previously defined targets -fetch_audios(cache.get_tracks_to_download()) - -``` - -*Note:* -To download audio two cases have to be met: - 1. [The target](#setting-the-target) has to be set beforehand - 2. 
The sources have to be fetched beforehand - ---- - -## Metadata - -First the metadata has to be downloaded. The best api to do so is undeniably [Musicbrainz][mb]. This is a result of them being a website with a large Database spanning over all Genres. - -### Musicbrainz - -![Musicbrainz Data Scheme](https://wiki.musicbrainz.org/-/images/9/9e/pymb3-model-core.png) - -To fetch from [Musicbrainz][mb] we first have to know what to fetch. A good start is to get an input query, which can be just put into the MB-Api. It then returns a list of possible artists, releases and recordings. - -If the following chosen element is an artist, its discography + a couple tracks are printed, if a release is chosen, the artists + tracklist + release is outputted, If a track is chosen its artists and releases are shown. - -For now, it doesn't if the discography or tracklist is chosen. - -### Metadata to fetch - -I orient on which metadata to download on the keys in `mutagen.EasyID3`. The following I fetch and tag the MP3 with: -- title -- artist -- albumartist -- tracknumber -- albumsort can sort albums cronological -- titlesort is just set to the tracknumber to sort by track order to sort correctly -- isrc -- musicbrainz_artistid -- musicbrainz_albumid -- musicbrainz_albumartistid -- musicbrainz_albumstatus -- language -- musicbrainz_albumtype -- releasecountry -- barcode - -#### albumsort/titlesort - -Those Tags are for the musicplayer to not sort for Example the albums of a band alphabetically, but in another way. I set it just to chronological order - -#### isrc - -This is the **international standart release code**. With this a track can be identified 99% of the time, if it is known and the website has a search api for that. Obviously this will get important later. - -## Download - -Now that the metadata is downloaded and cached, download sources need to be sound, because one can't listen to metadata. Granted it would be amazing if that would be possible. 
- -### Musify - -The quickest source to get download links from is to my knowledge [musify](https://musify.club/). It's a Russian music downloading page, where many many songs are available to stream and to download. Due to me not wanting to stress the server to much, I abuse a handy feature nearly every page where you can search suff has. The autocomplete api for the search input. Those always are quite limited in the number of results it returns, but it is optimized to be quick. Thus with the http header `Connection` set to `keep-alive` the bottleneck definitely is not at the speed of those requests. - -For musify the endpoint is following: [https://musify.club/search/suggestions?term={title}](https://musify.club/search/suggestions?term=LornaShore) If the http headers are set correctly, then searching for example for "Lorna Shore" yields following result: - -```json -[ - { - "id":"Lorna Shore", - "label":"Lorna Shore", - "value":"Lorna Shore", - "category":"Исполнители", - "image":"https://39s.musify.club/img/68/9561484/25159224.jpg", - "url":"/artist/lorna-shore-59611" - }, - {"id":"Immortal","label":"Lorna Shore - Immortal (2020)","value":"Immortal","category":"Релизы","image":"https://39s-a.musify.club/img/70/20335517/52174338.jpg","url":"/release/lorna-shore-immortal-2020-1241300"}, - {"id":"Immortal","label":"Lorna Shore - Immortal","value":"Immortal","category":"Треки","image":"","url":"/track/lorna-shore-immortal-12475071"} -] -``` - -This is a shortened example for the response the api gives. The results are very Limited, but it is also very efficient to parse. The steps I take are: - -- call the api with the query being the track name -- parse the json response to an object -- look at how different the title and artist are on every element from the category `Треки`, translated roughly to track or release. -- If they match get the download links and cache them. - -### Youtube - -Herte the **isrc** plays a huge role. 
You probably know it, when you search on youtube for a song, and the music videos has a long intro or the first result is a live version. I don't want those in my music collection, only if the tracks are like this in the official release. Well how can you get around that? - -Turns out if you search for the **isrc** on youtube the results contain the music, like it is on the official release and some japanese meme videos. The tracks I wan't just have the title of the released track, so one can just compare those two. - -For searching, as well as for downloading I use the programm `youtube-dl`, which also has a programming interface for python. - -There are two bottlenecks with this approach though: -1. `youtube-dl` is just slow. Actually it has to be, to not get blocked by youtube. -2. Ofthen musicbrainz just doesn't give the isrc for some songs. - - -## Lyrics - -To get the Lyrics, I scrape them, and put those in the USLT ID3 Tags of for example mp3 files. Unfortunately some players, like the one I use, Rhythmbox don't support USLT Lyrics. So I created an Plugin for Rhythmbox. You can find it here: [https://github.com/HeIIow2/rythmbox-id3-lyrics-support](https://github.com/HeIIow2/rythmbox-id3-lyrics-support). - -### Genius - -For the lyrics source the page [https://genius.com/](https://genius.com/) is easily sufficient. It has most songs. Some songs are not present though, but that is fine, because the lyrics are optional anyways. - - [i10]: https://github.com/HeIIow2/music-downloader/issues/10 [i2]: https://github.com/HeIIow2/music-downloader/issues/2 -[mb]: https://musicbrainz.org/ diff --git a/documentation/old_implementation.md b/documentation/old_implementation.md new file mode 100644 index 0000000..4fe9c41 --- /dev/null +++ b/documentation/old_implementation.md @@ -0,0 +1,285 @@ +# Old implementation + +> IF U USE THIS NOW YOU ARE DUMB *no offense thoug*. 
IT ISN'T FINISHED AND THE STUFF YOU CODE NOW WILL BE BROKEN TOMORROW +> SOON YOU CAN THOUGH + +If you want to use this project, or parts from it in your own projects from it, +make sure to be familiar with [Python Modules](https://docs.python.org/3/tutorial/modules.html). +Further and better documentation including code examples are yet to come, so here is the rough +module structure for now. (Should be up-to-date, but no guarantees) + +If you simply want to run the builtin minimal cli just do this: +```python +from music_kraken import cli + +cli() +``` + +### Search for Metadata + +The whole program takes the data it processes further from the cache, a sqlite database. +So before you can do anything, you will need to fill it with the songs you want to download (*or create song objects manually, but more on that later*). + +For now the base of everything is [musicbrainz][mb], so you need to get the musicbrainz `id` and `type`. The `id` corresponds to either + - an artist + - a release group + - a release + - a recording/track). + +To get this info, you first have to initialize a search object (`music_kraken.MetadataSearch`). + +```python +search_object = music_kraken.MetadataSearch() +``` + +Then you need an initial "text search" to get some options you can choose from. For +this you can either specify artists releases and whatever directly with one of the following functions: + +```python +# you can directly specify artist, release group, release or recording/track +multiple_options = search_object.search_from_text(artist=input("input the name of the artist: ")) +# you can specify a query see the simple integrated cli on how to use the query +multiple_options = search_object.search_from_query(query=input("input the query: ")) +``` + +Both methods return an instance of `MultipleOptions`, which can be directly converted to a string. 
+ +```python +print(multiple_options) +``` + +After the first "*text search*" you can either again search the same way as before, +or you can further explore one of the options from the previous search. +To explore and select one options from `MultipleOptions`, simply call `MetadataSearch.choose(self, index: int)`. +The index represents the number in the previously returned instance of MultipleOptions. +The selected Option will be selected and can be downloaded in the next step. + +*Thus, this has to be done **after either search_from_text or search_from_query*** + +```python +# choosing the best matching band +multiple_options = search_object.choose(0) +# choosing the first ever release group of this band +multiple_options = search_object.choose(1) +# printing out the current options +print(multiple_options) +``` + +This process can be repeated indefinitely (until you run out of memory). +A search history is kept in the Search instance. You could go back to +the previous search (without any loading time) like this: + +```python +multiple_options = search_object.get_previous_options() +``` + +### Downloading Metadata / Filling up the Cache + +You can download following metadata: + - an artist (the whole discography) + - a release group + - a release + - a track/recording + +If you got an instance of `MetadataSearch`, like I elaborated [previously](#search-for-metadata), downloading every piece of metadata from the currently selected Option is really quite easy. + +```python +from music_kraken import fetch_metadata_from_search + +# this is it :) +music_kraken.fetch_metadata_from_search(search_object) +``` + +If you already know what you want to download you can skip the search instance and simply do the following. 
+ +```python +from music_kraken import fetch_metadata + +# might change and break after I add multiple metadata sources which I will + +fetch_metadata(id_=musicbrainz_id, type=metadata_type) +``` +The option type is a string (*I'm sorry for not making it an enum I know its a bad pratice*), which can +have following values: + - 'artist' + - 'release_group' + - 'release' + - 'recording' + +**PAY ATTENTION TO TYPOS, IT'S CASE SENSITIVE** + +The musicbrainz id is just the id of the object from musicbrainz. + +After following those steps, it might take a couple seconds/minutes to execute, but then the Cache will be filled. + + +### Cache / Temporary Database + +All the data, the functions that download stuff use, can be gotten from the temporary database / cache. +The cache can be simply used like this: + +```python +music_kraken.test_db +``` + +When fetching any song data from the cache, you will get it as Song +object (music_kraken.Song). There are multiple methods +to get different sets of Songs. The names explain the methods pretty +well: + +```python +from music_kraken import cache + +# gets a single track specified by the id +cache.get_track_metadata(id: str) + +# gets a list of tracks. +cache.get_tracks_to_download() +cache.get_tracks_without_src() +cache.get_tracks_without_isrc() +cache.get_tracks_without_filepath() +``` + +The id always is a musicbrainz id and distinct for every track. + +### Setting the Target + +By default the music downloader doesn't know where to save the music file, if downloaded. To set those variables (the directory to save the file in and the filepath), it is enough to run one single command: + +```python +from music_kraken import set_target + +# adds file path, file directory and the genre to the database +set_target(genre="some test genre") +``` + +The concept of genres is too loose, to definitely say, this band exclusively plays this genre, or this song is this genre. This doesn't work manually, this will never work automatically. 
Thus, I've decided to just use the genre as category, to sort the artists and songs by. Most Music players support that. + +As a result of this decision you will have to pass the genre in this function. + +### Get Audio + +This is most likely the most useful and unique feature of this Project. If the cache is filled, you can get audio sources for the songs you only have the metadata of, and download them. This works for most songs. I'd guess for about 97% (?) + +First off, you will need a List of song objects `music_kraken.Song`. As [mentioned above](#cache--temporary-database), you could get a list like that from the cache. + +```python +# Here is an Example +from music_kraken import ( + cache, + fetch_sources, + fetch_audios +) + +# scanning pages, searching for a download and storing results +fetch_sources(cache.get_tracks_without_src()) + +# downloading all previously fetched sources to previously defined targets +fetch_audios(cache.get_tracks_to_download()) + +``` + +*Note:* +To download audio two conditions have to be met: + 1. [The target](#setting-the-target) has to be set beforehand + 2. The sources have to be fetched beforehand + +--- + +## Metadata + +First the metadata has to be downloaded. The best api to do so is undeniably [Musicbrainz][mb]. This is a result of them being a website with a large Database spanning over all Genres. + +### Musicbrainz + +![Musicbrainz Data Scheme](https://wiki.musicbrainz.org/-/images/9/9e/pymb3-model-core.png) + +To fetch from [Musicbrainz][mb] we first have to know what to fetch. A good start is to get an input query, which can be just put into the MB-Api. It then returns a list of possible artists, releases and recordings. + +If the following chosen element is an artist, its discography + a couple tracks are printed, if a release is chosen, the artists + tracklist + release is outputted, If a track is chosen its artists and releases are shown. + +For now, it doesn't matter whether the discography or tracklist is chosen. 
+ +### Metadata to fetch + +I base the choice of which metadata to download on the keys in `mutagen.EasyID3`. The following I fetch and tag the MP3 with: +- title +- artist +- albumartist +- tracknumber +- albumsort can sort albums chronologically +- titlesort is just set to the tracknumber, so that tracks sort in track order +- isrc +- musicbrainz_artistid +- musicbrainz_albumid +- musicbrainz_albumartistid +- musicbrainz_albumstatus +- language +- musicbrainz_albumtype +- releasecountry +- barcode + +#### albumsort/titlesort + +Those tags tell the music player not to sort, for example, the albums of a band alphabetically, but in another way. I set it just to chronological order. + +#### isrc + +This is the **International Standard Recording Code**. With this a track can be identified 99% of the time, if it is known and the website has a search api for that. Obviously this will get important later. + +## Download + +Now that the metadata is downloaded and cached, download sources need to be found, because one can't listen to metadata. Granted it would be amazing if that would be possible. + +### Musify + +The quickest source to get download links from is to my knowledge [musify](https://musify.club/). It's a Russian music downloading page, where many many songs are available to stream and to download. Due to me not wanting to stress the server too much, I abuse a handy feature nearly every page where you can search stuff has: the autocomplete api for the search input. Those always are quite limited in the number of results they return, but they are optimized to be quick. Thus with the http header `Connection` set to `keep-alive` the bottleneck definitely is not at the speed of those requests. 
+ +For musify the endpoint is the following: [https://musify.club/search/suggestions?term={title}](https://musify.club/search/suggestions?term=LornaShore) If the http headers are set correctly, then searching for example for "Lorna Shore" yields the following result: + +```json +[ + { + "id":"Lorna Shore", + "label":"Lorna Shore", + "value":"Lorna Shore", + "category":"Исполнители", + "image":"https://39s.musify.club/img/68/9561484/25159224.jpg", + "url":"/artist/lorna-shore-59611" + }, + {"id":"Immortal","label":"Lorna Shore - Immortal (2020)","value":"Immortal","category":"Релизы","image":"https://39s-a.musify.club/img/70/20335517/52174338.jpg","url":"/release/lorna-shore-immortal-2020-1241300"}, + {"id":"Immortal","label":"Lorna Shore - Immortal","value":"Immortal","category":"Треки","image":"","url":"/track/lorna-shore-immortal-12475071"} +] +``` + +This is a shortened example for the response the api gives. The results are very limited, but it is also very efficient to parse. The steps I take are: + +- call the api with the query being the track name +- parse the json response to an object +- look at how different the title and artist are on every element from the category `Треки`, which translates roughly to "tracks". +- If they match get the download links and cache them. + +### Youtube + +Here the **isrc** plays a huge role. You probably know it, when you search on youtube for a song, and the music video has a long intro or the first result is a live version. I don't want those in my music collection, only if the tracks are like this in the official release. Well how can you get around that? + +Turns out if you search for the **isrc** on youtube the results contain the music, like it is on the official release and some japanese meme videos. The tracks I want just have the title of the released track, so one can just compare those two. 
+ +For searching, as well as for downloading I use the programm `youtube-dl`, which also has a programming interface for python. + +There are two bottlenecks with this approach though: +1. `youtube-dl` is just slow. Actually it has to be, to not get blocked by youtube. +2. Ofthen musicbrainz just doesn't give the isrc for some songs. + + +## Lyrics + +To get the Lyrics, I scrape them, and put those in the USLT ID3 Tags of for example mp3 files. Unfortunately some players, like the one I use, Rhythmbox don't support USLT Lyrics. So I created an Plugin for Rhythmbox. You can find it here: [https://github.com/HeIIow2/rythmbox-id3-lyrics-support](https://github.com/HeIIow2/rythmbox-id3-lyrics-support). + +### Genius + +For the lyrics source the page [https://genius.com/](https://genius.com/) is easily sufficient. It has most songs. Some songs are not present though, but that is fine, because the lyrics are optional anyways. + + + +[mb]: https://musicbrainz.org/ From a11a4ca4c27d9ce706f853c598b28ac5761cbafe Mon Sep 17 00:00:00 2001 From: Hellow2 Date: Mon, 13 Mar 2023 14:28:28 +0100 Subject: [PATCH 07/42] Update formatted_text.py --- src/music_kraken/objects/formatted_text.py | 16 +++++----------- 1 file changed, 5 insertions(+), 11 deletions(-) diff --git a/src/music_kraken/objects/formatted_text.py b/src/music_kraken/objects/formatted_text.py index 3abc6d1..238cc2c 100644 --- a/src/music_kraken/objects/formatted_text.py +++ b/src/music_kraken/objects/formatted_text.py @@ -10,6 +10,10 @@ https://pandoc.org/installing.html class FormattedText: + """ + the self.html value should be saved to the database + """ + doc = None def __init__( @@ -51,23 +55,13 @@ class FormattedText: if self.doc is None: return None return pandoc.write(self.doc, format="plain").strip() - - @property - def json(self) -> str: - if self.doc is None: - return None - return pandoc.write(self.doc, format="json") + plaintext = property(fget=get_plaintext, fset=set_plaintext) markdown = 
property(fget=get_markdown, fset=set_markdown) html = property(fget=get_html, fset=set_html) -class NotesAttributes: - def __init__(self) -> None: - pass - - if __name__ == "__main__": _plaintext = """ World of Work From 8e6baef85446f333bd79e13b9061e748a3b090b7 Mon Sep 17 00:00:00 2001 From: Hellow2 Date: Mon, 13 Mar 2023 14:30:11 +0100 Subject: [PATCH 08/42] Update lyrics.py --- src/music_kraken/objects/lyrics.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/music_kraken/objects/lyrics.py b/src/music_kraken/objects/lyrics.py index 67931c6..746fc36 100644 --- a/src/music_kraken/objects/lyrics.py +++ b/src/music_kraken/objects/lyrics.py @@ -9,6 +9,9 @@ from .formatted_text import FormattedText class Lyrics(DatabaseObject): + COLLECTION_ATTRIBUTES = ["source_collection"] + SIMPLE_ATTRIBUTES = ["text", "language"] + def __init__( self, text: FormattedText, From 25ddfc4997b6c65f268642c23178ee350db38255 Mon Sep 17 00:00:00 2001 From: Hellow2 Date: Mon, 13 Mar 2023 14:32:00 +0100 Subject: [PATCH 09/42] Update source.py --- src/music_kraken/objects/source.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/music_kraken/objects/source.py b/src/music_kraken/objects/source.py index f95ab80..eb50346 100644 --- a/src/music_kraken/objects/source.py +++ b/src/music_kraken/objects/source.py @@ -55,6 +55,8 @@ class Source(DatabaseObject): Source(src="youtube", url="https://youtu.be/dfnsdajlhkjhsd") ``` """ + COLLECTION_ATTRIBUTES = [] + SIMPLE_ATTRIBUTES = ["type_enum", "page_enum", "url"] def __init__(self, page_enum: SourcePages, url: str, id_: str = None, type_enum=None) -> None: DatabaseObject.__init__(self, id_=id_) From 292bdfd2fade39fb9a0248a8d2e60500ab7bfcb6 Mon Sep 17 00:00:00 2001 From: Hellow2 Date: Mon, 13 Mar 2023 14:33:17 +0100 Subject: [PATCH 10/42] attribute --- src/music_kraken/objects/lyrics.py | 4 ++-- src/music_kraken/objects/source.py | 4 ++-- src/music_kraken/objects/target.py | 1 + 3 files changed, 5 insertions(+), 4 deletions(-) diff --git 
a/src/music_kraken/objects/lyrics.py b/src/music_kraken/objects/lyrics.py index 746fc36..dc668ed 100644 --- a/src/music_kraken/objects/lyrics.py +++ b/src/music_kraken/objects/lyrics.py @@ -9,8 +9,8 @@ from .formatted_text import FormattedText class Lyrics(DatabaseObject): - COLLECTION_ATTRIBUTES = ["source_collection"] - SIMPLE_ATTRIBUTES = ["text", "language"] + COLLECTION_ATTRIBUTES = ("source_collection",) + SIMPLE_ATTRIBUTES = ("text", "language") def __init__( self, diff --git a/src/music_kraken/objects/source.py b/src/music_kraken/objects/source.py index eb50346..8fe03fc 100644 --- a/src/music_kraken/objects/source.py +++ b/src/music_kraken/objects/source.py @@ -55,8 +55,8 @@ class Source(DatabaseObject): Source(src="youtube", url="https://youtu.be/dfnsdajlhkjhsd") ``` """ - COLLECTION_ATTRIBUTES = [] - SIMPLE_ATTRIBUTES = ["type_enum", "page_enum", "url"] + COLLECTION_ATTRIBUTES = tuple() + SIMPLE_ATTRIBUTES = ("type_enum", "page_enum", "url") def __init__(self, page_enum: SourcePages, url: str, id_: str = None, type_enum=None) -> None: DatabaseObject.__init__(self, id_=id_) diff --git a/src/music_kraken/objects/target.py b/src/music_kraken/objects/target.py index 7d7f7eb..895e367 100644 --- a/src/music_kraken/objects/target.py +++ b/src/music_kraken/objects/target.py @@ -15,6 +15,7 @@ class Target(DatabaseObject): """ SIMPLE_ATTRIBUTES = ("_file", "_path") + COLLECTION_ATTRIBUTES = tuple() def __init__( self, From e445e7b5d1a39fd25b4cf9d0ef7c1053fc86da47 Mon Sep 17 00:00:00 2001 From: Hellow2 Date: Mon, 13 Mar 2023 14:39:46 +0100 Subject: [PATCH 11/42] options --- src/metal_archives.py | 5 +++++ src/music_kraken/objects/__init__.py | 5 ++++- src/music_kraken/pages/abstract.py | 7 ++++--- src/music_kraken/pages/encyclopaedia_metallum.py | 15 ++++++++------- 4 files changed, 21 insertions(+), 11 deletions(-) diff --git a/src/metal_archives.py b/src/metal_archives.py index 99bbec6..78e7b84 100644 --- a/src/metal_archives.py +++ b/src/metal_archives.py @@ -5,9 
+5,14 @@ from music_kraken.pages import ( EncyclopaediaMetallum ) +""" results = EncyclopaediaMetallum.search_by_query("#a Only Smile") artist = results[0] artist: objects.Artist = EncyclopaediaMetallum.fetch_details(artist) print(artist.options) print() +""" + +if __name__ == "__main__": + pass diff --git a/src/music_kraken/objects/__init__.py b/src/music_kraken/objects/__init__.py index 2f39411..03907fc 100644 --- a/src/music_kraken/objects/__init__.py +++ b/src/music_kraken/objects/__init__.py @@ -4,7 +4,8 @@ from . import ( source, parents, formatted_text, - album + album, + option ) MusicObject = parents.DatabaseObject @@ -28,3 +29,5 @@ AlbumStatus = album.AlbumStatus Album = song.Album FormattedText = formatted_text.FormattedText + +Options = option.Options diff --git a/src/music_kraken/pages/abstract.py b/src/music_kraken/pages/abstract.py index be08b81..1236738 100644 --- a/src/music_kraken/pages/abstract.py +++ b/src/music_kraken/pages/abstract.py @@ -9,7 +9,8 @@ from ..objects import ( Artist, Lyrics, Target, - MusicObject + MusicObject, + Options ) @@ -69,7 +70,7 @@ class Page: song_str = property(fget=lambda self: self.get_str(self.song)) @classmethod - def search_by_query(cls, query: str) -> List[MusicObject]: + def search_by_query(cls, query: str) -> Options: """ # The Query You can define a new parameter with "#", @@ -84,7 +85,7 @@ class Page: :return possible_music_objects: """ - return [] + return Options() @classmethod def fetch_details(cls, music_object: MusicObject, flat: bool = False) -> MusicObject: diff --git a/src/music_kraken/pages/encyclopaedia_metallum.py b/src/music_kraken/pages/encyclopaedia_metallum.py index dc89db3..f226303 100644 --- a/src/music_kraken/pages/encyclopaedia_metallum.py +++ b/src/music_kraken/pages/encyclopaedia_metallum.py @@ -17,7 +17,8 @@ from ..objects import ( Album, ID3Timestamp, FormattedText, - Label + Label, + Options ) from ..utils import ( string_processing @@ -34,7 +35,7 @@ class EncyclopaediaMetallum(Page): 
SOURCE_TYPE = SourcePages.ENCYCLOPAEDIA_METALLUM @classmethod - def search_by_query(cls, query: str) -> List[MusicObject]: + def search_by_query(cls, query: str) -> Options: query_obj = cls.Query(query) if query_obj.is_raw: @@ -42,14 +43,14 @@ class EncyclopaediaMetallum(Page): return cls.advanced_search(query_obj) @classmethod - def advanced_search(cls, query: Page.Query) -> List[MusicObject]: + def advanced_search(cls, query: Page.Query) -> Options: if query.song is not None: - return cls.search_for_song(query=query) + return Options(cls.search_for_song(query=query)) if query.album is not None: - return cls.search_for_album(query=query) + return Options(cls.search_for_album(query=query)) if query.artist is not None: - return cls.search_for_artist(query=query) - return [] + return Options(cls.search_for_artist(query=query)) + return Options @classmethod def search_for_song(cls, query: Page.Query) -> List[Song]: From a7de1d2a68a87c612f204ecdd5d79cdd3143c59d Mon Sep 17 00:00:00 2001 From: Hellow2 Date: Mon, 13 Mar 2023 14:47:49 +0100 Subject: [PATCH 12/42] fixed somethin --- src/metal_archives.py | 8 ++------ src/music_kraken/objects/option.py | 6 ++++-- 2 files changed, 6 insertions(+), 8 deletions(-) diff --git a/src/metal_archives.py b/src/metal_archives.py index 78e7b84..d3dfd4f 100644 --- a/src/metal_archives.py +++ b/src/metal_archives.py @@ -5,14 +5,10 @@ from music_kraken.pages import ( EncyclopaediaMetallum ) -""" -results = EncyclopaediaMetallum.search_by_query("#a Only Smile") + +results = EncyclopaediaMetallum.search_by_query("#a Happy Days") artist = results[0] artist: objects.Artist = EncyclopaediaMetallum.fetch_details(artist) print(artist.options) print() -""" - -if __name__ == "__main__": - pass diff --git a/src/music_kraken/objects/option.py b/src/music_kraken/objects/option.py index 524596e..a5703d3 100644 --- a/src/music_kraken/objects/option.py +++ b/src/music_kraken/objects/option.py @@ -21,8 +21,10 @@ class Options: return 
self._data[index].options - def __getitem__(self, item: int) -> 'Options': + def __getitem__(self, item: int) -> 'DatabaseObject': if type(item) != int: raise TypeError("Key needs to be an Integer") + if item >= len(self._data): + raise ValueError("Index out of bounds") - return self.get_next_options(item) + return self._data[item] From 38142df92e29dd777690478035ff662e03dd3225 Mon Sep 17 00:00:00 2001 From: Hellow2 Date: Mon, 13 Mar 2023 15:47:38 +0100 Subject: [PATCH 13/42] Create musify.py --- src/music_kraken/pages/musify.py | 90 ++++++++++++++++++++++++++++++++ 1 file changed, 90 insertions(+) create mode 100644 src/music_kraken/pages/musify.py diff --git a/src/music_kraken/pages/musify.py b/src/music_kraken/pages/musify.py new file mode 100644 index 0000000..b359f52 --- /dev/null +++ b/src/music_kraken/pages/musify.py @@ -0,0 +1,90 @@ +from typing import List +import requests +from bs4 import BeautifulSoup +import pycountry + +from ..utils.shared import ( + ENCYCLOPAEDIA_METALLUM_LOGGER as LOGGER +) + +from .abstract import Page +from ..objects import ( + MusicObject, + Artist, + Source, + SourcePages, + Song, + Album, + ID3Timestamp, + FormattedText, + Label, + Options +) +from ..utils import ( + string_processing, + shared +) + + +class EncyclopaediaMetallum(Page): + API_SESSION: requests.Session = requests.Session() + API_SESSION.headers = { + "User-Agent": "Mozilla/5.0 (X11; Linux x86_64; rv:106.0) Gecko/20100101 Firefox/106.0", + "Connection": "keep-alive", + "Referer": "https://musify.club/" + } + API_SESSION.proxies = shared.proxies + + SOURCE_TYPE = SourcePages.MUSIFY + + @classmethod + def search_by_query(cls, query: str) -> Options: + query_obj = cls.Query(query) + + if query_obj.is_raw: + return cls.simple_search(query_obj) + return cls.advanced_search(query_obj) + + @classmethod + def advanced_search(cls, query: Page.Query) -> Options: + if query.song is not None: + return Options(cls.search_for_song(query=query)) + if query.album is not None: + 
return Options(cls.search_for_album(query=query)) + if query.artist is not None: + return Options(cls.search_for_artist(query=query)) + return Options + + @classmethod + def search_for_song(cls, query: Page.Query) -> List[Song]: + return [] + + @classmethod + def search_for_album(cls, query: Page.Query) -> List[Album]: + return [] + + @classmethod + def search_for_artist(cls, query: Page.Query) -> List[Artist]: + return [] + + @classmethod + def simple_search(cls, query: Page.Query) -> List[Artist]: + return [] + + @classmethod + def fetch_album_details(cls, album: Album, flat: bool = False) -> Album: + + return album + + @classmethod + def fetch_song_details(cls, song: Song, flat: bool = False) -> Song: + source_list = song.source_collection.get_sources_from_page(cls.SOURCE_TYPE) + if len(source_list) == 0: + return song + + """ + TODO + lyrics + """ + + return song From 4056c736b9dcfbbce13956ff9cf6d9c6cb66a273 Mon Sep 17 00:00:00 2001 From: Hellow2 Date: Tue, 14 Mar 2023 11:03:54 +0100 Subject: [PATCH 14/42] made a function to compile --- src/create_custom_objects.py | 7 ++- src/music_kraken/objects/collection.py | 21 +++++++ src/music_kraken/objects/parents.py | 12 ++++ src/music_kraken/objects/song.py | 60 ++++++++++++++++++++ src/music_kraken/recurse/__init__.py | 3 + src/music_kraken/recurse/build.py | 35 ++++++++++++ src/music_kraken/recurse_objects/__init__.py | 0 7 files changed, 137 insertions(+), 1 deletion(-) create mode 100644 src/music_kraken/recurse/__init__.py create mode 100644 src/music_kraken/recurse/build.py delete mode 100644 src/music_kraken/recurse_objects/__init__.py diff --git a/src/create_custom_objects.py b/src/create_custom_objects.py index 3a9c323..1312d19 100644 --- a/src/create_custom_objects.py +++ b/src/create_custom_objects.py @@ -1,4 +1,4 @@ -from music_kraken import objects +from music_kraken import objects, recurse import pycountry @@ -41,6 +41,9 @@ song = objects.Song( objects.SourcePages.ENCYCLOPAEDIA_METALLUM, 
"https://www.metal-archives.com/bands/I%27m_in_a_Coffin/127727" ) + ], + label_list=[ + objects.Label(name="Depressive records") ] ), objects.Artist(name="some_split_artist") @@ -55,6 +58,8 @@ song = objects.Song( ], ) +song.compile() + print(song.option_string) for album in song.album_collection: print(album.option_string) diff --git a/src/music_kraken/objects/collection.py b/src/music_kraken/objects/collection.py index 0173de0..d70617b 100644 --- a/src/music_kraken/objects/collection.py +++ b/src/music_kraken/objects/collection.py @@ -33,6 +33,7 @@ class Collection: ``` """ self._attribute_to_object_map: Dict[str, Dict[object, DatabaseObject]] = defaultdict(dict) + self._used_ids: set = set() if data is not None: self.extend(data, merge_on_conflict=True) @@ -46,6 +47,8 @@ class Collection: continue self._attribute_to_object_map[name][value] = element + + self._used_ids.add(element.id) def append(self, element: DatabaseObject, merge_on_conflict: bool = True): """ @@ -98,3 +101,21 @@ class Collection: returns a shallow copy of the data list """ return self._data.copy() + + def insecure_append(self, element: DatabaseObject): + if element.id in self._used_ids: + return False + self._used_ids.add(element.id) + + self._data.append(element) + self.map_element(element) + return True + + def insecure_extend(self, element_list: Iterable[DatabaseObject]): + success = False + + for element in element_list: + if self.insecure_append(element): + success = True + + return success diff --git a/src/music_kraken/objects/parents.py b/src/music_kraken/objects/parents.py index c9c5ca9..fbb2993 100644 --- a/src/music_kraken/objects/parents.py +++ b/src/music_kraken/objects/parents.py @@ -82,6 +82,18 @@ class DatabaseObject: @property def option_string(self) -> str: return self.__repr__() + + def compile(self) -> bool: + """ + compiles the recursive structures, + + Args: + traceback (set, optional): Defaults to an empty set. 
+ + Returns: + bool: returns true if id has been found in set + """ + pass class MainObject(DatabaseObject): diff --git a/src/music_kraken/objects/song.py b/src/music_kraken/objects/song.py index 53769fa..252678d 100644 --- a/src/music_kraken/objects/song.py +++ b/src/music_kraken/objects/song.py @@ -83,6 +83,21 @@ class Song(MainObject): self.main_artist_collection = Collection(data=main_artist_list, element_type=Artist) self.feature_artist_collection = Collection(data=feature_artist_list, element_type=Artist) + def compile(self): + album: Album + for album in self.album_collection: + if album.song_collection.insecure_append(self): + album.compile() + + artist: Artist + for artist in self.feature_artist_collection: + if artist.feature_song_collection.insecure_append(self): + artist.compile() + + for artist in self.main_artist_collection: + if artist.main_album_collection.insecure_extend(self.album_collection): + artist.compile() + @property def indexing_values(self) -> List[Tuple[str, object]]: return [ @@ -214,6 +229,24 @@ class Album(MainObject): self.artist_collection: Collection = Collection(data=artist_list, element_type=Artist) self.label_collection: Collection = Collection(data=label_list, element_type=Label) + def compile(self): + song: "Song" + for song in self.song_collection: + if song.album_collection.insecure_append(self): + song.compile() + + artist: Artist + for artist in self.artist_collection: + if artist.main_album_collection.insecure_append(self): + artist.compile() + + label: Label + for label in self.label_collection: + if label.album_collection.insecure_append(self): + label.compile() + + + @property def indexing_values(self) -> List[Tuple[str, object]]: return [ @@ -365,6 +398,22 @@ class Artist(MainObject): self.main_album_collection: Collection = Collection(data=main_album_list, element_type=Album) self.label_collection: Collection = Collection(data=label_list, element_type=Label) + def compile(self): + song: "Song" + for song in 
self.feature_song_collection: + if song.feature_artist_collection.insecure_append(self): + song.compile() + + album: "Album" + for album in self.main_album_collection: + if album.artist_collection.insecure_append(self): + album.compile() + + label: Label + for label in self.label_collection: + if label.current_artist_collection.insecure_append(self): + label.compile() + @property def indexing_values(self) -> List[Tuple[str, object]]: return [ @@ -485,6 +534,17 @@ class Label(MainObject): self.album_collection: Collection = Collection(data=album_list, element_type=Album) self.current_artist_collection: Collection = Collection(data=current_artist_list, element_type=Artist) + def compile(self) -> bool: + album: Album + for album in self.album_collection: + if album.label_collection.insecure_append(self): + album.compile() + + artist: Artist + for artist in self.current_artist_collection: + if artist.label_collection.insecure_append(self): + artist.compile() + @property def indexing_values(self) -> List[Tuple[str, object]]: return [ diff --git a/src/music_kraken/recurse/__init__.py b/src/music_kraken/recurse/__init__.py new file mode 100644 index 0000000..98e44a1 --- /dev/null +++ b/src/music_kraken/recurse/__init__.py @@ -0,0 +1,3 @@ +from . import build + +Builder = build.Builder diff --git a/src/music_kraken/recurse/build.py b/src/music_kraken/recurse/build.py new file mode 100644 index 0000000..4f77447 --- /dev/null +++ b/src/music_kraken/recurse/build.py @@ -0,0 +1,35 @@ +from .. 
import objects + +class Builder: + @classmethod + def build_album(cls, album: objects.Album, traceback: set): + print(album.option_string) + if objects.Album in traceback: + return + traceback.add(objects.Album) + + for song in album.song_collection: + song.album_collection.append(album) + + @classmethod + def build_song(cls, song: objects.Song, traceback: set): + print(song.option_string) + if objects.Song in traceback: + return + traceback.add(objects.Song) + + for album in song.album_collection: + album.song_collection.append(song) + cls.build_album(album, traceback) + + for feature_artist in song.feature_artist_collection: + feature_artist.feature_song_collection.append(song) + + @classmethod + def build(cls, data_object: objects.MusicObject): + if isinstance(data_object, objects.Song): + cls.build_song(data_object, set()) + + if isinstance(data_object, objects.Album): + cls.build_album(data_object, set()) + \ No newline at end of file diff --git a/src/music_kraken/recurse_objects/__init__.py b/src/music_kraken/recurse_objects/__init__.py deleted file mode 100644 index e69de29..0000000 From f858b97a17b1f812be6e75741b59aae40578ce93 Mon Sep 17 00:00:00 2001 From: Hellow2 Date: Tue, 14 Mar 2023 12:02:58 +0100 Subject: [PATCH 15/42] made better --- src/create_custom_objects.py | 6 +---- src/metal_archives.py | 2 ++ src/music_kraken/recurse/__init__.py | 3 --- src/music_kraken/recurse/build.py | 35 ---------------------------- 4 files changed, 3 insertions(+), 43 deletions(-) delete mode 100644 src/music_kraken/recurse/__init__.py delete mode 100644 src/music_kraken/recurse/build.py diff --git a/src/create_custom_objects.py b/src/create_custom_objects.py index 1312d19..47017be 100644 --- a/src/create_custom_objects.py +++ b/src/create_custom_objects.py @@ -60,8 +60,4 @@ song = objects.Song( song.compile() -print(song.option_string) -for album in song.album_collection: - print(album.option_string) -for artist in song.main_artist_collection: - 
print(artist.option_string) +print(song.options) diff --git a/src/metal_archives.py b/src/metal_archives.py index d3dfd4f..1d2f7e6 100644 --- a/src/metal_archives.py +++ b/src/metal_archives.py @@ -10,5 +10,7 @@ results = EncyclopaediaMetallum.search_by_query("#a Happy Days") artist = results[0] artist: objects.Artist = EncyclopaediaMetallum.fetch_details(artist) + +artist.compile() print(artist.options) print() diff --git a/src/music_kraken/recurse/__init__.py b/src/music_kraken/recurse/__init__.py deleted file mode 100644 index 98e44a1..0000000 --- a/src/music_kraken/recurse/__init__.py +++ /dev/null @@ -1,3 +0,0 @@ -from . import build - -Builder = build.Builder diff --git a/src/music_kraken/recurse/build.py b/src/music_kraken/recurse/build.py deleted file mode 100644 index 4f77447..0000000 --- a/src/music_kraken/recurse/build.py +++ /dev/null @@ -1,35 +0,0 @@ -from .. import objects - -class Builder: - @classmethod - def build_album(cls, album: objects.Album, traceback: set): - print(album.option_string) - if objects.Album in traceback: - return - traceback.add(objects.Album) - - for song in album.song_collection: - song.album_collection.append(album) - - @classmethod - def build_song(cls, song: objects.Song, traceback: set): - print(song.option_string) - if objects.Song in traceback: - return - traceback.add(objects.Song) - - for album in song.album_collection: - album.song_collection.append(song) - cls.build_album(album, traceback) - - for feature_artist in song.feature_artist_collection: - feature_artist.feature_song_collection.append(song) - - @classmethod - def build(cls, data_object: objects.MusicObject): - if isinstance(data_object, objects.Song): - cls.build_song(data_object, set()) - - if isinstance(data_object, objects.Album): - cls.build_album(data_object, set()) - \ No newline at end of file From 4a199547decbdd8aebe5c18af25fdccd10b4245f Mon Sep 17 00:00:00 2001 From: Hellow2 Date: Tue, 14 Mar 2023 13:36:05 +0100 Subject: [PATCH 16/42] fixed --- 
src/metal_archives.py | 3 +-- src/music_kraken/pages/abstract.py | 12 +++++++++--- 2 files changed, 10 insertions(+), 5 deletions(-) diff --git a/src/metal_archives.py b/src/metal_archives.py index 1d2f7e6..523e93f 100644 --- a/src/metal_archives.py +++ b/src/metal_archives.py @@ -6,11 +6,10 @@ from music_kraken.pages import ( ) -results = EncyclopaediaMetallum.search_by_query("#a Happy Days") +results = EncyclopaediaMetallum.search_by_query("#a Ghost Bath") artist = results[0] artist: objects.Artist = EncyclopaediaMetallum.fetch_details(artist) -artist.compile() print(artist.options) print() diff --git a/src/music_kraken/pages/abstract.py b/src/music_kraken/pages/abstract.py index 1236738..a3ca28a 100644 --- a/src/music_kraken/pages/abstract.py +++ b/src/music_kraken/pages/abstract.py @@ -103,13 +103,19 @@ class Page: """ if type(music_object) == Song: - return cls.fetch_song_details(music_object, flat=flat) + song = cls.fetch_song_details(music_object, flat=flat) + song.compile() + return song if type(music_object) == Album: - return cls.fetch_album_details(music_object, flat=flat) + album = cls.fetch_album_details(music_object, flat=flat) + album.compile() + return album if type(music_object) == Artist: - return cls.fetch_artist_details(music_object, flat=flat) + artist = cls.fetch_artist_details(music_object, flat=flat) + artist.compile() + return artist raise NotImplementedError(f"MusicObject {type(music_object)} has not been implemented yet") From f3d9025d0b6a8cf2fc240eccd844a29fe729d0a5 Mon Sep 17 00:00:00 2001 From: Hellow2 Date: Tue, 14 Mar 2023 14:48:03 +0100 Subject: [PATCH 17/42] Update musify.py --- src/music_kraken/pages/musify.py | 91 +++++++++++++++++++++++++------- 1 file changed, 71 insertions(+), 20 deletions(-) diff --git a/src/music_kraken/pages/musify.py b/src/music_kraken/pages/musify.py index b359f52..f7b67b5 100644 --- a/src/music_kraken/pages/musify.py +++ b/src/music_kraken/pages/musify.py @@ -1,7 +1,8 @@ -from typing import List +from 
typing import List, Optional import requests from bs4 import BeautifulSoup import pycountry +import time from ..utils.shared import ( ENCYCLOPAEDIA_METALLUM_LOGGER as LOGGER @@ -24,6 +25,9 @@ from ..utils import ( string_processing, shared ) +from ..utils.shared import ( + MUSIFY_LOGGER as LOGGER +) class EncyclopaediaMetallum(Page): @@ -42,33 +46,80 @@ class EncyclopaediaMetallum(Page): query_obj = cls.Query(query) if query_obj.is_raw: - return cls.simple_search(query_obj) - return cls.advanced_search(query_obj) + return cls.plaintext_search(query_obj.query) + return cls.plaintext_search(cls.get_plaintext_query(query_obj)) @classmethod - def advanced_search(cls, query: Page.Query) -> Options: - if query.song is not None: - return Options(cls.search_for_song(query=query)) - if query.album is not None: - return Options(cls.search_for_album(query=query)) - if query.artist is not None: - return Options(cls.search_for_artist(query=query)) - return Options + def get_plaintext_query(cls, query: Page.Query) -> str: + if query.album is None: + return f"{query.artist or '*'} - {query.song or '*'}" + return f"{query.artist or '*'} - {query.album * '*'} - {query.song or '*'}" @classmethod - def search_for_song(cls, query: Page.Query) -> List[Song]: - return [] + def get_soup_of_search(cls, query: str, trie=0) -> Optional[BeautifulSoup]: + url = f"https://musify.club/search?searchText={query}" + LOGGER.debug(f"Trying to get soup from {url}") + try: + r = cls.API_SESSION.get(url, timeout=15) + except requests.exceptions.Timeout: + return None + if r.status_code != 200: + if r.status_code in [503] and trie < cls.TRIES: + LOGGER.warning(f"youtube blocked downloading. 
({trie}-{cls.TRIES})") + LOGGER.warning(f"retrying in {cls.TIMEOUT} seconds again") + time.sleep(cls.TIMEOUT) + return cls.get_soup_of_search(query, trie=trie + 1) + + LOGGER.warning("too many tries, returning") + return None + return BeautifulSoup(r.content, features="html.parser") @classmethod - def search_for_album(cls, query: Page.Query) -> List[Album]: - return [] + def plaintext_search(cls, query: Page.Query) -> List[MusicObject]: + search_soup = cls.get_soup_of_search(query=query) + if search_soup is None: + return None + + # album and songs + # child of div class: contacts row + for contact_container_soup in search_soup.find_all("div", {"class": ["contacts", "row"]}): + pass + + # song + # div class: playlist__item + for playlist_soup in search_soup.find_all("div", {"class": "playlist"}): + pass - @classmethod - def search_for_artist(cls, query: Page.Query) -> List[Artist]: - return [] + # get the soup of the container with all track results + tracklist_container_soup = search_soup.find_all("div", {"class": "playlist"}) + if len(tracklist_container_soup) == 0: + return [] + if len(tracklist_container_soup) != 1: + LOGGER.warning("HTML Layout of https://musify.club/ changed. 
(or bug)") + tracklist_container_soup = tracklist_container_soup[0] + + tracklist_soup = tracklist_container_soup.find_all("div", {"class": "playlist__details"}) + + def parse_track_soup(_track_soup): + anchor_soups = _track_soup.find_all("a") + artist_ = anchor_soups[0].text.strip() + track_ = anchor_soups[1].text.strip() + url_ = anchor_soups[1]['href'] + return artist_, track_, url_ + + # check each track in the container, if they match + for track_soup in tracklist_soup: + artist_option, title_option, track_url = parse_track_soup(track_soup) + + title_match, title_distance = phonetic_compares.match_titles(title, title_option) + artist_match, artist_distance = phonetic_compares.match_artists(artist, artist_option) + + logging.debug(f"{(title, title_option, title_match, title_distance)}") + logging.debug(f"{(artist, artist_option, artist_match, artist_distance)}") + + if not title_match and not artist_match: + return cls.get_download_link(track_url) - @classmethod - def simple_search(cls, query: Page.Query) -> List[Artist]: return [] @classmethod From 03d78e5f984742ca96ceaf27190f362476e329a6 Mon Sep 17 00:00:00 2001 From: Hellow2 Date: Tue, 14 Mar 2023 14:58:54 +0100 Subject: [PATCH 18/42] Update musify.py --- src/music_kraken/pages/musify.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/src/music_kraken/pages/musify.py b/src/music_kraken/pages/musify.py index f7b67b5..72b1fcb 100644 --- a/src/music_kraken/pages/musify.py +++ b/src/music_kraken/pages/musify.py @@ -73,9 +73,12 @@ class EncyclopaediaMetallum(Page): LOGGER.warning("too many tries, returning") return None return BeautifulSoup(r.content, features="html.parser") + + @classmethod + def parse_contact_container(cls,) @classmethod - def plaintext_search(cls, query: Page.Query) -> List[MusicObject]: + def plaintext_search(cls, query: str) -> List[MusicObject]: search_soup = cls.get_soup_of_search(query=query) if search_soup is None: return None From 
9c438588a50aa6951a27cb8448b529abd78e6b03 Mon Sep 17 00:00:00 2001 From: Hellow2 Date: Wed, 15 Mar 2023 20:55:28 +0100 Subject: [PATCH 19/42] df --- src/music_kraken/pages/__init__.py | 7 ++- src/music_kraken/pages/musify.py | 82 +++++++++++++++++++++++++++--- src/musify_search.py | 13 +++++ 3 files changed, 93 insertions(+), 9 deletions(-) create mode 100644 src/musify_search.py diff --git a/src/music_kraken/pages/__init__.py b/src/music_kraken/pages/__init__.py index 614efb1..cb46e45 100644 --- a/src/music_kraken/pages/__init__.py +++ b/src/music_kraken/pages/__init__.py @@ -1,11 +1,14 @@ from .encyclopaedia_metallum import EncyclopaediaMetallum +from .musify import Musify EncyclopaediaMetallum = EncyclopaediaMetallum +Musify = Musify MetadataPages = { - EncyclopaediaMetallum + EncyclopaediaMetallum, + Musify } AudioPages = { - + Musify } diff --git a/src/music_kraken/pages/musify.py b/src/music_kraken/pages/musify.py index 72b1fcb..baa73ed 100644 --- a/src/music_kraken/pages/musify.py +++ b/src/music_kraken/pages/musify.py @@ -1,4 +1,4 @@ -from typing import List, Optional +from typing import List, Optional, Union import requests from bs4 import BeautifulSoup import pycountry @@ -30,7 +30,7 @@ from ..utils.shared import ( ) -class EncyclopaediaMetallum(Page): +class Musify(Page): API_SESSION: requests.Session = requests.Session() API_SESSION.headers = { "User-Agent": "Mozilla/5.0 (X11; Linux x86_64; rv:106.0) Gecko/20100101 Firefox/106.0", @@ -40,6 +40,8 @@ class EncyclopaediaMetallum(Page): API_SESSION.proxies = shared.proxies SOURCE_TYPE = SourcePages.MUSIFY + + HOST = "https://musify.club" @classmethod def search_by_query(cls, query: str) -> Options: @@ -75,24 +77,89 @@ class EncyclopaediaMetallum(Page): return BeautifulSoup(r.content, features="html.parser") @classmethod - def parse_contact_container(cls,) + def parse_artist_contact(cls, contact: BeautifulSoup) -> Artist: + source_list: List[Source] = [] + name = "" + _id = None + + # source + anchor = 
contact.find("a") + if anchor is not None: + href = anchor.get("href") + name = anchor.get("title") + + if "-" in href: + _id = href.split("-")[-1] + + source_list.append(Source(cls.SOURCE_TYPE, cls.HOST + href)) + + # artist image + image_soup = contact.find("img") + if image_soup is not None: + alt = image_soup.get("alt") + if alt is not None: + name = alt + + artist_thumbnail = image_soup.get("src") + + return Artist( + _id=_id, + name=name, + source_list=source_list + ) + + @classmethod + def parse_album_contact(cls, contact: BeautifulSoup) -> Album: + print(contact) + return Album(title="") + + @classmethod + def parse_contact_container(cls, contact_container_soup: BeautifulSoup) -> List[Union[Artist, Album]]: + # print(contact_container_soup.prettify) + contacts = [] + + # print(contact_container_soup) + + contact: BeautifulSoup + for contact in contact_container_soup.find_all("div", {"class": "contacts__item"}): + # print(contact) + + anchor_soup = contact.find("a") + if anchor_soup is not None: + url = anchor_soup.get("href") + if url is not None: + print(url) + if "artist" in url: + contacts.append(cls.parse_artist_contact(contact)) + elif "release" in url: + contacts.append(cls.parse_album_contact(contact)) + break + return contacts + + @classmethod + def parse_playlist_soup(cls, playlist_soup: BeautifulSoup) -> List[Song]: + # print(playlist_soup.prettify) + return [] @classmethod def plaintext_search(cls, query: str) -> List[MusicObject]: + search_results = [] + search_soup = cls.get_soup_of_search(query=query) if search_soup is None: return None # album and songs # child of div class: contacts row - for contact_container_soup in search_soup.find_all("div", {"class": ["contacts", "row"]}): - pass + for contact_container_soup in search_soup.find_all("div", {"class": "contacts"}): + search_results.extend(cls.parse_contact_container(contact_container_soup)) # song # div class: playlist__item for playlist_soup in search_soup.find_all("div", {"class": 
"playlist"}): - pass + search_results.extend(cls.parse_playlist_soup(playlist_soup)) + """ # get the soup of the container with all track results tracklist_container_soup = search_soup.find_all("div", {"class": "playlist"}) if len(tracklist_container_soup) == 0: @@ -122,8 +189,9 @@ class EncyclopaediaMetallum(Page): if not title_match and not artist_match: return cls.get_download_link(track_url) + """ - return [] + return search_results @classmethod def fetch_album_details(cls, album: Album, flat: bool = False) -> Album: diff --git a/src/musify_search.py b/src/musify_search.py new file mode 100644 index 0000000..5b28d5c --- /dev/null +++ b/src/musify_search.py @@ -0,0 +1,13 @@ +from music_kraken import objects +from music_kraken.pages import Musify + + +results = Musify.search_by_query("#a Ghost Bath") +print(results) +exit() + +artist = results[0] +artist: objects.Artist = Musify.fetch_details(artist) + +print(artist.options) +print() From c43ff6df69916e635d459ea28fc4a890516f8165 Mon Sep 17 00:00:00 2001 From: Hellow2 Date: Thu, 16 Mar 2023 14:36:49 +0100 Subject: [PATCH 20/42] continued musify scraper --- src/music_kraken/objects/parents.py | 4 + src/music_kraken/pages/musify.py | 154 ++++++++++++++++++++-------- 2 files changed, 117 insertions(+), 41 deletions(-) diff --git a/src/music_kraken/objects/parents.py b/src/music_kraken/objects/parents.py index fbb2993..c9993b7 100644 --- a/src/music_kraken/objects/parents.py +++ b/src/music_kraken/objects/parents.py @@ -14,12 +14,15 @@ class DatabaseObject: SIMPLE_ATTRIBUTES: tuple = tuple() def __init__(self, _id: str = None, dynamic: bool = False, **kwargs) -> None: + self.automatic_id: bool = False + if _id is None and not dynamic: """ generates a random UUID https://docs.python.org/3/library/uuid.html """ _id = str(uuid.uuid4()) + self.automatic_id = True LOGGER.debug(f"id for {type(self).__name__} isn't set. 
Setting to {_id}") # The id can only be None, if the object is dynamic (self.dynamic = True) @@ -71,6 +74,7 @@ class DatabaseObject: if override or getattr(self, simple_attribute) is None: setattr(self, simple_attribute, getattr(other, simple_attribute)) + @property def metadata(self) -> Metadata: return Metadata() diff --git a/src/music_kraken/pages/musify.py b/src/music_kraken/pages/musify.py index baa73ed..91f500c 100644 --- a/src/music_kraken/pages/musify.py +++ b/src/music_kraken/pages/musify.py @@ -55,7 +55,7 @@ class Musify(Page): def get_plaintext_query(cls, query: Page.Query) -> str: if query.album is None: return f"{query.artist or '*'} - {query.song or '*'}" - return f"{query.artist or '*'} - {query.album * '*'} - {query.song or '*'}" + return f"{query.artist or '*'} - {query.album or '*'} - {query.song or '*'}" @classmethod def get_soup_of_search(cls, query: str, trie=0) -> Optional[BeautifulSoup]: @@ -110,30 +110,134 @@ class Musify(Page): @classmethod def parse_album_contact(cls, contact: BeautifulSoup) -> Album: - print(contact) - return Album(title="") + """ + parsing following html: + + ```html + + ``` + """ + + source_list: List[Source] = [] + title = "" + _id = None + year = None + artist_list: List[Artist] = [] + + def parse_title_date(title_date: Optional[str], delimiter: str = " - "): + if title_date is None: + return + + title_date = title_date.strip() + split_attr = title_date.split(delimiter) + + if len(split_attr) < 2: + return + if not split_attr[-1].isdigit(): + return + + year = int(split_attr[-1]) + title = delimiter.join(split_attr[:-1]) + + # source + anchor = contact.find("a") + if anchor is not None: + href = anchor.get("href") + + # get the title and year + parse_title_date(anchor.get("title")) + + + if "-" in href: + _id = href.split("-")[-1] + + source_list.append(Source(cls.SOURCE_TYPE, cls.HOST + href)) + + # cover art + image_soup = contact.find("img") + if image_soup is not None: + alt = image_soup.get("alt") + if alt is not 
None: + title = alt + + cover_art = image_soup.get("src") + + contact_info_soup = contact.find("div", {"class": "contacts__info"}) + if contact_info_soup is not None: + """ + Ghost Bath - 2013 + Ghost Bath + Треков: 4 + 9,04 + """ + + title_soup = contact_info_soup.find("strong") + if title_soup is None: + parse_title_date(title_soup) + + small_list = contact_info_soup.find_all("small") + if len(small_list) == 3: + # artist + artist_soup: BeautifulSoup = small_list[0] + raw_artist_str = artist_soup.text + + for artist_str in raw_artist_str.split("&\r\n"): + artist_str = artist_str.rstrip("& ...\r\n") + artist_str = artist_str.strip() + + artist_list.append(Artist(name=artist_str)) + + track_count_soup: BeautifulSoup = small_list[1] + rating_soup: BeautifulSoup = small_list[2] + else: + LOGGER.warning("got an unequal ammount than 3 small elements") + + + + return Album( + _id=_id, + title=title, + source_list=source_list, + date=ID3Timestamp(year=year), + artist_list=artist_list + ) @classmethod def parse_contact_container(cls, contact_container_soup: BeautifulSoup) -> List[Union[Artist, Album]]: - # print(contact_container_soup.prettify) + #print(contact_container_soup.prettify) contacts = [] # print(contact_container_soup) contact: BeautifulSoup for contact in contact_container_soup.find_all("div", {"class": "contacts__item"}): - # print(contact) anchor_soup = contact.find("a") + if anchor_soup is not None: url = anchor_soup.get("href") + if url is not None: - print(url) + #print(url) if "artist" in url: contacts.append(cls.parse_artist_contact(contact)) elif "release" in url: contacts.append(cls.parse_album_contact(contact)) - break return contacts @classmethod @@ -142,7 +246,7 @@ class Musify(Page): return [] @classmethod - def plaintext_search(cls, query: str) -> List[MusicObject]: + def plaintext_search(cls, query: str) -> Options: search_results = [] search_soup = cls.get_soup_of_search(query=query) @@ -159,39 +263,7 @@ class Musify(Page): for playlist_soup 
in search_soup.find_all("div", {"class": "playlist"}): search_results.extend(cls.parse_playlist_soup(playlist_soup)) - """ - # get the soup of the container with all track results - tracklist_container_soup = search_soup.find_all("div", {"class": "playlist"}) - if len(tracklist_container_soup) == 0: - return [] - if len(tracklist_container_soup) != 1: - LOGGER.warning("HTML Layout of https://musify.club/ changed. (or bug)") - tracklist_container_soup = tracklist_container_soup[0] - - tracklist_soup = tracklist_container_soup.find_all("div", {"class": "playlist__details"}) - - def parse_track_soup(_track_soup): - anchor_soups = _track_soup.find_all("a") - artist_ = anchor_soups[0].text.strip() - track_ = anchor_soups[1].text.strip() - url_ = anchor_soups[1]['href'] - return artist_, track_, url_ - - # check each track in the container, if they match - for track_soup in tracklist_soup: - artist_option, title_option, track_url = parse_track_soup(track_soup) - - title_match, title_distance = phonetic_compares.match_titles(title, title_option) - artist_match, artist_distance = phonetic_compares.match_artists(artist, artist_option) - - logging.debug(f"{(title, title_option, title_match, title_distance)}") - logging.debug(f"{(artist, artist_option, artist_match, artist_distance)}") - - if not title_match and not artist_match: - return cls.get_download_link(track_url) - """ - - return search_results + return Options(search_results) @classmethod def fetch_album_details(cls, album: Album, flat: bool = False) -> Album: From 137c69399cf05d2596ce8fa1e5de4cacf02549fb Mon Sep 17 00:00:00 2001 From: Hellow2 Date: Thu, 16 Mar 2023 16:57:43 +0100 Subject: [PATCH 21/42] pretty much finished musify search --- src/music_kraken/pages/musify.py | 80 ++++++++++++++++++++++++++++++-- src/musify_search.py | 2 +- 2 files changed, 77 insertions(+), 5 deletions(-) diff --git a/src/music_kraken/pages/musify.py b/src/music_kraken/pages/musify.py index 91f500c..defd77e 100644 --- 
a/src/music_kraken/pages/musify.py +++ b/src/music_kraken/pages/musify.py @@ -67,7 +67,7 @@ class Musify(Page): return None if r.status_code != 200: if r.status_code in [503] and trie < cls.TRIES: - LOGGER.warning(f"youtube blocked downloading. ({trie}-{cls.TRIES})") + LOGGER.warning(f"{cls.__name__} blocked downloading. ({trie}-{cls.TRIES})") LOGGER.warning(f"retrying in {cls.TIMEOUT} seconds again") time.sleep(cls.TIMEOUT) return cls.get_soup_of_search(query, trie=trie + 1) @@ -200,13 +200,15 @@ class Musify(Page): artist_str = artist_str.rstrip("& ...\r\n") artist_str = artist_str.strip() + if artist_str.endswith("]") and "[" in artist_str: + artist_str = artist_str.rsplit("[", maxsplit=1)[0] + artist_list.append(Artist(name=artist_str)) track_count_soup: BeautifulSoup = small_list[1] rating_soup: BeautifulSoup = small_list[2] else: LOGGER.warning("got an unequal ammount than 3 small elements") - return Album( @@ -240,10 +242,80 @@ class Musify(Page): contacts.append(cls.parse_album_contact(contact)) return contacts + @classmethod + def parse_playlist_item(cls, playlist_item_soup: BeautifulSoup) -> Song: + _id = None + song_title = playlist_item_soup.get("data-name") or "" + artist_list: List[Artist] = [] + source_list: List[Source] = [] + + # details + playlist_details: BeautifulSoup = playlist_item_soup.find("div", {"class", "playlist__heading"}) + if playlist_details is not None: + anchor_list = playlist_details.find_all("a") + + if len(anchor_list) >= 2: + print(anchor_list) + # artists + artist_anchor: BeautifulSoup + for artist_anchor in anchor_list[:-1]: + _id = None + href = artist_anchor.get("href") + artist_source: Source = Source(cls.SOURCE_TYPE, cls.HOST + href) + if "-" in href: + _id = href.split("-")[-1] + + artist_list.append(Artist( + _id=_id, + name=artist_anchor.get_text(strip=True), + source_list=[artist_source] + )) + + # track + track_soup: BeautifulSoup = anchor_list[-1] + """ + TODO + this anchor text may have something like (feat. 
some artist) + which is not acceptable + """ + href = track_soup.get("href") + if href is not None: + if "-" in href: + raw_id: str = href.split("-")[-1] + if raw_id.isdigit(): + _id = raw_id + source_list.append(Source(cls.SOURCE_TYPE, cls.HOST + href)) + + else: + LOGGER.warning("there are not enough anchors (2) for artist and track") + LOGGER.warning(str(artist_list)) + + """ + artist_name = playlist_item_soup.get("data-artist") + if artist_name is not None: + artist_list.append(Artist(name=artist_name)) + """ + id_attribute = playlist_item_soup.get("id") + if id_attribute is not None: + raw_id = id_attribute.replace("playerDiv", "") + if raw_id.isdigit(): + _id = raw_id + + return Song( + _id=_id, + title=song_title, + main_artist_list=artist_list, + source_list=source_list + ) + @classmethod def parse_playlist_soup(cls, playlist_soup: BeautifulSoup) -> List[Song]: - # print(playlist_soup.prettify) - return [] + song_list = [] + + for playlist_item_soup in playlist_soup.find_all("div", {"class": "playlist__item"}): + song_list.append(cls.parse_playlist_item(playlist_item_soup)) + + return song_list @classmethod def plaintext_search(cls, query: str) -> Options: diff --git a/src/musify_search.py b/src/musify_search.py index 5b28d5c..3164ad4 100644 --- a/src/musify_search.py +++ b/src/musify_search.py @@ -2,7 +2,7 @@ from music_kraken import objects from music_kraken.pages import Musify -results = Musify.search_by_query("#a Ghost Bath") +results = Musify.search_by_query("#a Lorna Shore #t Wrath") print(results) exit() From c13031f9b3a96e176183f10b10d95b74cefd6903 Mon Sep 17 00:00:00 2001 From: Hellow Date: Thu, 16 Mar 2023 22:52:47 +0100 Subject: [PATCH 22/42] laied out musify apis --- src/music_kraken/pages/musify.py | 22 ++++++++++++++++++++++ src/musify_search.py | 2 +- 2 files changed, 23 insertions(+), 1 deletion(-) diff --git a/src/music_kraken/pages/musify.py b/src/music_kraken/pages/musify.py index defd77e..6605063 100644 --- 
a/src/music_kraken/pages/musify.py +++ b/src/music_kraken/pages/musify.py @@ -29,6 +29,28 @@ from ..utils.shared import ( MUSIFY_LOGGER as LOGGER ) +""" +https://musify.club/artist/ghost-bath-280348?_pjax=#bodyContent +https://musify.club/artist/ghost-bath-280348/releases?_pjax=#bodyContent +https://musify.club/artist/ghost-bath-280348/clips?_pjax=#bodyContent +https://musify.club/artist/ghost-bath-280348/photos?_pjax=#bodyContent + +POST https://musify.club/artist/filtersongs +ID: 280348 +NameForUrl: ghost-bath +Page: 1 +IsAllowed: True +SortOrder.Property: dateCreated +SortOrder.IsAscending: false +X-Requested-With: XMLHttpRequest + +POST https://musify.club/artist/filteralbums +ArtistID: 280348 +SortOrder.Property: dateCreated +SortOrder.IsAscending: false +X-Requested-With: XMLHttpRequest +""" + class Musify(Page): API_SESSION: requests.Session = requests.Session() diff --git a/src/musify_search.py b/src/musify_search.py index 3164ad4..5b28d5c 100644 --- a/src/musify_search.py +++ b/src/musify_search.py @@ -2,7 +2,7 @@ from music_kraken import objects from music_kraken.pages import Musify -results = Musify.search_by_query("#a Lorna Shore #t Wrath") +results = Musify.search_by_query("#a Ghost Bath") print(results) exit() From 7b110983c2086acd76d60cddf468fa120cf4c17c Mon Sep 17 00:00:00 2001 From: Hellow2 Date: Fri, 17 Mar 2023 12:31:56 +0100 Subject: [PATCH 23/42] musify --- src/music_kraken/pages/musify.py | 151 +++++++++++++++++++++++++++++++ src/musify_search.py | 21 +++-- 2 files changed, 165 insertions(+), 7 deletions(-) diff --git a/src/music_kraken/pages/musify.py b/src/music_kraken/pages/musify.py index 6605063..0a0afb3 100644 --- a/src/music_kraken/pages/musify.py +++ b/src/music_kraken/pages/musify.py @@ -3,6 +3,9 @@ import requests from bs4 import BeautifulSoup import pycountry import time +from urllib.parse import urlparse +from enum import Enum +from dataclasses import dataclass from ..utils.shared import ( ENCYCLOPAEDIA_METALLUM_LOGGER as LOGGER @@ 
-51,6 +54,18 @@ SortOrder.IsAscending: false X-Requested-With: XMLHttpRequest """ +class MusifyTypes(Enum): + ARTIST = "artist" + + +@dataclass +class MusifyUrl: + source_type: MusifyTypes + name_without_id: str + name_with_id: str + musify_id: str + url: str + class Musify(Page): API_SESSION: requests.Session = requests.Session() @@ -358,6 +373,142 @@ class Musify(Page): search_results.extend(cls.parse_playlist_soup(playlist_soup)) return Options(search_results) + + @classmethod + def parse_url(cls, url: str) -> MusifyUrl: + parsed = urlparse(url) + + path = parsed.path.split("/") + + split_name = path[2].split("-") + url_id = split_name[-1] + name_for_url = "-".join(split_name[:-1]) + + return MusifyUrl( + source_type=MusifyTypes(path[1]), + name_without_id=name_for_url, + name_with_id=path[2], + musify_id=url_id, + url=url + ) + + @classmethod + def parse_album_card(cls, album_card: BeautifulSoup) -> Album: + """ +
+ + Self Loather + + +
+

+ Self Loather +

+
+ + + +
+ """ + name: str = "" + source_list: List[Source] = [] + + + anchor_list = album_card.find_all("a", recursive=False) + if len(anchor_list) > 0: + anchor = anchor_list[0] + + source_list.append(Source( + cls.SOURCE_TYPE, + cls.HOST + anchor.get("href") + )) + + thumbnail: BeautifulSoup = anchor.find("img") + if thumbnail is not None: + alt = thumbnail.get("alt") + if alt is not None: + name = alt + + image_url = thumbnail.get("src") + + else: + LOGGER.debug("the card has no thumbnail or url") + + + @classmethod + def get_discography(cls, url: MusifyUrl) -> List[Album]: + """ + POST https://musify.club/artist/filteralbums + ArtistID: 280348 + SortOrder.Property: dateCreated + SortOrder.IsAscending: false + X-Requested-With: XMLHttpRequest + """ + + endpoint = cls.HOST + "/" + url.source_type.value + "/filteralbums" + + r = cls.API_SESSION.post(url=endpoint, json={ + "ArtistID": str(url.musify_id), + "SortOrder.Property": "dateCreated", + "SortOrder.IsAscending": False, + "X-Requested-With": "XMLHttpRequest" + }) + + soup: BeautifulSoup = BeautifulSoup(r.content, features="html.parser") + + print(r) + # print(soup.prettify) + + discography: List[Album] = [] + for card_soup in soup.find_all("div", {"class": "card"}): + discography.append(cls.parse_album_card(card_soup)) + + return discography + + @classmethod + def get_artist_from_source(cls, source: Source, flat: bool = False) -> Artist: + """ + fetches artist from source + + [] discography + [] attributes + [] picture galery + + Args: + source (Source): the source to fetch + flat (bool, optional): if it is false, every album from discograohy will be fetched. Defaults to False. 
+ + Returns: + Artist: the artist fetched + """ + + print(source) + url = cls.parse_url(source.url) + print(url) + + discography: List[Album] = cls.get_discography(url) + + return Artist( + name="", + main_album_list=discography + ) + + @classmethod + def fetch_artist_details(cls, artist: Artist, flat: bool = False) -> Artist: + source_list = artist.source_collection.get_sources_from_page(cls.SOURCE_TYPE) + if len(source_list) == 0: + return artist + + for source in source_list: + artist.merge(cls.get_artist_from_source(source, flat=flat)) + + return artist @classmethod def fetch_album_details(cls, album: Album, flat: bool = False) -> Album: diff --git a/src/musify_search.py b/src/musify_search.py index 5b28d5c..5811318 100644 --- a/src/musify_search.py +++ b/src/musify_search.py @@ -2,12 +2,19 @@ from music_kraken import objects from music_kraken.pages import Musify -results = Musify.search_by_query("#a Ghost Bath") -print(results) -exit() +def search(): + results = Musify.search_by_query("#a Ghost Bath") + print(results) -artist = results[0] -artist: objects.Artist = Musify.fetch_details(artist) -print(artist.options) -print() +def fetch_artist(): + artist = objects.Artist( + name="Ghost Bath", + source_list=[objects.Source(objects.SourcePages.MUSIFY, "https://musify.club/artist/ghost-bath-280348")] + ) + + artist = Musify.fetch_details(artist) + print(artist.options) + +if __name__ == "__main__": + fetch_artist() From bf04d5b8bec38aaff7ae75b5ab3fe6c7d038b106 Mon Sep 17 00:00:00 2001 From: Hellow2 Date: Fri, 17 Mar 2023 12:39:19 +0100 Subject: [PATCH 24/42] Update musify.py --- src/music_kraken/pages/musify.py | 32 +++++++++++++++++++++++++++----- 1 file changed, 27 insertions(+), 5 deletions(-) diff --git a/src/music_kraken/pages/musify.py b/src/music_kraken/pages/musify.py index 0a0afb3..4a04c05 100644 --- a/src/music_kraken/pages/musify.py +++ b/src/music_kraken/pages/musify.py @@ -419,16 +419,26 @@ class Musify(Page): name: str = "" source_list: List[Source] 
= [] - - anchor_list = album_card.find_all("a", recursive=False) - if len(anchor_list) > 0: - anchor = anchor_list[0] + def parse_release_anchor(anchor: BeautifulSoup, text_is_name=False): + if anchor is None: + return source_list.append(Source( cls.SOURCE_TYPE, cls.HOST + anchor.get("href") )) + if not text_is_name: + return + + name = anchor.text + + + anchor_list = album_card.find_all("a", recursive=False) + if len(anchor_list) > 0: + anchor = anchor_list[0] + parse_release_anchor(anchor) + thumbnail: BeautifulSoup = anchor.find("img") if thumbnail is not None: alt = thumbnail.get("alt") @@ -436,10 +446,22 @@ class Musify(Page): name = alt image_url = thumbnail.get("src") - else: LOGGER.debug("the card has no thumbnail or url") + card_body = album_card.find("div", {"class": "card-body"}) + if card_body is not None: + parse_release_anchor(card_body.find("a"), text_is_name=True) + + + + card_footer_list = album_card.find_all("div", {"class": "card-footer"}) + + return Album( + title=name, + source_list=source_list + ) + @classmethod def get_discography(cls, url: MusifyUrl) -> List[Album]: From a27c4e28c3d0bd6228faf30c62f029639f6c038b Mon Sep 17 00:00:00 2001 From: Hellow Date: Fri, 17 Mar 2023 13:10:25 +0100 Subject: [PATCH 25/42] fixed docstring --- src/music_kraken/pages/musify.py | 4 ---- 1 file changed, 4 deletions(-) diff --git a/src/music_kraken/pages/musify.py b/src/music_kraken/pages/musify.py index 4a04c05..c11154b 100644 --- a/src/music_kraken/pages/musify.py +++ b/src/music_kraken/pages/musify.py @@ -148,9 +148,6 @@ class Musify(Page): @classmethod def parse_album_contact(cls, contact: BeautifulSoup) -> Album: """ - parsing following html: - - ```html - ``` """ source_list: List[Source] = [] From a01ea0cfcd5ede94e863bb80d01c727b511cc1f7 Mon Sep 17 00:00:00 2001 From: Hellow Date: Fri, 17 Mar 2023 13:11:18 +0100 Subject: [PATCH 26/42] reformat --- src/music_kraken/pages/musify.py | 165 +++++++++++++++---------------- 1 file changed, 80 insertions(+), 85 
deletions(-) diff --git a/src/music_kraken/pages/musify.py b/src/music_kraken/pages/musify.py index c11154b..62506d7 100644 --- a/src/music_kraken/pages/musify.py +++ b/src/music_kraken/pages/musify.py @@ -54,9 +54,10 @@ SortOrder.IsAscending: false X-Requested-With: XMLHttpRequest """ + class MusifyTypes(Enum): ARTIST = "artist" - + @dataclass class MusifyUrl: @@ -77,7 +78,7 @@ class Musify(Page): API_SESSION.proxies = shared.proxies SOURCE_TYPE = SourcePages.MUSIFY - + HOST = "https://musify.club" @classmethod @@ -112,39 +113,39 @@ class Musify(Page): LOGGER.warning("too many tries, returning") return None return BeautifulSoup(r.content, features="html.parser") - + @classmethod def parse_artist_contact(cls, contact: BeautifulSoup) -> Artist: source_list: List[Source] = [] name = "" _id = None - + # source anchor = contact.find("a") if anchor is not None: href = anchor.get("href") name = anchor.get("title") - + if "-" in href: _id = href.split("-")[-1] - + source_list.append(Source(cls.SOURCE_TYPE, cls.HOST + href)) - + # artist image image_soup = contact.find("img") if image_soup is not None: alt = image_soup.get("alt") if alt is not None: name = alt - + artist_thumbnail = image_soup.get("src") - + return Artist( _id=_id, name=name, source_list=source_list ) - + @classmethod def parse_album_contact(cls, contact: BeautifulSoup) -> Album: """ @@ -165,51 +166,50 @@ class Musify(Page): """ - + source_list: List[Source] = [] title = "" _id = None year = None artist_list: List[Artist] = [] - + def parse_title_date(title_date: Optional[str], delimiter: str = " - "): if title_date is None: return - - title_date = title_date.strip() + + title_date = title_date.strip() split_attr = title_date.split(delimiter) - + if len(split_attr) < 2: return if not split_attr[-1].isdigit(): return - + year = int(split_attr[-1]) title = delimiter.join(split_attr[:-1]) - + # source anchor = contact.find("a") if anchor is not None: href = anchor.get("href") - + # get the title and year 
parse_title_date(anchor.get("title")) - - + if "-" in href: _id = href.split("-")[-1] - + source_list.append(Source(cls.SOURCE_TYPE, cls.HOST + href)) - + # cover art image_soup = contact.find("img") if image_soup is not None: alt = image_soup.get("alt") if alt is not None: title = alt - + cover_art = image_soup.get("src") - + contact_info_soup = contact.find("div", {"class": "contacts__info"}) if contact_info_soup is not None: """ @@ -218,11 +218,11 @@ class Musify(Page): Треков: 4 9,04 """ - + title_soup = contact_info_soup.find("strong") if title_soup is None: parse_title_date(title_soup) - + small_list = contact_info_soup.find_all("small") if len(small_list) == 3: # artist @@ -232,18 +232,17 @@ class Musify(Page): for artist_str in raw_artist_str.split("&\r\n"): artist_str = artist_str.rstrip("& ...\r\n") artist_str = artist_str.strip() - + if artist_str.endswith("]") and "[" in artist_str: artist_str = artist_str.rsplit("[", maxsplit=1)[0] - + artist_list.append(Artist(name=artist_str)) - + track_count_soup: BeautifulSoup = small_list[1] rating_soup: BeautifulSoup = small_list[2] else: LOGGER.warning("got an unequal ammount than 3 small elements") - - + return Album( _id=_id, title=title, @@ -251,59 +250,59 @@ class Musify(Page): date=ID3Timestamp(year=year), artist_list=artist_list ) - + @classmethod def parse_contact_container(cls, contact_container_soup: BeautifulSoup) -> List[Union[Artist, Album]]: - #print(contact_container_soup.prettify) + # print(contact_container_soup.prettify) contacts = [] - + # print(contact_container_soup) - + contact: BeautifulSoup for contact in contact_container_soup.find_all("div", {"class": "contacts__item"}): - + anchor_soup = contact.find("a") if anchor_soup is not None: url = anchor_soup.get("href") - + if url is not None: - #print(url) + # print(url) if "artist" in url: contacts.append(cls.parse_artist_contact(contact)) elif "release" in url: contacts.append(cls.parse_album_contact(contact)) return contacts - + 
@classmethod def parse_playlist_item(cls, playlist_item_soup: BeautifulSoup) -> Song: _id = None song_title = playlist_item_soup.get("data-name") or "" artist_list: List[Artist] = [] source_list: List[Source] = [] - + # details playlist_details: BeautifulSoup = playlist_item_soup.find("div", {"class", "playlist__heading"}) if playlist_details is not None: anchor_list = playlist_details.find_all("a") - + if len(anchor_list) >= 2: print(anchor_list) # artists - artist_anchor: BeautifulSoup + artist_anchor: BeautifulSoup for artist_anchor in anchor_list[:-1]: _id = None href = artist_anchor.get("href") artist_source: Source = Source(cls.SOURCE_TYPE, cls.HOST + href) if "-" in href: _id = href.split("-")[-1] - + artist_list.append(Artist( _id=_id, name=artist_anchor.get_text(strip=True), source_list=[artist_source] )) - + # track track_soup: BeautifulSoup = anchor_list[-1] """ @@ -318,11 +317,11 @@ class Musify(Page): if raw_id.isdigit(): _id = raw_id source_list.append(Source(cls.SOURCE_TYPE, cls.HOST + href)) - + else: LOGGER.warning("there are not enough anchors (2) for artist and track") LOGGER.warning(str(artist_list)) - + """ artist_name = playlist_item_soup.get("data-artist") if artist_name is not None: @@ -333,53 +332,53 @@ class Musify(Page): raw_id = id_attribute.replace("playerDiv", "") if raw_id.isdigit(): _id = raw_id - + return Song( _id=_id, title=song_title, main_artist_list=artist_list, source_list=source_list ) - + @classmethod def parse_playlist_soup(cls, playlist_soup: BeautifulSoup) -> List[Song]: song_list = [] - + for playlist_item_soup in playlist_soup.find_all("div", {"class": "playlist__item"}): song_list.append(cls.parse_playlist_item(playlist_item_soup)) - + return song_list @classmethod def plaintext_search(cls, query: str) -> Options: search_results = [] - + search_soup = cls.get_soup_of_search(query=query) if search_soup is None: return None - + # album and songs # child of div class: contacts row for contact_container_soup in 
search_soup.find_all("div", {"class": "contacts"}): search_results.extend(cls.parse_contact_container(contact_container_soup)) - + # song # div class: playlist__item for playlist_soup in search_soup.find_all("div", {"class": "playlist"}): search_results.extend(cls.parse_playlist_soup(playlist_soup)) return Options(search_results) - + @classmethod def parse_url(cls, url: str) -> MusifyUrl: parsed = urlparse(url) - + path = parsed.path.split("/") - + split_name = path[2].split("-") url_id = split_name[-1] name_for_url = "-".join(split_name[:-1]) - + return MusifyUrl( source_type=MusifyTypes(path[1]), name_without_id=name_for_url, @@ -387,7 +386,7 @@ class Musify(Page): musify_id=url_id, url=url ) - + @classmethod def parse_album_card(cls, album_card: BeautifulSoup) -> Album: """ @@ -414,51 +413,47 @@ class Musify(Page): """ name: str = "" source_list: List[Source] = [] - + def parse_release_anchor(anchor: BeautifulSoup, text_is_name=False): if anchor is None: return - + source_list.append(Source( cls.SOURCE_TYPE, cls.HOST + anchor.get("href") )) - + if not text_is_name: return - + name = anchor.text - - + anchor_list = album_card.find_all("a", recursive=False) if len(anchor_list) > 0: anchor = anchor_list[0] parse_release_anchor(anchor) - + thumbnail: BeautifulSoup = anchor.find("img") if thumbnail is not None: alt = thumbnail.get("alt") if alt is not None: name = alt - + image_url = thumbnail.get("src") else: LOGGER.debug("the card has no thumbnail or url") - + card_body = album_card.find("div", {"class": "card-body"}) if card_body is not None: parse_release_anchor(card_body.find("a"), text_is_name=True) - - - + card_footer_list = album_card.find_all("div", {"class": "card-footer"}) - + return Album( title=name, source_list=source_list ) - - + @classmethod def get_discography(cls, url: MusifyUrl) -> List[Album]: """ @@ -468,27 +463,27 @@ class Musify(Page): SortOrder.IsAscending: false X-Requested-With: XMLHttpRequest """ - + endpoint = cls.HOST + "/" + 
url.source_type.value + "/filteralbums" - + r = cls.API_SESSION.post(url=endpoint, json={ "ArtistID": str(url.musify_id), "SortOrder.Property": "dateCreated", "SortOrder.IsAscending": False, "X-Requested-With": "XMLHttpRequest" }) - + soup: BeautifulSoup = BeautifulSoup(r.content, features="html.parser") - + print(r) # print(soup.prettify) - + discography: List[Album] = [] for card_soup in soup.find_all("div", {"class": "card"}): discography.append(cls.parse_album_card(card_soup)) - + return discography - + @classmethod def get_artist_from_source(cls, source: Source, flat: bool = False) -> Artist: """ @@ -505,13 +500,13 @@ class Musify(Page): Returns: Artist: the artist fetched """ - + print(source) url = cls.parse_url(source.url) print(url) - + discography: List[Album] = cls.get_discography(url) - + return Artist( name="", main_album_list=discography @@ -522,10 +517,10 @@ class Musify(Page): source_list = artist.source_collection.get_sources_from_page(cls.SOURCE_TYPE) if len(source_list) == 0: return artist - + for source in source_list: artist.merge(cls.get_artist_from_source(source, flat=flat)) - + return artist @classmethod From 924bd01e1d7589ad6d20f10bd933e0405b6d2688 Mon Sep 17 00:00:00 2001 From: Hellow Date: Fri, 17 Mar 2023 13:58:58 +0100 Subject: [PATCH 27/42] finished fetching of discography --- src/music_kraken/objects/album.py | 1 + src/music_kraken/pages/musify.py | 98 +++++++++++++++++++++++++++---- 2 files changed, 86 insertions(+), 13 deletions(-) diff --git a/src/music_kraken/objects/album.py b/src/music_kraken/objects/album.py index 2bab8d0..6195d6e 100644 --- a/src/music_kraken/objects/album.py +++ b/src/music_kraken/objects/album.py @@ -22,4 +22,5 @@ class AlbumType(Enum): LIVE_ALBUM = "Live Album" COMPILATION_ALBUM = "Compilation Album" MIXTAPE = "Mixtape" + DEMO = "Demo" OTHER = "Other" diff --git a/src/music_kraken/pages/musify.py b/src/music_kraken/pages/musify.py index 62506d7..8a629b8 100644 --- a/src/music_kraken/pages/musify.py +++ 
b/src/music_kraken/pages/musify.py @@ -1,3 +1,4 @@ +from collections import defaultdict from typing import List, Optional, Union import requests from bs4 import BeautifulSoup @@ -22,7 +23,9 @@ from ..objects import ( ID3Timestamp, FormattedText, Label, - Options + Options, + AlbumType, + AlbumStatus ) from ..utils import ( string_processing, @@ -411,22 +414,62 @@ class Musify(Page): """ + + album_type_map = defaultdict(lambda: AlbumType.OTHER, { + 1: AlbumType.OTHER, # literally other xD + 2: AlbumType.STUDIO_ALBUM, + 3: AlbumType.EP, + 4: AlbumType.SINGLE, + 5: AlbumType.OTHER, # BOOTLEG + 6: AlbumType.LIVE_ALBUM, + 7: AlbumType.COMPILATION_ALBUM, # compilation of different artists + 8: AlbumType.MIXTAPE, + 9: AlbumType.DEMO, + 10: AlbumType.MIXTAPE, # DJ Mixes + 11: AlbumType.COMPILATION_ALBUM, # compilation of only this artist + 13: AlbumType.COMPILATION_ALBUM, # unofficial + 14: AlbumType.MIXTAPE # "Soundtracks" + }) + + _id: Optional[str] = None name: str = "" source_list: List[Source] = [] + timestamp: Optional[ID3Timestamp] = None + album_status = None - def parse_release_anchor(anchor: BeautifulSoup, text_is_name=False): - if anchor is None: + album_status_id = album_card.get("data-type") + if album_status_id.isdigit(): + album_status_id = int(album_status_id) + album_type = album_type_map[album_status_id] + + if album_status_id == 5: + album_status = AlbumStatus.BOOTLEG + + def parse_release_anchor(_anchor: BeautifulSoup, text_is_name=False): + nonlocal _id + nonlocal name + nonlocal source_list + + if _anchor is None: return - source_list.append(Source( - cls.SOURCE_TYPE, - cls.HOST + anchor.get("href") - )) + href = _anchor.get("href") + if href is not None: + # add url to sources + source_list.append(Source( + cls.SOURCE_TYPE, + cls.HOST + href + )) + + # split id from url + split_href = href.split("-") + if len(split_href) > 1: + _id = split_href[-1] if not text_is_name: return - name = anchor.text + name = _anchor.text anchor_list = 
album_card.find_all("a", recursive=False) if len(anchor_list) > 0: @@ -447,11 +490,43 @@ class Musify(Page): if card_body is not None: parse_release_anchor(card_body.find("a"), text_is_name=True) + def parse_small_date(small_soup: BeautifulSoup): + """ + + + 13.11.2021 + + """ + nonlocal timestamp + + italic_tagging_soup: BeautifulSoup = small_soup.find("i") + if italic_tagging_soup is None: + return + if italic_tagging_soup.get("title") != "Добавлено": + # "Добавлено" can be translated to "Added (at)" + return + + raw_time = small_soup.text.strip() + timestamp = ID3Timestamp.strptime(raw_time, "%d.%m.%Y") + + # parse small date card_footer_list = album_card.find_all("div", {"class": "card-footer"}) + if len(card_footer_list) != 3: + LOGGER.debug("there are not exactly 3 card footers in a card") + + if len(card_footer_list) > 0: + for any_small_soup in card_footer_list[-1].find_all("small"): + parse_small_date(any_small_soup) + else: + LOGGER.debug("there is not even 1 footer in the album card") return Album( + _id=_id, title=name, - source_list=source_list + source_list=source_list, + date=timestamp, + album_type=album_type, + album_status=album_status ) @classmethod @@ -475,9 +550,6 @@ class Musify(Page): soup: BeautifulSoup = BeautifulSoup(r.content, features="html.parser") - print(r) - # print(soup.prettify) - discography: List[Album] = [] for card_soup in soup.find_all("div", {"class": "card"}): discography.append(cls.parse_album_card(card_soup)) @@ -489,7 +561,7 @@ class Musify(Page): """ fetches artist from source - [] discography + [x] discography [] attributes [] picture galery From 05c96106833a4c0ddbb9570a644904e65da87c30 Mon Sep 17 00:00:00 2001 From: Hellow Date: Fri, 17 Mar 2023 18:16:06 +0100 Subject: [PATCH 28/42] moved error handling of get and post request into abstract.py instead do it new in every function --- src/music_kraken/pages/abstract.py | 57 +++++++++++++++++++++++++--- src/music_kraken/pages/musify.py | 60 ++++++++++++++---------------- 
2 files changed, 80 insertions(+), 37 deletions(-) diff --git a/src/music_kraken/pages/abstract.py b/src/music_kraken/pages/abstract.py index a3ca28a..6438129 100644 --- a/src/music_kraken/pages/abstract.py +++ b/src/music_kraken/pages/abstract.py @@ -1,6 +1,10 @@ -from typing import ( - List -) +from typing import Optional +import requests +import logging + +LOGGER = logging.getLogger("this shouldn't be used") + +from ..utils import shared from ..objects import ( Song, @@ -20,6 +24,49 @@ class Page: functionality for every other class fetching something """ + API_SESSION: requests.Session = requests.Session() + API_SESSION.proxies = shared.proxies + TIMEOUT = 5 + TRIES = 5 + + @classmethod + def get_request(cls, url: str, accepted_response_codes: set = set((200,)), trie: int = 0) -> Optional[ + requests.Request]: + try: + r = cls.API_SESSION.get(url, timeout=cls.TIMEOUT) + except requests.exceptions.Timeout: + return None + + if r.status_code in accepted_response_codes: + return r + + LOGGER.warning(f"{cls.__name__} responded wit {r.status_code} at {url}. ({trie}-{cls.TRIES})") + LOGGER.debug(r.content) + + if trie <= cls.TRIES: + LOGGER.warning("to many tries. Aborting.") + + return cls.get_request(url, accepted_response_codes, trie + 1) + + @classmethod + def post_request(cls, url: str, json: dict, accepted_response_codes: set = set((200,)), trie: int = 0) -> Optional[ + requests.Request]: + try: + r = cls.API_SESSION.post(url, json=json, timeout=cls.TIMEOUT) + except requests.exceptions.Timeout: + return None + + if r.status_code in accepted_response_codes: + return r + + LOGGER.warning(f"{cls.__name__} responded wit {r.status_code} at {url}. ({trie}-{cls.TRIES})") + LOGGER.debug(r.content) + + if trie <= cls.TRIES: + LOGGER.warning("to many tries. 
Aborting.") + + return cls.post_request(url, accepted_response_codes, trie + 1) + class Query: def __init__(self, query: str): self.query = query @@ -70,7 +117,7 @@ class Page: song_str = property(fget=lambda self: self.get_str(self.song)) @classmethod - def search_by_query(cls, query: str) -> Options: + def search_by_query(cls, query: str) -> Options: """ # The Query You can define a new parameter with "#", @@ -106,7 +153,7 @@ class Page: song = cls.fetch_song_details(music_object, flat=flat) song.compile() return song - + if type(music_object) == Album: album = cls.fetch_album_details(music_object, flat=flat) album.compile() diff --git a/src/music_kraken/pages/musify.py b/src/music_kraken/pages/musify.py index 8a629b8..fbedc62 100644 --- a/src/music_kraken/pages/musify.py +++ b/src/music_kraken/pages/musify.py @@ -79,11 +79,12 @@ class Musify(Page): "Referer": "https://musify.club/" } API_SESSION.proxies = shared.proxies + TIMEOUT = 5 + TRIES = 5 + HOST = "https://musify.club" SOURCE_TYPE = SourcePages.MUSIFY - HOST = "https://musify.club" - @classmethod def search_by_query(cls, query: str) -> Options: query_obj = cls.Query(query) @@ -98,25 +99,6 @@ class Musify(Page): return f"{query.artist or '*'} - {query.song or '*'}" return f"{query.artist or '*'} - {query.album or '*'} - {query.song or '*'}" - @classmethod - def get_soup_of_search(cls, query: str, trie=0) -> Optional[BeautifulSoup]: - url = f"https://musify.club/search?searchText={query}" - LOGGER.debug(f"Trying to get soup from {url}") - try: - r = cls.API_SESSION.get(url, timeout=15) - except requests.exceptions.Timeout: - return None - if r.status_code != 200: - if r.status_code in [503] and trie < cls.TRIES: - LOGGER.warning(f"{cls.__name__} blocked downloading. 
({trie}-{cls.TRIES})") - LOGGER.warning(f"retrying in {cls.TIMEOUT} seconds again") - time.sleep(cls.TIMEOUT) - return cls.get_soup_of_search(query, trie=trie + 1) - - LOGGER.warning("too many tries, returning") - return None - return BeautifulSoup(r.content, features="html.parser") - @classmethod def parse_artist_contact(cls, contact: BeautifulSoup) -> Artist: source_list: List[Source] = [] @@ -356,9 +338,10 @@ class Musify(Page): def plaintext_search(cls, query: str) -> Options: search_results = [] - search_soup = cls.get_soup_of_search(query=query) - if search_soup is None: - return None + r = cls.get_request(f"https://musify.club/search?searchText={query}") + if r is None: + return Options() + search_soup: BeautifulSoup = BeautifulSoup(r.content, features="html.parser") # album and songs # child of div class: contacts row @@ -541,13 +524,14 @@ class Musify(Page): endpoint = cls.HOST + "/" + url.source_type.value + "/filteralbums" - r = cls.API_SESSION.post(url=endpoint, json={ + r = cls.post_request(url=endpoint, json={ "ArtistID": str(url.musify_id), "SortOrder.Property": "dateCreated", "SortOrder.IsAscending": False, "X-Requested-With": "XMLHttpRequest" }) - + if r is None: + return [] soup: BeautifulSoup = BeautifulSoup(r.content, features="html.parser") discography: List[Album] = [] @@ -556,6 +540,20 @@ class Musify(Page): return discography + @classmethod + def get_artist_attributes(cls, url: MusifyUrl) -> Artist: + """ + fetches the main Artist attributes from this endpoint + https://musify.club/artist/ghost-bath-280348?_pjax=#bodyContent + it needs to parse html + + :param url: + :return: + """ + return Artist( + name="" + ) + @classmethod def get_artist_from_source(cls, source: Source, flat: bool = False) -> Artist: """ @@ -573,16 +571,14 @@ class Musify(Page): Artist: the artist fetched """ - print(source) url = cls.parse_url(source.url) - print(url) + + artist = cls.get_artist_attributes(url) discography: List[Album] = cls.get_discography(url) + 
artist.main_album_collection.extend(discography) - return Artist( - name="", - main_album_list=discography - ) + return artist @classmethod def fetch_artist_details(cls, artist: Artist, flat: bool = False) -> Artist: From 5d0f9814c8e566f5f40cdac73112d9a479bdc249 Mon Sep 17 00:00:00 2001 From: Hellow Date: Fri, 17 Mar 2023 23:27:14 +0100 Subject: [PATCH 29/42] finished fetching of artist details --- src/music_kraken/pages/musify.py | 104 ++++++++++++++++++++++++++++++- 1 file changed, 101 insertions(+), 3 deletions(-) diff --git a/src/music_kraken/pages/musify.py b/src/music_kraken/pages/musify.py index fbedc62..ea26bde 100644 --- a/src/music_kraken/pages/musify.py +++ b/src/music_kraken/pages/musify.py @@ -550,8 +550,106 @@ class Musify(Page): :param url: :return: """ + + r = cls.get_request(f"https://musify.club/{url.source_type.value}/{url.name_with_id}?_pjax=#bodyContent") + if r is None: + return Artist(_id=url.musify_id, name="") + + soup = BeautifulSoup(r.content, "html.parser") + + """ + + + + +
+

Ghost Bath

+
+ ... +
+
+ +
    +
  • + + + Соединенные Штаты +
  • +
+ """ + name = "" + source_list: List[Source] = [] + county = None + + breadcrumbs: BeautifulSoup = soup.find("ol", {"class": "breadcrumb"}) + if breadcrumbs is not None: + breadcrumb_list: List[BeautifulSoup] = breadcrumbs.find_all("li", {"class": "breadcrumb"}, recursive=False) + if len(breadcrumb_list) == 3: + name = breadcrumb_list[-1].get_text(strip=True) + else: + LOGGER.debug("breadcrumb layout on artist page changed") + + nav_tabs: BeautifulSoup = soup.find("ul", {"class": "nav-tabs"}) + if nav_tabs is not None: + list_item: BeautifulSoup + for list_item in nav_tabs.find_all("li", {"class": "nav-item"}, recursive=False): + if not list_item.get_text(strip=True).startswith("песни"): + # "песни" translates to "songs" + continue + + anchor: BeautifulSoup = list_item.find("a") + if anchor is None: + continue + href = anchor.get("href") + if href is None: + continue + + source_list.append(Source( + cls.SOURCE_TYPE, + cls.HOST + href + )) + + content_title: BeautifulSoup = soup.find("header", {"class": "content__title"}) + if content_title is not None: + h1_name: BeautifulSoup = soup.find("h1", recursive=False) + if h1_name is not None: + name = h1_name.get_text(strip=True) + + icon_list: BeautifulSoup = soup.find("ul", {"class": "icon-list"}) + if icon_list is not None: + country_italic: BeautifulSoup = icon_list.find("i", {"class", "flag-icon"}) + if country_italic is not None: + style_classes: set = {'flag-icon', 'shadow'} + classes: set = set(country_italic.get("class")) + + country_set: set = classes.difference(style_classes) + if len(country_set) != 1: + LOGGER.debug("the country set contains multiple values") + if len(country_set) != 0: + """ + This is the css file, where all flags that can be used on musify + are laid out and styled. + Every flag has two upper case letters, thus I assume they follow the alpha_2 + standard, though I haven't checked. 
+ https://musify.club/content/flags.min.css + """ + + country = pycountry.countries.get(alpha_2=list(country_set)[0]) + return Artist( - name="" + _id=url.musify_id, + name=name, + country=county, + source_list=source_list ) @classmethod @@ -560,8 +658,8 @@ class Musify(Page): fetches artist from source [x] discography - [] attributes - [] picture galery + [x] attributes *(name and country... wooooow and I waste one request for this)* + [] picture gallery Args: source (Source): the source to fetch From 5bd94391040dc9717c0ebd962387c4bffc4e0bfc Mon Sep 17 00:00:00 2001 From: Hellow Date: Fri, 17 Mar 2023 23:55:38 +0100 Subject: [PATCH 30/42] fetching notes --- src/music_kraken/pages/musify.py | 13 ++++++++++--- src/musify_search.py | 2 +- 2 files changed, 11 insertions(+), 4 deletions(-) diff --git a/src/music_kraken/pages/musify.py b/src/music_kraken/pages/musify.py index ea26bde..1dfe02f 100644 --- a/src/music_kraken/pages/musify.py +++ b/src/music_kraken/pages/musify.py @@ -588,7 +588,8 @@ class Musify(Page): """ name = "" source_list: List[Source] = [] - county = None + country = None + notes: FormattedText = None breadcrumbs: BeautifulSoup = soup.find("ol", {"class": "breadcrumb"}) if breadcrumbs is not None: @@ -645,11 +646,17 @@ class Musify(Page): country = pycountry.countries.get(alpha_2=list(country_set)[0]) + note_soup: BeautifulSoup = soup.find(id="text-main") + if note_soup is not None: + notes = FormattedText(html=note_soup.decode_contents()) + print(notes.plaintext) + return Artist( _id=url.musify_id, name=name, - country=county, - source_list=source_list + country=country, + source_list=source_list, + notes=notes ) @classmethod diff --git a/src/musify_search.py b/src/musify_search.py index 5811318..a6ed8ef 100644 --- a/src/musify_search.py +++ b/src/musify_search.py @@ -10,7 +10,7 @@ def search(): def fetch_artist(): artist = objects.Artist( name="Ghost Bath", - source_list=[objects.Source(objects.SourcePages.MUSIFY, 
"https://musify.club/artist/ghost-bath-280348")] + source_list=[objects.Source(objects.SourcePages.MUSIFY, "https://musify.club/artist/psychonaut-4-83193")] ) artist = Musify.fetch_details(artist) From 862c25dd35fe81bca8bf74da281ceeabda834fbd Mon Sep 17 00:00:00 2001 From: Hellow Date: Sat, 18 Mar 2023 12:36:53 +0100 Subject: [PATCH 31/42] refactored merging function to take default values other than None into account parents.py --- src/music_kraken/objects/formatted_text.py | 19 ++++++- src/music_kraken/objects/lyrics.py | 12 ++-- src/music_kraken/objects/metadata.py | 2 + src/music_kraken/objects/parents.py | 23 ++++---- src/music_kraken/objects/song.py | 65 ++++++++++++++++++---- src/music_kraken/objects/source.py | 6 +- src/music_kraken/objects/target.py | 6 +- src/musify_search.py | 3 +- 8 files changed, 103 insertions(+), 33 deletions(-) diff --git a/src/music_kraken/objects/formatted_text.py b/src/music_kraken/objects/formatted_text.py index 238cc2c..3027319 100644 --- a/src/music_kraken/objects/formatted_text.py +++ b/src/music_kraken/objects/formatted_text.py @@ -43,19 +43,32 @@ class FormattedText: def get_markdown(self) -> str: if self.doc is None: - return None + return "" return pandoc.write(self.doc, format="markdown").strip() def get_html(self) -> str: if self.doc is None: - return None + return "" return pandoc.write(self.doc, format="html").strip() def get_plaintext(self) -> str: if self.doc is None: - return None + return "" return pandoc.write(self.doc, format="plain").strip() + @property + def is_empty(self) -> bool: + return self.doc is None + + def __eq__(self, other) -> False: + if type(other) != type(self): + return False + if self.is_empty and other.is_empty: + return True + + return self.doc == other.doc + + plaintext = property(fget=get_plaintext, fset=set_plaintext) markdown = property(fget=get_markdown, fset=set_markdown) diff --git a/src/music_kraken/objects/lyrics.py b/src/music_kraken/objects/lyrics.py index dc668ed..d2ba425 100644 
--- a/src/music_kraken/objects/lyrics.py +++ b/src/music_kraken/objects/lyrics.py @@ -1,16 +1,18 @@ from typing import List - +from collections import defaultdict import pycountry from .parents import DatabaseObject from .source import Source, SourceCollection -from .metadata import Metadata from .formatted_text import FormattedText class Lyrics(DatabaseObject): COLLECTION_ATTRIBUTES = ("source_collection",) - SIMPLE_ATTRIBUTES = ("text", "language") + SIMPLE_ATTRIBUTES = { + "text": FormattedText(), + "language": None + } def __init__( self, @@ -21,9 +23,9 @@ class Lyrics(DatabaseObject): source_list: List[Source] = None, **kwargs ) -> None: - DatabaseObject.__init__(self, _id=_id, dynamic=dynamic) + DatabaseObject.__init__(self, _id=_id, dynamic=dynamic, **kwargs) - self.text: FormattedText = text + self.text: FormattedText = text or FormattedText() self.language: pycountry.Languages = language self.source_collection: SourceCollection = SourceCollection(source_list) diff --git a/src/music_kraken/objects/metadata.py b/src/music_kraken/objects/metadata.py index 1c7b294..404af9c 100644 --- a/src/music_kraken/objects/metadata.py +++ b/src/music_kraken/objects/metadata.py @@ -154,6 +154,8 @@ class ID3Timestamp: return self.date_obj >= other.date_obj def __eq__(self, other): + if type(other) != type(self): + return False return self.date_obj == other.date_obj def get_time_format(self) -> str: diff --git a/src/music_kraken/objects/parents.py b/src/music_kraken/objects/parents.py index c9993b7..855ba71 100644 --- a/src/music_kraken/objects/parents.py +++ b/src/music_kraken/objects/parents.py @@ -11,11 +11,11 @@ from .option import Options class DatabaseObject: COLLECTION_ATTRIBUTES: tuple = tuple() - SIMPLE_ATTRIBUTES: tuple = tuple() - + SIMPLE_ATTRIBUTES: dict = dict() + def __init__(self, _id: str = None, dynamic: bool = False, **kwargs) -> None: self.automatic_id: bool = False - + if _id is None and not dynamic: """ generates a random UUID @@ -46,7 +46,7 @@ class 
DatabaseObject: return True return False - + @property def indexing_values(self) -> List[Tuple[str, object]]: """ @@ -56,9 +56,9 @@ class DatabaseObject: Returns: List[Tuple[str, object]]: the first element in the tuple is the name of the attribute, the second the value. """ - + return list() - + def merge(self, other, override: bool = False): if not isinstance(other, type(self)): LOGGER.warning(f"can't merge \"{type(other)}\" into \"{type(self)}\"") @@ -67,14 +67,13 @@ class DatabaseObject: for collection in type(self).COLLECTION_ATTRIBUTES: getattr(self, collection).extend(getattr(other, collection)) - for simple_attribute in type(self).SIMPLE_ATTRIBUTES: - if getattr(other, simple_attribute) is None: + for simple_attribute, default_value in type(self).SIMPLE_ATTRIBUTES.items(): + if getattr(other, simple_attribute) == default_value: continue - if override or getattr(self, simple_attribute) is None: + if override or getattr(self, simple_attribute) == default_value: setattr(self, simple_attribute, getattr(other, simple_attribute)) - @property def metadata(self) -> Metadata: return Metadata() @@ -86,7 +85,7 @@ class DatabaseObject: @property def option_string(self) -> str: return self.__repr__() - + def compile(self) -> bool: """ compiles the recursive structures, @@ -111,7 +110,7 @@ class MainObject(DatabaseObject): It has all the functionality of the "DatabaseObject" (it inherits from said class) but also some added functions as well. 
""" - + def __init__(self, _id: str = None, dynamic: bool = False, **kwargs): DatabaseObject.__init__(self, _id=_id, dynamic=dynamic, **kwargs) diff --git a/src/music_kraken/objects/song.py b/src/music_kraken/objects/song.py index 252678d..4169994 100644 --- a/src/music_kraken/objects/song.py +++ b/src/music_kraken/objects/song.py @@ -1,6 +1,7 @@ import os from typing import List, Optional, Dict, Tuple import pycountry +from collections import defaultdict from .metadata import ( Mapping as id3Mapping, @@ -46,7 +47,15 @@ class Song(MainObject): COLLECTION_ATTRIBUTES = ( "lyrics_collection", "album_collection", "main_artist_collection", "feature_artist_collection", "source_collection") - SIMPLE_ATTRIBUTES = ("title", "unified_title", "isrc", "length", "tracksort", "genre") + SIMPLE_ATTRIBUTES = { + "title": None, + "unified_title": None, + "isrc": None, + "length": None, + "tracksort": 0, + "genre": None, + "notes": FormattedText() + } def __init__( self, @@ -64,17 +73,21 @@ class Song(MainObject): album_list: List['Album'] = None, main_artist_list: List['Artist'] = None, feature_artist_list: List['Artist'] = None, + notes: FormattedText = None, **kwargs ) -> None: MainObject.__init__(self, _id=_id, dynamic=dynamic, **kwargs) # attributes self.title: str = title - self.unified_title: str = unified_title or unify(title) + self.unified_title: str = unified_title + if unified_title is None and title is not None: + self.unified_title = unify(title) self.isrc: str = isrc self.length: int = length self.tracksort: int = tracksort or 0 self.genre: str = genre + self.notes: FormattedText = notes or FormattedText() self.source_collection: SourceCollection = SourceCollection(source_list) self.target_collection: Collection = Collection(data=target_list, element_type=Target) @@ -181,7 +194,17 @@ All objects dependent on Album class Album(MainObject): COLLECTION_ATTRIBUTES = ("label_collection", "artist_collection", "song_collection") - SIMPLE_ATTRIBUTES = ("title", 
"album_status", "album_type", "language", "date", "barcode", "albumsort") + SIMPLE_ATTRIBUTES = { + "title": None, + "unified_title": None, + "album_status": None, + "album_type": AlbumType.OTHER, + "language": None, + "date": ID3Timestamp(), + "barcode": None, + "albumsort": None, + "notes": FormattedText() + } def __init__( self, @@ -199,15 +222,18 @@ class Album(MainObject): album_status: AlbumStatus = None, album_type: AlbumType = None, label_list: List['Label'] = None, + notes: FormattedText = None, **kwargs ) -> None: MainObject.__init__(self, _id=_id, dynamic=dynamic, **kwargs) self.title: str = title - self.unified_title: str = unified_title or unify(self.title) + self.unified_title: str = unified_title + if unified_title is None and title is not None: + self.unified_title = unify(title) self.album_status: AlbumStatus = album_status - self.album_type: AlbumType = album_type + self.album_type: AlbumType = album_type or AlbumType.OTHER self.language: pycountry.Languages = language self.date: ID3Timestamp = date or ID3Timestamp() @@ -223,6 +249,7 @@ class Album(MainObject): to set albumsort with help of the release year """ self.albumsort: Optional[int] = albumsort + self.notes = notes or FormattedText() self.source_collection: SourceCollection = SourceCollection(source_list) self.song_collection: Collection = Collection(data=song_list, element_type=Song) @@ -230,7 +257,7 @@ class Album(MainObject): self.label_collection: Collection = Collection(data=label_list, element_type=Label) def compile(self): - song: "Song" + song: Song for song in self.song_collection: if song.album_collection.insecure_append(self): song.compile() @@ -351,7 +378,15 @@ All objects dependent on Artist class Artist(MainObject): COLLECTION_ATTRIBUTES = ("feature_song_collection", "main_album_collection", "label_collection") - SIMPLE_ATTRIBUTES = ("name", "name", "country", "formed_in", "notes", "lyrical_themes", "general_genre") + SIMPLE_ATTRIBUTES = { + "name": None, + "unified_name": 
None, + "country": None, + "formed_in": ID3Timestamp(), + "notes": FormattedText(), + "lyrical_themes": [], + "general_genre": "" + } def __init__( self, @@ -373,7 +408,9 @@ class Artist(MainObject): MainObject.__init__(self, _id=_id, dynamic=dynamic, **kwargs) self.name: str = name - self.unified_name: str = unified_name or unify(self.name) + self.unified_name: str = unified_name + if unified_name is None and name is not None: + self.unified_name = unify(name) """ TODO implement album type and notes @@ -512,7 +549,11 @@ Label class Label(MainObject): COLLECTION_ATTRIBUTES = ("album_collection", "current_artist_collection") - SIMPLE_ATTRIBUTES = ("name",) + SIMPLE_ATTRIBUTES = { + "name": None, + "unified_name": None, + "notes": FormattedText() + } def __init__( self, @@ -520,6 +561,7 @@ class Label(MainObject): dynamic: bool = False, name: str = None, unified_name: str = None, + notes: FormattedText = None, album_list: List[Album] = None, current_artist_list: List[Artist] = None, source_list: List[Source] = None, @@ -528,7 +570,10 @@ class Label(MainObject): MainObject.__init__(self, _id=_id, dynamic=dynamic, **kwargs) self.name: str = name - self.unified_name: str = unified_name or unify(self.name) + self.unified_name: str = unified_name + if unified_name is None and name is not None: + self.unified_name = unify(name) + self.notes = notes or FormattedText() self.source_collection: SourceCollection = SourceCollection(source_list) self.album_collection: Collection = Collection(data=album_list, element_type=Album) diff --git a/src/music_kraken/objects/source.py b/src/music_kraken/objects/source.py index 8fe03fc..7213575 100644 --- a/src/music_kraken/objects/source.py +++ b/src/music_kraken/objects/source.py @@ -56,7 +56,11 @@ class Source(DatabaseObject): ``` """ COLLECTION_ATTRIBUTES = tuple() - SIMPLE_ATTRIBUTES = ("type_enum", "page_enum", "url") + SIMPLE_ATTRIBUTES = { + "type_enum": None, + "page_enum": None, + "url": None + } def __init__(self, page_enum: 
SourcePages, url: str, id_: str = None, type_enum=None) -> None: DatabaseObject.__init__(self, id_=id_) diff --git a/src/music_kraken/objects/target.py b/src/music_kraken/objects/target.py index 895e367..3766eb0 100644 --- a/src/music_kraken/objects/target.py +++ b/src/music_kraken/objects/target.py @@ -1,5 +1,6 @@ from typing import Optional, List, Tuple from pathlib import Path +from collections import defaultdict from ..utils import shared from .parents import DatabaseObject @@ -14,7 +15,10 @@ class Target(DatabaseObject): ``` """ - SIMPLE_ATTRIBUTES = ("_file", "_path") + SIMPLE_ATTRIBUTES = { + "_file": None, + "_path": None + } COLLECTION_ATTRIBUTES = tuple() def __init__( diff --git a/src/musify_search.py b/src/musify_search.py index a6ed8ef..a95e172 100644 --- a/src/musify_search.py +++ b/src/musify_search.py @@ -12,9 +12,10 @@ def fetch_artist(): name="Ghost Bath", source_list=[objects.Source(objects.SourcePages.MUSIFY, "https://musify.club/artist/psychonaut-4-83193")] ) - + artist = Musify.fetch_details(artist) print(artist.options) + if __name__ == "__main__": fetch_artist() From 2724eb3e365483f9586b4ab96eeeb02c689ac545 Mon Sep 17 00:00:00 2001 From: Hellow Date: Sat, 18 Mar 2023 13:01:27 +0100 Subject: [PATCH 32/42] added fetching of linked sources to musify --- src/music_kraken/objects/song.py | 2 +- src/music_kraken/objects/source.py | 19 ++++++++++++++----- src/music_kraken/pages/musify.py | 26 ++++++++++++++++++++------ src/musify_search.py | 1 - 4 files changed, 35 insertions(+), 13 deletions(-) diff --git a/src/music_kraken/objects/song.py b/src/music_kraken/objects/song.py index 4169994..82a2cf0 100644 --- a/src/music_kraken/objects/song.py +++ b/src/music_kraken/objects/song.py @@ -377,7 +377,7 @@ All objects dependent on Artist class Artist(MainObject): - COLLECTION_ATTRIBUTES = ("feature_song_collection", "main_album_collection", "label_collection") + COLLECTION_ATTRIBUTES = ("feature_song_collection", "main_album_collection", 
"label_collection", "source_collection") SIMPLE_ATTRIBUTES = { "name": None, "unified_name": None, diff --git a/src/music_kraken/objects/source.py b/src/music_kraken/objects/source.py index 7213575..3214513 100644 --- a/src/music_kraken/objects/source.py +++ b/src/music_kraken/objects/source.py @@ -1,6 +1,7 @@ from collections import defaultdict from enum import Enum -from typing import List, Dict, Tuple +from typing import List, Dict, Tuple, Optional +from urllib.parse import urlparse from .metadata import Mapping, Metadata from .parents import DatabaseObject @@ -27,7 +28,8 @@ class SourcePages(Enum): # This has nothing to do with audio, but bands can be here INSTAGRAM = "instagram" FACEBOOK = "facebook" - TWITTER = "twitter" # I will use nitter though lol + TWITTER = "twitter" # I will use nitter though lol + MYSPACE = "myspace" # Yes somehow this ancient site is linked EVERYWHERE @classmethod def get_homepage(cls, attribute) -> str: @@ -42,7 +44,8 @@ class SourcePages(Enum): cls.INSTAGRAM: "https://www.instagram.com/", cls.FACEBOOK: "https://www.facebook.com/", cls.SPOTIFY: "https://open.spotify.com/", - cls.TWITTER: "https://twitter.com/" + cls.TWITTER: "https://twitter.com/", + cls.MYSPACE: "https://myspace.com/" } return homepage_map[attribute] @@ -71,11 +74,14 @@ class Source(DatabaseObject): self.url = url @classmethod - def match_url(cls, url: str): + def match_url(cls, url: str) -> Optional["Source"]: """ this shouldn't be used, unlesse you are not certain what the source is for the reason is that it is more inefficient """ + parsed = urlparse(url) + url = parsed.geturl() + if url.startswith("https://www.youtube"): return cls(SourcePages.YOUTUBE, url) @@ -101,6 +107,9 @@ class Source(DatabaseObject): if url.startswith("https://twitter"): return cls(SourcePages.TWITTER, url) + if url.startswith("https://myspace.com"): + return cls(SourcePages.MYSPACE, url) + def get_song_metadata(self) -> Metadata: return Metadata({ Mapping.FILE_WEBPAGE_URL: [self.url], @@ 
-157,4 +166,4 @@ class SourceCollection(Collection): getting the sources for a specific page like YouTube or musify """ - return self._page_to_source_list[source_page] + return self._page_to_source_list[source_page].copy() diff --git a/src/music_kraken/pages/musify.py b/src/music_kraken/pages/musify.py index 1dfe02f..6059495 100644 --- a/src/music_kraken/pages/musify.py +++ b/src/music_kraken/pages/musify.py @@ -102,7 +102,7 @@ class Musify(Page): @classmethod def parse_artist_contact(cls, contact: BeautifulSoup) -> Artist: source_list: List[Source] = [] - name = "" + name = None _id = None # source @@ -153,12 +153,15 @@ class Musify(Page): """ source_list: List[Source] = [] - title = "" + title = None _id = None year = None artist_list: List[Artist] = [] def parse_title_date(title_date: Optional[str], delimiter: str = " - "): + nonlocal year + nonlocal title + if title_date is None: return @@ -262,7 +265,7 @@ class Musify(Page): @classmethod def parse_playlist_item(cls, playlist_item_soup: BeautifulSoup) -> Song: _id = None - song_title = playlist_item_soup.get("data-name") or "" + song_title = playlist_item_soup.get("data-name") artist_list: List[Artist] = [] source_list: List[Source] = [] @@ -415,7 +418,7 @@ class Musify(Page): }) _id: Optional[str] = None - name: str = "" + name: str = None source_list: List[Source] = [] timestamp: Optional[ID3Timestamp] = None album_status = None @@ -586,7 +589,7 @@ class Musify(Page): """ - name = "" + name = None source_list: List[Source] = [] country = None notes: FormattedText = None @@ -625,6 +628,7 @@ class Musify(Page): if h1_name is not None: name = h1_name.get_text(strip=True) + # country and sources icon_list: BeautifulSoup = soup.find("ul", {"class": "icon-list"}) if icon_list is not None: country_italic: BeautifulSoup = icon_list.find("i", {"class", "flag-icon"}) @@ -646,10 +650,20 @@ class Musify(Page): country = pycountry.countries.get(alpha_2=list(country_set)[0]) + # get all additional sources + 
additional_source: BeautifulSoup + for additional_source in icon_list.find_all("a", {"class", "link"}): + href = additional_source.get("href") + if href is None: + continue + new_src = Source.match_url(href) + if new_src is None: + continue + source_list.append(new_src) + note_soup: BeautifulSoup = soup.find(id="text-main") if note_soup is not None: notes = FormattedText(html=note_soup.decode_contents()) - print(notes.plaintext) return Artist( _id=url.musify_id, diff --git a/src/musify_search.py b/src/musify_search.py index a95e172..bb80746 100644 --- a/src/musify_search.py +++ b/src/musify_search.py @@ -9,7 +9,6 @@ def search(): def fetch_artist(): artist = objects.Artist( - name="Ghost Bath", source_list=[objects.Source(objects.SourcePages.MUSIFY, "https://musify.club/artist/psychonaut-4-83193")] ) From 1ae01ed1fd19925c0829f825720a748896b00514 Mon Sep 17 00:00:00 2001 From: Hellow Date: Sat, 18 Mar 2023 17:06:12 +0100 Subject: [PATCH 33/42] added wikipedia --- src/music_kraken/objects/source.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/src/music_kraken/objects/source.py b/src/music_kraken/objects/source.py index 3214513..b48cb64 100644 --- a/src/music_kraken/objects/source.py +++ b/src/music_kraken/objects/source.py @@ -26,6 +26,7 @@ class SourcePages(Enum): SPOTIFY = "spotify" # This has nothing to do with audio, but bands can be here + WIKIPEDIA = "wikipedia" INSTAGRAM = "instagram" FACEBOOK = "facebook" TWITTER = "twitter" # I will use nitter though lol @@ -45,7 +46,8 @@ class SourcePages(Enum): cls.FACEBOOK: "https://www.facebook.com/", cls.SPOTIFY: "https://open.spotify.com/", cls.TWITTER: "https://twitter.com/", - cls.MYSPACE: "https://myspace.com/" + cls.MYSPACE: "https://myspace.com/", + cls.WIKIPEDIA: "https://en.wikipedia.org/wiki/Main_Page" } return homepage_map[attribute] @@ -94,6 +96,9 @@ class Source(DatabaseObject): if "bandcamp" in url: return cls(SourcePages.BANDCAMP, url) + if "wikipedia" in parsed.netloc: + return 
cls(SourcePages.WIKIPEDIA, url) + if url.startswith("https://www.metal-archives.com/"): return cls(SourcePages.ENCYCLOPAEDIA_METALLUM, url) From dc6a176c246b90d44432edfb695c3560ed76b273 Mon Sep 17 00:00:00 2001 From: Hellow2 Date: Mon, 20 Mar 2023 14:40:32 +0100 Subject: [PATCH 34/42] fixed DISGUSTING bug --- src/music_kraken/pages/abstract.py | 32 +++++++++++++++++- src/music_kraken/pages/musify.py | 53 ++++++++++-------------------- 2 files changed, 48 insertions(+), 37 deletions(-) diff --git a/src/music_kraken/pages/abstract.py b/src/music_kraken/pages/abstract.py index 6438129..83fae1a 100644 --- a/src/music_kraken/pages/abstract.py +++ b/src/music_kraken/pages/abstract.py @@ -14,7 +14,8 @@ from ..objects import ( Lyrics, Target, MusicObject, - Options + Options, + SourcePages ) @@ -28,6 +29,8 @@ class Page: API_SESSION.proxies = shared.proxies TIMEOUT = 5 TRIES = 5 + + SOURCE_TYPE: SourcePages @classmethod def get_request(cls, url: str, accepted_response_codes: set = set((200,)), trie: int = 0) -> Optional[ @@ -166,6 +169,10 @@ class Page: raise NotImplementedError(f"MusicObject {type(music_object)} has not been implemented yet") + @classmethod + def fetch_song_from_source(cls, source: Source, flat: bool = False) -> Song: + return Song() + @classmethod def fetch_song_details(cls, song: Song, flat: bool = False) -> Song: """ @@ -181,9 +188,18 @@ class Page: :return detailed_song: it modifies the input song """ + + source: Source + for source in song.source_collection.get_sources_from_page(cls.SOURCE_TYPE): + new_song = cls.fetch_song_from_source(source, flat) + song.merge(new_song) return song + @classmethod + def fetch_album_from_source(cls, source: Source, flat: bool = False) -> Album: + return Album() + @classmethod def fetch_album_details(cls, album: Album, flat: bool = False) -> Album: """ @@ -201,8 +217,17 @@ class Page: :return detailed_artist: it modifies the input artist """ + source: Source + for source in 
album.source_collection.get_sources_from_page(cls.SOURCE_TYPE): + new_album: Album = cls.fetch_album_from_source(source, flat) + album.merge(new_album) + return album + @classmethod + def fetch_artist_from_source(cls, source: Source, flat: bool = False) -> Artist: + return Artist() + @classmethod def fetch_artist_details(cls, artist: Artist, flat: bool = False) -> Artist: """ @@ -217,5 +242,10 @@ class Page: :return detailed_artist: it modifies the input artist """ + + source: Source + for source in artist.source_collection.get_sources_from_page(cls.SOURCE_TYPE): + new_artist: Artist = cls.fetch_artist_from_source(source, flat) + artist.merge(new_artist) return artist diff --git a/src/music_kraken/pages/musify.py b/src/music_kraken/pages/musify.py index 6059495..3e6a4e5 100644 --- a/src/music_kraken/pages/musify.py +++ b/src/music_kraken/pages/musify.py @@ -516,7 +516,7 @@ class Musify(Page): ) @classmethod - def get_discography(cls, url: MusifyUrl) -> List[Album]: + def get_discography(cls, url: MusifyUrl, flat=False) -> List[Album]: """ POST https://musify.club/artist/filteralbums ArtistID: 280348 @@ -539,7 +539,13 @@ class Musify(Page): discography: List[Album] = [] for card_soup in soup.find_all("div", {"class": "card"}): - discography.append(cls.parse_album_card(card_soup)) + new_album: Album = cls.parse_album_card(card_soup) + album_source: Source + if not flat: + for album_source in new_album.source_collection.get_sources_from_page(cls.SOURCE_TYPE): + new_album.merge(cls.get_album_from_source(album_source)) + + discography.append(new_album) return discography @@ -556,7 +562,7 @@ class Musify(Page): r = cls.get_request(f"https://musify.club/{url.source_type.value}/{url.name_with_id}?_pjax=#bodyContent") if r is None: - return Artist(_id=url.musify_id, name="") + return Artist(_id=url.musify_id) soup = BeautifulSoup(r.content, "html.parser") @@ -592,11 +598,11 @@ class Musify(Page): name = None source_list: List[Source] = [] country = None - notes: 
FormattedText = None + notes: FormattedText = FormattedText() breadcrumbs: BeautifulSoup = soup.find("ol", {"class": "breadcrumb"}) if breadcrumbs is not None: - breadcrumb_list: List[BeautifulSoup] = breadcrumbs.find_all("li", {"class": "breadcrumb"}, recursive=False) + breadcrumb_list: List[BeautifulSoup] = breadcrumbs.find_all("li", {"class": "breadcrumb-item"}, recursive=False) if len(breadcrumb_list) == 3: name = breadcrumb_list[-1].get_text(strip=True) else: @@ -624,7 +630,7 @@ class Musify(Page): content_title: BeautifulSoup = soup.find("header", {"class": "content__title"}) if content_title is not None: - h1_name: BeautifulSoup = soup.find("h1", recursive=False) + h1_name: BeautifulSoup = content_title.find("h1", recursive=False) if h1_name is not None: name = h1_name.get_text(strip=True) @@ -663,7 +669,7 @@ class Musify(Page): note_soup: BeautifulSoup = soup.find(id="text-main") if note_soup is not None: - notes = FormattedText(html=note_soup.decode_contents()) + notes.html = note_soup.decode_contents() return Artist( _id=url.musify_id, @@ -674,7 +680,7 @@ class Musify(Page): ) @classmethod - def get_artist_from_source(cls, source: Source, flat: bool = False) -> Artist: + def fetch_artist_from_source(cls, source: Source, flat: bool = False) -> Artist: """ fetches artist from source @@ -698,32 +704,7 @@ class Musify(Page): artist.main_album_collection.extend(discography) return artist - + @classmethod - def fetch_artist_details(cls, artist: Artist, flat: bool = False) -> Artist: - source_list = artist.source_collection.get_sources_from_page(cls.SOURCE_TYPE) - if len(source_list) == 0: - return artist - - for source in source_list: - artist.merge(cls.get_artist_from_source(source, flat=flat)) - - return artist - - @classmethod - def fetch_album_details(cls, album: Album, flat: bool = False) -> Album: - - return album - - @classmethod - def fetch_song_details(cls, song: Song, flat: bool = False) -> Song: - source_list = 
song.source_collection.get_sources_from_page(cls.SOURCE_TYPE) - if len(source_list) == 0: - return song - - """ - TODO - lyrics - """ - - return song + def get_album_from_source(cls, source: Source, flat: bool = False) -> Album: + return Album() From 718c911c81ab0b9a6ee9c14e62d7adf9731d07c1 Mon Sep 17 00:00:00 2001 From: Hellow2 Date: Mon, 20 Mar 2023 15:33:45 +0100 Subject: [PATCH 35/42] parsed names to fix a bug --- src/music_kraken/pages/musify.py | 33 ++++++++++++++++++++++++++------ 1 file changed, 27 insertions(+), 6 deletions(-) diff --git a/src/music_kraken/pages/musify.py b/src/music_kraken/pages/musify.py index 3e6a4e5..af3fdcb 100644 --- a/src/music_kraken/pages/musify.py +++ b/src/music_kraken/pages/musify.py @@ -377,7 +377,7 @@ class Musify(Page): ) @classmethod - def parse_album_card(cls, album_card: BeautifulSoup) -> Album: + def parse_album_card(cls, album_card: BeautifulSoup, artist_name: str = None) -> Album: """
@@ -423,6 +423,27 @@ class Musify(Page): timestamp: Optional[ID3Timestamp] = None album_status = None + def set_name(new_name: str): + nonlocal name + nonlocal artist_name + + # example of just setting not working: https://musify.club/release/unjoy-eurythmie-psychonaut-4-tired-numb-still-alive-2012-324067 + if new_name.count(" - ") != 1: + name = new_name + return + + potential_artist_list, potential_name = new_name.split(" - ") + unified_artist_list = string_processing.unify(potential_artist_list) + if artist_name is not None: + if string_processing.unify(artist_name) not in unified_artist_list: + name = new_name + return + + name = potential_name + return + + name = new_name + album_status_id = album_card.get("data-type") if album_status_id.isdigit(): album_status_id = int(album_status_id) @@ -455,7 +476,7 @@ class Musify(Page): if not text_is_name: return - name = _anchor.text + set_name(_anchor.text) anchor_list = album_card.find_all("a", recursive=False) if len(anchor_list) > 0: @@ -466,7 +487,7 @@ class Musify(Page): if thumbnail is not None: alt = thumbnail.get("alt") if alt is not None: - name = alt + set_name(alt) image_url = thumbnail.get("src") else: @@ -516,7 +537,7 @@ class Musify(Page): ) @classmethod - def get_discography(cls, url: MusifyUrl, flat=False) -> List[Album]: + def get_discography(cls, url: MusifyUrl, artist_name: str = None, flat=False) -> List[Album]: """ POST https://musify.club/artist/filteralbums ArtistID: 280348 @@ -539,7 +560,7 @@ class Musify(Page): discography: List[Album] = [] for card_soup in soup.find_all("div", {"class": "card"}): - new_album: Album = cls.parse_album_card(card_soup) + new_album: Album = cls.parse_album_card(card_soup, artist_name) album_source: Source if not flat: for album_source in new_album.source_collection.get_sources_from_page(cls.SOURCE_TYPE): @@ -700,7 +721,7 @@ class Musify(Page): artist = cls.get_artist_attributes(url) - discography: List[Album] = cls.get_discography(url) + discography: List[Album] = 
cls.get_discography(url, artist.name) artist.main_album_collection.extend(discography) return artist From 96f2d7ef6ebcbe3120a3e5d6254bcc4d77400658 Mon Sep 17 00:00:00 2001 From: Hellow2 Date: Mon, 20 Mar 2023 17:03:14 +0100 Subject: [PATCH 36/42] started fetching of album details --- .../pages/html/musify/album_overview.html | 1193 +++++++++++++++++ src/music_kraken/pages/musify.py | 16 +- src/musify_search.py | 10 +- 3 files changed, 1215 insertions(+), 4 deletions(-) create mode 100644 src/music_kraken/pages/html/musify/album_overview.html diff --git a/src/music_kraken/pages/html/musify/album_overview.html b/src/music_kraken/pages/html/musify/album_overview.html new file mode 100644 index 0000000..f64d89f --- /dev/null +++ b/src/music_kraken/pages/html/musify/album_overview.html @@ -0,0 +1,1193 @@ + + + Hybrid Theory (2000) - Linkin Park скачать в mp3 бесплатно | слушать альбом целиком онлайн на портале Musify + + + + + + + + + + + + + + + + +
+
+
+
+ + + + + + + + + + + + +
+

Linkin Park - Hybrid Theory (2000)

+
+ +
+
+ + +
+
+ Hybrid Theory + + + Скачать альбом + +
+ +
+ + +

+ #Alternative Metal + #Nu-Metal + #Rap-Rock +

+ +

+

Hybrid Theory (в пер с. англ. Теория гибридов) — дебютный альбом американской рок-группы Linkin Park. Релиз состоялся 24 октября 2000 года лейблом Warner Bros. Диск имел огромный коммерческий успех. Только в Соединённых Штатах по состоянию на 2012 год было продано свыше 10 млн экземпляров. Альбом поднялся на вторую позицию в Billboard 200 и, несмотря на смешанные отзывы критиков, он разошёлся в 24 млн экземпляров по всему миру. Hybrid Theory стал самым продаваемым альбомом группы и занял седьмое место среди самых продаваемых альбомов десятилетия.
В 2002 Linkin Park получили премию «Грэмми» за лучшее хард-рок исполнение песни «Crawling», также Hybrid Theory был номинирован на «Грэмми» как «Лучший рок-альбом». Специальное издание Hybrid Theory было выпущено 11 марта 2002, через два года после выхода оригинала.
Предыстория
Группа Linkin Park сформировалась в 1996 в Южной Калифорнии. SuperXero было первым названием группы (позднее сменилось на Xero), в состав которой входили Майк Шинода, его школьные друзья Брэд Делсон и Роб Бурдон, а также сосед Делсона по комнате в университетском общежитии Дэвид Фаррелл и колледжский друг Шиноды Джо Хан. Позже к ним присоединился Марк Уэйкфилд. После отказов многих лейблов от контракта, Уэйкфилд покинул группу, решив, что у Xero нет будущего. В то же время Фаррелл оставит группу, чтобы отправиться в гастрольный тур с другой своей группой Tasty Snax.
Xero занялись поиском нового вокалиста: они отослали свою запись Джеффу Блю из Zomba Music, работавшему с ними до этого, и попросили отослать её кому-нибудь. 20 марта 1999 года, когда Блю позвонил в Аризону бывшему вокалисту группы Grey Daze Честеру Беннингтону, чтобы рассказать ему о Xero, тот праздновал свой 23-й день рождения. На следующий день, получив запись, Беннингтон тут же отправился в студию, записал, смонтировал вокал и сразу перезвонил Джеффу. Он прокрутил ему и музыкантам кассету с записью по телефону. Его голос произвел на них сильное впечатление, он тут же был приглашен на прослушивание, после чего был принят в группу. Секстет изменил свое название на Hybrid Theory и выпустил одноименный мини-альбом. Для того, чтобы избежать судебных разбирательств с другой группой, они меняют свое название на Lincoln Park. А чтобы приобрести своё доменное имя, группа меняет название Lincoln Park на Linkin Park.
Содержание
Музыка Hybrid Theory содержит различные вдохновения. Стиль пения Беннингтона находится под влиянием таких исполнителей, как Depeche Mode и Stone Temple Pilots, а риффы и техника игры гитариста Брэда Делсона вдохновлены Deftones, Guns N' Roses, U2 и The Smiths. Рэп Майка Шиноды, который присутствует на семи треках, очень близок к стилю The Roots. Лирическое содержание песен прежде всего затрагивает проблемы Беннингтона, возникающие во время его отрочества, включая жестокое обращение, постоянное и чрезмерное злоупотребление наркотиков и алкоголя, развод его родителей, изоляция, разочарования и чувства последствия неудачных отношений.
«Points of Authority», четвёртый трек в альбоме, имеет свой видеоклип, который можно найти на Frat Party at the Pankake Festival, первом DVD группы. Барабанщик Роб Бурдон описывает процесс записи песни: «Брэд написал этот рифф, затем пошел домой. Майк решил разрезать его на различные части и переставить их на компьютере. Брэду в свою очередь пришлось узнать свою собственную часть с компьютера». Что касается песни, Делсон похвалил мастерство Шиноды, охарактеризовав его как «гения» и «талантливого Трента Резнора».
+ + Развернуть + +

+
+
+ +
+
+

Песни в альбоме
Linkin Park - Hybrid Theory (2000)

+
+
+
+ + +
+
+ 1 +
+
+
+Linkin Park - Papercut + + + +
+
+
+ 326,3K +
+
+ 03:05 + 320 Кб/с +
+
+ + +
+
+ + + +
+
+
+
+ + +
+
+ 2 +
+
+ +
+
+ 287,9K +
+
+ 02:36 + 320 Кб/с +
+
+ + +
+
+ + + +
+
+
+
+ + +
+
+ 3 +
+
+
+Linkin Park - With You + + + +
+
+
+ 232,8K +
+
+ 03:23 + 320 Кб/с +
+
+ + +
+
+ + + +
+
+
+
+ + +
+
+ 4 +
+ +
+ 147K +
+
+ 03:20 + 320 Кб/с +
+
+ + +
+
+ + + +
+
+
+
+ + +
+
+ 5 +
+
+
+Linkin Park - Crawling + + + +
+
+
+ 230,9K +
+
+ 03:29 + 320 Кб/с +
+
+ + +
+
+ + + +
+
+
+
+ + +
+
+ 6 +
+
+
+Linkin Park - Runaway + + + +
+
+
+ 164,8K +
+
+ 03:04 + 320 Кб/с +
+
+ + +
+
+ + + +
+
+
+
+ + +
+
+ 7 +
+
+
+Linkin Park - By Myself + + + +
+
+
+ 148,6K +
+
+ 03:10 + 320 Кб/с +
+
+ + +
+
+ + + +
+
+
+
+ + +
+
+ 8 +
+
+
+Linkin Park - In The End + + + +
+
+
+ 832,4K +
+
+ 03:36 + 320 Кб/с +
+
+ + +
+
+ + + +
+
+
+
+ + +
+
+ 9 +
+ +
+ 127,3K +
+
+ 03:05 + 320 Кб/с +
+
+ + +
+
+ + + +
+
+
+
+ + +
+
+ 10 +
+
+
+Linkin Park - Forgotten + + + +
+
+
+ 112,6K +
+
+ 03:15 + 320 Кб/с +
+
+ + +
+
+ + + +
+
+
+
+ + +
+
+ 11 +
+ +
+ 76,6K +
+
+ 02:37 + 320 Кб/с +
+
+ + +
+
+ + + +
+
+
+
+ + +
+
+ 12 +
+
+ +
+
+ 98,8K +
+
+ 03:12 + 320 Кб/с +
+
+ + +
+
+ + + +
+
+
+
+ + +
+
+ 13 +
+
+ +
+
+ 121,2K +
+
+ 04:21 + 320 Кб/с +
+
+ + +
+
+ + + +
+
+
+
+ + +
+
+ 14 +
+
+ +
+
+ 80,8K +
+
+ 03:45 + 320 Кб/с +
+
+ + +
+
+ + + +
+
+
+
+ + +
+
+ 15 +
+
+ +
+
+ 18,7K +
+
+ 03:13 + 320 Кб/с +
+
+ + +
+
+ + + +
+
+
+
+ + +
+
+ 16 +
+ +
+ 13,4K +
+
+ 03:11 + 320 Кб/с +
+
+ + +
+
+ + + +
+
+
+
+ + +
+
+ 17 +
+ +
+ 11K +
+
+ 03:30 + 320 Кб/с +
+
+ + +
+
+ + + +
+
+
+
+ + +
+
+ 18 +
+ +
+ 9,9K +
+
+ 03:38 + 320 Кб/с +
+
+ + +
+
+ + + +
+
+
+
+ + +
+
+ 19 +
+ +
+ 13,2K +
+
+ 04:57 + 320 Кб/с +
+
+ + +
+
+ + + +
+
+
+
+ + +
+
+ 20 +
+ +
+ 9,7K +
+
+ 03:51 + 320 Кб/с +
+
+ + +
+
+ + + +
+
+
+
+
+ + + + + + Залогиньтесь для того чтобы проголосовать за альбом + + +

Всего голосов: 784

+ + +
+
+

Другие альбомы исполнителя Linkin Park

+
+
+ + Wastelands + + + + + + + +
+ + LPU10 + + + + + + + +
+ + Demos + + + + + + + +
+
+
+ + +
+
+
+
+
Комментарии
+
+
+ + +
+ +
+ +
+
+ Харамаки Зоро +
+
+ Харамаки Зоро +
+

Весьма симпатишно.
Сочетание ярости с лиризмом и грустью.
Кажется логичным продолжением RATM, правда не такое качевое.

+
+
+
+ germanost +
+
+ germanost +
+

Хи, хи хардер чего овцой прикидываешься.. Смущаешь юные умы..

+
+
+
+ TradeShark1 +
+
+ TradeShark1 +
+

Я бы рекомендовал бы вам послушать Дельфина, Токио, Красная плесень, они не далеко от Линкин Парк, только интереснее, изобретательнее, драйвовее, мелодичнее. Ну ещё Bestie Boys. Они все намного круче. Потом спасибо мне скажите 😉

+
+
+
+ TradeShark1 +
+
+ TradeShark1 +
+

Послушал, слабенькая группа, одну мелодию на весь альбом. Красная плесень намного круче, у них хотя бы разнообразие в музыке, много красивых мелодий, сотни красивых песен. Такова реальность. Думаю фанатам Линкин Парк зайдёт Красная плесень в таком же стиле, но более мелодично

+
+
+
+ way +
+
+ way +
+

Лучший их альбом и нет лучше. Все песни тут огонь!

+
+
+
+ Dungeon Freestyler +
+
+ Dungeon Freestyler +
+

Не зацепили только High Voltage, Pushing Me Away, Cure For The Itch. Остальное просто улет!

+
+
+
+ goreThrasher +
+
+ goreThrasher +
+

}{уйня какая-то

+
+
+
+ Владислав Владислав +
+
+ Владислав Владислав +
+

Самый лучший альбом Линкинов!
Лучшие треки:
Papercut
One Step Closer
Crawling
Runaway
In the End
Pushing Me Away

+
+
+
+ insane Hwo +
+
+ insane Hwo +
+

В детстве казались намного тяжелее. Сейчас слушается как легенькая поп-музыка.

+
+
+
+ Hi123 +
+
+ Hi123 +
+

Этот альбом шедевр на все времена!

+
+
+ +
+
+
+ Loading... +
+
+ +
+
+
+
+ +
+
+
+

© Musify - 2023

+ +

+ ООО «АдвМьюзик» заключил лицензионные соглашения с крупнейшими российскими правообладателями на использование музыкальных произведений. Полная информация +

+
+
+
+ +
+
+
+ + + diff --git a/src/music_kraken/pages/musify.py b/src/music_kraken/pages/musify.py index af3fdcb..abdd774 100644 --- a/src/music_kraken/pages/musify.py +++ b/src/music_kraken/pages/musify.py @@ -564,7 +564,7 @@ class Musify(Page): album_source: Source if not flat: for album_source in new_album.source_collection.get_sources_from_page(cls.SOURCE_TYPE): - new_album.merge(cls.get_album_from_source(album_source)) + new_album.merge(cls.fetch_album_from_source(album_source)) discography.append(new_album) @@ -727,5 +727,15 @@ class Musify(Page): return artist @classmethod - def get_album_from_source(cls, source: Source, flat: bool = False) -> Album: - return Album() + def fetch_album_from_source(cls, source: Source, flat: bool = False) -> Album: + """_summary_ + + Args: + source (Source): _description_ + flat (bool, optional): _description_. Defaults to False. + + Returns: + Album: _description_ + """ + + return Album(title="works") diff --git a/src/musify_search.py b/src/musify_search.py index bb80746..6664fd5 100644 --- a/src/musify_search.py +++ b/src/musify_search.py @@ -15,6 +15,14 @@ def fetch_artist(): artist = Musify.fetch_details(artist) print(artist.options) +def fetch_album(): + album = objects.Album( + source_list=[objects.Source(objects.SourcePages.MUSIFY, "https://musify.club/release/linkin-park-hybrid-theory-2000-188")] + ) + + album = Musify.fetch_details(album) + print(album.options) + if __name__ == "__main__": - fetch_artist() + fetch_album() From f39c3c11ad10188651a638fec691d1f4f29f0900 Mon Sep 17 00:00:00 2001 From: Hellow Date: Mon, 20 Mar 2023 21:50:19 +0100 Subject: [PATCH 37/42] layed out the complete fetching of album --- .../pages/html/musify/album_overview.html | 693 ++++-------------- src/music_kraken/pages/musify.py | 76 +- src/musify_search.py | 6 +- 3 files changed, 207 insertions(+), 568 deletions(-) diff --git a/src/music_kraken/pages/html/musify/album_overview.html b/src/music_kraken/pages/html/musify/album_overview.html index 
f64d89f..4691b11 100644 --- a/src/music_kraken/pages/html/musify/album_overview.html +++ b/src/music_kraken/pages/html/musify/album_overview.html @@ -21,154 +21,159 @@ soup.find("div", {"id": "bodyContent"}) --> - -
-
-
-
- +
+
+ + + + - - - - + + + - - - - - -
-

Linkin Park - Hybrid Theory (2000)

-
-
+ + +
+
+ Hybrid Theory + + + Скачать альбом + +
+ +
+ + +

+ #Alternative Metal + #Nu-Metal + #Rap-Rock +

+ +

+

Hybrid Theory (в пер с. англ. Теория гибридов) — дебютный альбом американской рок-группы Linkin Park. Релиз состоялся 24 октября 2000 года лейблом Warner Bros. Диск имел огромный коммерческий успех. Только в Соединённых Штатах по состоянию на 2012 год было продано свыше 10 млн экземпляров. Альбом поднялся на вторую позицию в Billboard 200 и, несмотря на смешанные отзывы критиков, он разошёлся в 24 млн экземпляров по всему миру. Hybrid Theory стал самым продаваемым альбомом группы и занял седьмое место среди самых продаваемых альбомов десятилетия.
В 2002 Linkin Park получили премию «Грэмми» за лучшее хард-рок исполнение песни «Crawling», также Hybrid Theory был номинирован на «Грэмми» как «Лучший рок-альбом». Специальное издание Hybrid Theory было выпущено 11 марта 2002, через два года после выхода оригинала.
Предыстория
Группа Linkin Park сформировалась в 1996 в Южной Калифорнии. SuperXero было первым названием группы (позднее сменилось на Xero), в состав которой входили Майк Шинода, его школьные друзья Брэд Делсон и Роб Бурдон, а также сосед Делсона по комнате в университетском общежитии Дэвид Фаррелл и колледжский друг Шиноды Джо Хан. Позже к ним присоединился Марк Уэйкфилд. После отказов многих лейблов от контракта, Уэйкфилд покинул группу, решив, что у Xero нет будущего. В то же время Фаррелл оставит группу, чтобы отправиться в гастрольный тур с другой своей группой Tasty Snax.
Xero занялись поиском нового вокалиста: они отослали свою запись Джеффу Блю из Zomba Music, работавшему с ними до этого, и попросили отослать её кому-нибудь. 20 марта 1999 года, когда Блю позвонил в Аризону бывшему вокалисту группы Grey Daze Честеру Беннингтону, чтобы рассказать ему о Xero, тот праздновал свой 23-й день рождения. На следующий день, получив запись, Беннингтон тут же отправился в студию, записал, смонтировал вокал и сразу перезвонил Джеффу. Он прокрутил ему и музыкантам кассету с записью по телефону. Его голос произвел на них сильное впечатление, он тут же был приглашен на прослушивание, после чего был принят в группу. Секстет изменил свое название на Hybrid Theory и выпустил одноименный мини-альбом. Для того, чтобы избежать судебных разбирательств с другой группой, они меняют свое название на Lincoln Park. А чтобы приобрести своё доменное имя, группа меняет название Lincoln Park на Linkin Park.
Содержание
Музыка Hybrid Theory содержит различные вдохновения. Стиль пения Беннингтона находится под влиянием таких исполнителей, как Depeche Mode и Stone Temple Pilots, а риффы и техника игры гитариста Брэда Делсона вдохновлены Deftones, Guns N' Roses, U2 и The Smiths. Рэп Майка Шиноды, который присутствует на семи треках, очень близок к стилю The Roots. Лирическое содержание песен прежде всего затрагивает проблемы Беннингтона, возникающие во время его отрочества, включая жестокое обращение, постоянное и чрезмерное злоупотребление наркотиков и алкоголя, развод его родителей, изоляция, разочарования и чувства последствия неудачных отношений.
«Points of Authority», четвёртый трек в альбоме, имеет свой видеоклип, который можно найти на Frat Party at the Pankake Festival, первом DVD группы. Барабанщик Роб Бурдон описывает процесс записи песни: «Брэд написал этот рифф, затем пошел домой. Майк решил разрезать его на различные части и переставить их на компьютере. Брэду в свою очередь пришлось узнать свою собственную часть с компьютера». Что касается песни, Делсон похвалил мастерство Шиноды, охарактеризовав его как «гения» и «талантливого Трента Резнора».
+ + Развернуть + +

+
+

Песни в альбоме
Linkin Park - Hybrid Theory (2000)

-
-
+
+
-
+
1
-Linkin Park - Papercut - - - + Linkin Park - Papercut + + + +
-
- 326,3K -
-
- 03:05 - 320 Кб/с +
+
+ + + 326,3K +
-
- - -
-
+
+
+ 03:05 + 320 Кб/с +
+
+ + +
+
- - -
+ + + +
+ + +
@@ -199,7 +204,7 @@
- +
@@ -232,7 +237,7 @@
- +
@@ -265,7 +270,7 @@
- +
@@ -298,7 +303,7 @@
- +
@@ -331,7 +336,7 @@
- +
@@ -364,7 +369,7 @@
- +
@@ -397,7 +402,7 @@
- +
@@ -430,7 +435,7 @@
- +
@@ -463,7 +468,7 @@
- +
@@ -496,7 +501,7 @@
- +
@@ -529,7 +534,7 @@
- +
@@ -562,7 +567,7 @@
- +
@@ -595,7 +600,7 @@
- +
@@ -628,7 +633,7 @@
- +
@@ -656,12 +661,12 @@
- +
- +
@@ -694,7 +699,7 @@
- +
@@ -727,7 +732,7 @@
- +
@@ -755,12 +760,12 @@
- +
- +
@@ -793,401 +798,13 @@
- +
+
-
- - - - - - Залогиньтесь для того чтобы проголосовать за альбом - - -

Всего голосов: 784

- - -
-
-

Другие альбомы исполнителя Linkin Park

-
-
- - Wastelands - - - - - - - -
- - LPU10 - - - - - - - -
- - Demos - - - - - - - -
-
-
- - -
-
-
-
-
Комментарии
-
-
- - -
- -
- -
-
- Харамаки Зоро -
-
- Харамаки Зоро -
-

Весьма симпатишно.
Сочетание ярости с лиризмом и грустью.
Кажется логичным продолжением RATM, правда не такое качевое.

-
-
-
- germanost -
-
- germanost -
-

Хи, хи хардер чего овцой прикидываешься.. Смущаешь юные умы..

-
-
-
- TradeShark1 -
-
- TradeShark1 -
-

Я бы рекомендовал бы вам послушать Дельфина, Токио, Красная плесень, они не далеко от Линкин Парк, только интереснее, изобретательнее, драйвовее, мелодичнее. Ну ещё Bestie Boys. Они все намного круче. Потом спасибо мне скажите 😉

-
-
-
- TradeShark1 -
-
- TradeShark1 -
-

Послушал, слабенькая группа, одну мелодию на весь альбом. Красная плесень намного круче, у них хотя бы разнообразие в музыке, много красивых мелодий, сотни красивых песен. Такова реальность. Думаю фанатам Линкин Парк зайдёт Красная плесень в таком же стиле, но более мелодично

-
-
-
- way -
-
- way -
-

Лучший их альбом и нет лучше. Все песни тут огонь!

-
-
-
- Dungeon Freestyler -
-
- Dungeon Freestyler -
-

Не зацепили только High Voltage, Pushing Me Away, Cure For The Itch. Остальное просто улет!

-
-
-
- goreThrasher -
-
- goreThrasher -
-

}{уйня какая-то

-
-
-
- Владислав Владислав -
-
- Владислав Владислав -
-

Самый лучший альбом Линкинов!
Лучшие треки:
Papercut
One Step Closer
Crawling
Runaway
In the End
Pushing Me Away

-
-
-
- insane Hwo -
-
- insane Hwo -
-

В детстве казались намного тяжелее. Сейчас слушается как легенькая поп-музыка.

-
-
-
- Hi123 -
-
- Hi123 -
-

Этот альбом шедевр на все времена!

-
-
- -
-
-
- Loading... -
-
- -
-
-
-
- -
-
-
-

© Musify - 2023

- -

- ООО «АдвМьюзик» заключил лицензионные соглашения с крупнейшими российскими правообладателями на использование музыкальных произведений. Полная информация -

-
-
-
- -
-
-
- +
+
- diff --git a/src/music_kraken/pages/musify.py b/src/music_kraken/pages/musify.py index abdd774..b6467f1 100644 --- a/src/music_kraken/pages/musify.py +++ b/src/music_kraken/pages/musify.py @@ -60,6 +60,7 @@ X-Requested-With: XMLHttpRequest class MusifyTypes(Enum): ARTIST = "artist" + RELEASE = "release" @dataclass @@ -85,6 +86,30 @@ class Musify(Page): SOURCE_TYPE = SourcePages.MUSIFY + @classmethod + def parse_url(cls, url: str) -> MusifyUrl: + parsed = urlparse(url) + + path = parsed.path.split("/") + + split_name = path[2].split("-") + url_id = split_name[-1] + name_for_url = "-".join(split_name[:-1]) + + try: + type_enum = MusifyTypes(path[1]) + except ValueError as e: + print(f"{path[1]} is not yet implemented, add it to MusifyTypes") + raise e + + return MusifyUrl( + source_type=type_enum, + name_without_id=name_for_url, + name_with_id=path[2], + musify_id=url_id, + url=url + ) + @classmethod def search_by_query(cls, query: str) -> Options: query_obj = cls.Query(query) @@ -358,24 +383,6 @@ class Musify(Page): return Options(search_results) - @classmethod - def parse_url(cls, url: str) -> MusifyUrl: - parsed = urlparse(url) - - path = parsed.path.split("/") - - split_name = path[2].split("-") - url_id = split_name[-1] - name_for_url = "-".join(split_name[:-1]) - - return MusifyUrl( - source_type=MusifyTypes(path[1]), - name_without_id=name_for_url, - name_with_id=path[2], - musify_id=url_id, - url=url - ) - @classmethod def parse_album_card(cls, album_card: BeautifulSoup, artist_name: str = None) -> Album: """ @@ -706,7 +713,7 @@ class Musify(Page): fetches artist from source [x] discography - [x] attributes *(name and country... wooooow and I waste one request for this)* + [x] attributes [] picture gallery Args: @@ -728,14 +735,27 @@ class Musify(Page): @classmethod def fetch_album_from_source(cls, source: Source, flat: bool = False) -> Album: - """_summary_ - - Args: - source (Source): _description_ - flat (bool, optional): _description_. 
Defaults to False. - - Returns: - Album: _description_ """ - + fetches album from source: + eg. 'https://musify.club/release/linkin-park-hybrid-theory-2000-188' + + /html/musify/album_overview.html + [] tracklist + [] attributes *(name and country... wooooow and I waste one request for this)* + [] ratings + + :param source: + :param flat: + :return: + """ + url = cls.parse_url(source.url) + + endpoint = cls.HOST + "/release/" + url.name_with_id + r = cls.get_request(endpoint) + if r is None: + return Album() + + soup = BeautifulSoup(r.content, "html.parser") + + return Album(title="works") diff --git a/src/musify_search.py b/src/musify_search.py index 6664fd5..1dbdb68 100644 --- a/src/musify_search.py +++ b/src/musify_search.py @@ -15,11 +15,13 @@ def fetch_artist(): artist = Musify.fetch_details(artist) print(artist.options) + def fetch_album(): album = objects.Album( - source_list=[objects.Source(objects.SourcePages.MUSIFY, "https://musify.club/release/linkin-park-hybrid-theory-2000-188")] + source_list=[objects.Source(objects.SourcePages.MUSIFY, + "https://musify.club/release/linkin-park-hybrid-theory-2000-188")] ) - + album = Musify.fetch_details(album) print(album.options) From 6f7763ada508e3c87f6125d783aaa2ef781cea50 Mon Sep 17 00:00:00 2001 From: Hellow Date: Mon, 20 Mar 2023 22:27:05 +0100 Subject: [PATCH 38/42] sdfa --- .../pages/html/musify/album_overview.html | 93 ++++++++++--------- src/music_kraken/pages/musify.py | 84 ++++++++++++++++- 2 files changed, 129 insertions(+), 48 deletions(-) diff --git a/src/music_kraken/pages/html/musify/album_overview.html b/src/music_kraken/pages/html/musify/album_overview.html index 4691b11..c023354 100644 --- a/src/music_kraken/pages/html/musify/album_overview.html +++ b/src/music_kraken/pages/html/musify/album_overview.html @@ -89,56 +89,59 @@ Скачать альбом +
+ +
+ + +

+ #Alternative Metal + #Nu-Metal + #Rap-Rock +

+ +

+

Hybrid Theory (в пер с. англ. Теория гибридов) — дебютный альбом американской рок-группы Linkin Park. Релиз состоялся 24 октября 2000 года лейблом Warner Bros. Диск имел огромный коммерческий успех. Только в Соединённых Штатах по состоянию на 2012 год было продано свыше 10 млн экземпляров. Альбом поднялся на вторую позицию в Billboard 200 и, несмотря на смешанные отзывы критиков, он разошёлся в 24 млн экземпляров по всему миру. Hybrid Theory стал самым продаваемым альбомом группы и занял седьмое место среди самых продаваемых альбомов десятилетия.
В 2002 Linkin Park получили премию «Грэмми» за лучшее хард-рок исполнение песни «Crawling», также Hybrid Theory был номинирован на «Грэмми» как «Лучший рок-альбом». Специальное издание Hybrid Theory было выпущено 11 марта 2002, через два года после выхода оригинала.
Предыстория
Группа Linkin Park сформировалась в 1996 в Южной Калифорнии. SuperXero было первым названием группы (позднее сменилось на Xero), в состав которой входили Майк Шинода, его школьные друзья Брэд Делсон и Роб Бурдон, а также сосед Делсона по комнате в университетском общежитии Дэвид Фаррелл и колледжский друг Шиноды Джо Хан. Позже к ним присоединился Марк Уэйкфилд. После отказов многих лейблов от контракта, Уэйкфилд покинул группу, решив, что у Xero нет будущего. В то же время Фаррелл оставит группу, чтобы отправиться в гастрольный тур с другой своей группой Tasty Snax.
Xero занялись поиском нового вокалиста: они отослали свою запись Джеффу Блю из Zomba Music, работавшему с ними до этого, и попросили отослать её кому-нибудь. 20 марта 1999 года, когда Блю позвонил в Аризону бывшему вокалисту группы Grey Daze Честеру Беннингтону, чтобы рассказать ему о Xero, тот праздновал свой 23-й день рождения. На следующий день, получив запись, Беннингтон тут же отправился в студию, записал, смонтировал вокал и сразу перезвонил Джеффу. Он прокрутил ему и музыкантам кассету с записью по телефону. Его голос произвел на них сильное впечатление, он тут же был приглашен на прослушивание, после чего был принят в группу. Секстет изменил свое название на Hybrid Theory и выпустил одноименный мини-альбом. Для того, чтобы избежать судебных разбирательств с другой группой, они меняют свое название на Lincoln Park. А чтобы приобрести своё доменное имя, группа меняет название Lincoln Park на Linkin Park.
Содержание
Музыка Hybrid Theory содержит различные вдохновения. Стиль пения Беннингтона находится под влиянием таких исполнителей, как Depeche Mode и Stone Temple Pilots, а риффы и техника игры гитариста Брэда Делсона вдохновлены Deftones, Guns N' Roses, U2 и The Smiths. Рэп Майка Шиноды, который присутствует на семи треках, очень близок к стилю The Roots. Лирическое содержание песен прежде всего затрагивает проблемы Беннингтона, возникающие во время его отрочества, включая жестокое обращение, постоянное и чрезмерное злоупотребление наркотиков и алкоголя, развод его родителей, изоляция, разочарования и чувства последствия неудачных отношений.
«Points of Authority», четвёртый трек в альбоме, имеет свой видеоклип, который можно найти на Frat Party at the Pankake Festival, первом DVD группы. Барабанщик Роб Бурдон описывает процесс записи песни: «Брэд написал этот рифф, затем пошел домой. Майк решил разрезать его на различные части и переставить их на компьютере. Брэду в свою очередь пришлось узнать свою собственную часть с компьютера». Что касается песни, Делсон похвалил мастерство Шиноды, охарактеризовав его как «гения» и «талантливого Трента Резнора».
+ + Развернуть + +

+
-
- -

- #Alternative Metal - #Nu-Metal - #Rap-Rock -

+
+

Песни в альбоме
Linkin Park - Hybrid Theory (2000)

+
-

-

Hybrid Theory (в пер с. англ. Теория гибридов) — дебютный альбом американской рок-группы Linkin Park. Релиз состоялся 24 октября 2000 года лейблом Warner Bros. Диск имел огромный коммерческий успех. Только в Соединённых Штатах по состоянию на 2012 год было продано свыше 10 млн экземпляров. Альбом поднялся на вторую позицию в Billboard 200 и, несмотря на смешанные отзывы критиков, он разошёлся в 24 млн экземпляров по всему миру. Hybrid Theory стал самым продаваемым альбомом группы и занял седьмое место среди самых продаваемых альбомов десятилетия.
В 2002 Linkin Park получили премию «Грэмми» за лучшее хард-рок исполнение песни «Crawling», также Hybrid Theory был номинирован на «Грэмми» как «Лучший рок-альбом». Специальное издание Hybrid Theory было выпущено 11 марта 2002, через два года после выхода оригинала.
Предыстория
Группа Linkin Park сформировалась в 1996 в Южной Калифорнии. SuperXero было первым названием группы (позднее сменилось на Xero), в состав которой входили Майк Шинода, его школьные друзья Брэд Делсон и Роб Бурдон, а также сосед Делсона по комнате в университетском общежитии Дэвид Фаррелл и колледжский друг Шиноды Джо Хан. Позже к ним присоединился Марк Уэйкфилд. После отказов многих лейблов от контракта, Уэйкфилд покинул группу, решив, что у Xero нет будущего. В то же время Фаррелл оставит группу, чтобы отправиться в гастрольный тур с другой своей группой Tasty Snax.
Xero занялись поиском нового вокалиста: они отослали свою запись Джеффу Блю из Zomba Music, работавшему с ними до этого, и попросили отослать её кому-нибудь. 20 марта 1999 года, когда Блю позвонил в Аризону бывшему вокалисту группы Grey Daze Честеру Беннингтону, чтобы рассказать ему о Xero, тот праздновал свой 23-й день рождения. На следующий день, получив запись, Беннингтон тут же отправился в студию, записал, смонтировал вокал и сразу перезвонил Джеффу. Он прокрутил ему и музыкантам кассету с записью по телефону. Его голос произвел на них сильное впечатление, он тут же был приглашен на прослушивание, после чего был принят в группу. Секстет изменил свое название на Hybrid Theory и выпустил одноименный мини-альбом. Для того, чтобы избежать судебных разбирательств с другой группой, они меняют свое название на Lincoln Park. А чтобы приобрести своё доменное имя, группа меняет название Lincoln Park на Linkin Park.
Содержание
Музыка Hybrid Theory содержит различные вдохновения. Стиль пения Беннингтона находится под влиянием таких исполнителей, как Depeche Mode и Stone Temple Pilots, а риффы и техника игры гитариста Брэда Делсона вдохновлены Deftones, Guns N' Roses, U2 и The Smiths. Рэп Майка Шиноды, который присутствует на семи треках, очень близок к стилю The Roots. Лирическое содержание песен прежде всего затрагивает проблемы Беннингтона, возникающие во время его отрочества, включая жестокое обращение, постоянное и чрезмерное злоупотребление наркотиков и алкоголя, развод его родителей, изоляция, разочарования и чувства последствия неудачных отношений.
«Points of Authority», четвёртый трек в альбоме, имеет свой видеоклип, который можно найти на Frat Party at the Pankake Festival, первом DVD группы. Барабанщик Роб Бурдон описывает процесс записи песни: «Брэд написал этот рифф, затем пошел домой. Майк решил разрезать его на различные части и переставить их на компьютере. Брэду в свою очередь пришлось узнать свою собственную часть с компьютера». Что касается песни, Делсон похвалил мастерство Шиноды, охарактеризовав его как «гения» и «талантливого Трента Резнора».
- - Развернуть - -

-
-
-
-
-

Песни в альбоме
Linkin Park - Hybrid Theory (2000)

-
-
-
- - -
-
- 1 -
+
+ +
+ + +
+
+ 1 +
Linkin Park - Papercut diff --git a/src/music_kraken/pages/musify.py b/src/music_kraken/pages/musify.py index b6467f1..72cacdd 100644 --- a/src/music_kraken/pages/musify.py +++ b/src/music_kraken/pages/musify.py @@ -61,6 +61,7 @@ X-Requested-With: XMLHttpRequest class MusifyTypes(Enum): ARTIST = "artist" RELEASE = "release" + SONG = "track" @dataclass @@ -732,7 +733,75 @@ class Musify(Page): artist.main_album_collection.extend(discography) return artist - + + @classmethod + def parse_song_card(cls, song_card: BeautifulSoup) -> Song: + """ +
+
+ + +
+
+ 1 +
+
+
+ Linkin Park - Papercut + + + + +
+
+
+
+ + + 326,3K + +
+
+
+ 03:05 + 320 Кб/с +
+
+ + +
+
+ + + + +
+
+ """ + song_name = song_card.get("data-name") + artist_list: List[Artist] = [] + tracksort = None + + # get from parent div + _artist_name = song_card.get("data-artist") + if _artist_name is not None: + artist_list.append(Artist(name=_artist_name)) + + # get tracksort + tracksort_soup: BeautifulSoup = song_card.find("div", {"class": "playlist__position"}) + if tracksort_soup is not None: + raw_tracksort: str = tracksort_soup.get_text(strip=True) + if raw_tracksort.isdigit(): + tracksort = int(raw_tracksort) + + # playlist details + + return Song( + title=song_name, + tracksort=tracksort, + main_artist_list=artist_list + ) + @classmethod def fetch_album_from_source(cls, source: Source, flat: bool = False) -> Album: """ @@ -748,14 +817,23 @@ class Musify(Page): :param flat: :return: """ + album = Album() + url = cls.parse_url(source.url) endpoint = cls.HOST + "/release/" + url.name_with_id r = cls.get_request(endpoint) if r is None: - return Album() + return album soup = BeautifulSoup(r.content, "html.parser") + #
...
+ cards_soup: BeautifulSoup = soup.find("div", {"class": "card-body"}) + if cards_soup is not None: + card_soup: BeautifulSoup + for card_soup in cards_soup.find_all("div", {"class": "playlist__item"}): + album.song_collection.append(cls.parse_song_card(card_soup)) + album.update_tracksort() - return Album(title="works") + return album From 0f47cdadb8a98ef2a50b325ef790d2d6c8e26455 Mon Sep 17 00:00:00 2001 From: Hellow Date: Mon, 20 Mar 2023 23:11:55 +0100 Subject: [PATCH 39/42] some more scraping --- src/music_kraken/objects/song.py | 25 +++++++-------- src/music_kraken/pages/musify.py | 52 ++++++++++++++++++++++++++++++-- 2 files changed, 61 insertions(+), 16 deletions(-) diff --git a/src/music_kraken/objects/song.py b/src/music_kraken/objects/song.py index 82a2cf0..d323e69 100644 --- a/src/music_kraken/objects/song.py +++ b/src/music_kraken/objects/song.py @@ -51,7 +51,7 @@ class Song(MainObject): "title": None, "unified_title": None, "isrc": None, - "length": None, + "length": None, "tracksort": 0, "genre": None, "notes": FormattedText() @@ -101,12 +101,12 @@ class Song(MainObject): for album in self.album_collection: if album.song_collection.insecure_append(self): album.compile() - + artist: Artist for artist in self.feature_artist_collection: if artist.feature_song_collection.insecure_append(self): artist.compile() - + for artist in self.main_artist_collection: if artist.main_album_collection.insecure_extend(self.album_collection): artist.compile() @@ -261,18 +261,16 @@ class Album(MainObject): for song in self.song_collection: if song.album_collection.insecure_append(self): song.compile() - + artist: Artist for artist in self.artist_collection: if artist.main_album_collection.insecure_append(self): artist.compile() - + label: Label for label in self.label_collection: if label.album_collection.insecure_append(self): label.compile() - - @property def indexing_values(self) -> List[Tuple[str, object]]: @@ -369,15 +367,14 @@ class Album(MainObject): return 
len(self.artist_collection) > 1 - - """ All objects dependent on Artist """ class Artist(MainObject): - COLLECTION_ATTRIBUTES = ("feature_song_collection", "main_album_collection", "label_collection", "source_collection") + COLLECTION_ATTRIBUTES = ( + "feature_song_collection", "main_album_collection", "label_collection", "source_collection") SIMPLE_ATTRIBUTES = { "name": None, "unified_name": None, @@ -440,12 +437,12 @@ class Artist(MainObject): for song in self.feature_song_collection: if song.feature_artist_collection.insecure_append(self): song.compile() - + album: "Album" for album in self.main_album_collection: if album.artist_collection.insecure_append(self): album.compile() - + label: Label for label in self.label_collection: if label.current_artist_collection.insecure_append(self): @@ -584,7 +581,7 @@ class Label(MainObject): for album in self.album_collection: if album.label_collection.insecure_append(self): album.compile() - + artist: Artist for artist in self.current_artist_collection: if artist.label_collection.insecure_append(self): @@ -602,4 +599,4 @@ class Label(MainObject): def options(self) -> Options: options = [self] options.extend(self.current_artist_collection.shallow_list) - options.extend(self.album_collection.shallow_list) \ No newline at end of file + options.extend(self.album_collection.shallow_list) diff --git a/src/music_kraken/pages/musify.py b/src/music_kraken/pages/musify.py index 72cacdd..bdf6f4c 100644 --- a/src/music_kraken/pages/musify.py +++ b/src/music_kraken/pages/musify.py @@ -780,12 +780,18 @@ class Musify(Page): """ song_name = song_card.get("data-name") artist_list: List[Artist] = [] + source_list: List[Source] = [] tracksort = None + def parse_title(_title: str) -> str: + return _title + + """ # get from parent div _artist_name = song_card.get("data-artist") if _artist_name is not None: artist_list.append(Artist(name=_artist_name)) + """ # get tracksort tracksort_soup: BeautifulSoup = song_card.find("div", {"class": 
"playlist__position"}) @@ -795,6 +801,48 @@ class Musify(Page): tracksort = int(raw_tracksort) # playlist details + playlist_details: BeautifulSoup = song_card.find("div", {"class": "playlist__details"}) + if playlist_details is not None: + """ +
+ Tamas ft.Zombiez - Voodoo (Feat. Zombiez) + + + + + + + + +
+ """ + # track + anchor_list: List[BeautifulSoup] = playlist_details.find_all("a") + if len(anchor_list) > 1: + track_anchor: BeautifulSoup = anchor_list[-1] + href: str = track_anchor.get("href") + if href is not None: + source_list.append(Source(cls.SOURCE_TYPE, cls.HOST + href)) + song_name = parse_title(track_anchor.get_text(strip=True)) + + # artist + artist_span: BeautifulSoup + for artist_span in playlist_details.find_all("span", {"itemprop": "byArtist"}): + _artist_src = None + _artist_name = None + meta_artist_src = artist_span.find("meta", {"itemprop": "url"}) + if meta_artist_src is not None: + meta_artist_url = meta_artist_src.get("content") + if meta_artist_url is not None: + _artist_src = [Source(cls.SOURCE_TYPE, cls.HOST + meta_artist_url)] + + meta_artist_name = artist_span.find("meta", {"itemprop": "name"}) + if meta_artist_name is not None: + meta_artist_name_text = meta_artist_name.get("content") + _artist_name = meta_artist_name_text + + if _artist_name is not None or _artist_src is not None: + artist_list.append(Artist(name=_artist_name, source_list=_artist_src)) return Song( title=song_name, @@ -810,14 +858,14 @@ class Musify(Page): /html/musify/album_overview.html [] tracklist - [] attributes *(name and country... 
wooooow and I waste one request for this)* + [] attributes [] ratings :param source: :param flat: :return: """ - album = Album() + album = Album(title="Hi :)") url = cls.parse_url(source.url) From c648a330e0386eef4f8dabe289219f7eb6b7a6d7 Mon Sep 17 00:00:00 2001 From: Hellow2 Date: Tue, 21 Mar 2023 12:46:32 +0100 Subject: [PATCH 40/42] ahhh fuck me :( --- src/music_kraken/objects/__init__.py | 4 +- src/music_kraken/objects/cache.py | 110 +++++++++++++++++++++++++ src/music_kraken/objects/collection.py | 9 +- src/music_kraken/pages/abstract.py | 15 +++- src/music_kraken/pages/musify.py | 18 ++-- 5 files changed, 142 insertions(+), 14 deletions(-) create mode 100644 src/music_kraken/objects/cache.py diff --git a/src/music_kraken/objects/__init__.py b/src/music_kraken/objects/__init__.py index 03907fc..80e4064 100644 --- a/src/music_kraken/objects/__init__.py +++ b/src/music_kraken/objects/__init__.py @@ -5,7 +5,8 @@ from . import ( parents, formatted_text, album, - option + option, + collection ) MusicObject = parents.DatabaseObject @@ -31,3 +32,4 @@ Album = song.Album FormattedText = formatted_text.FormattedText Options = option.Options +Collection = collection.Collection diff --git a/src/music_kraken/objects/cache.py b/src/music_kraken/objects/cache.py new file mode 100644 index 0000000..181a13c --- /dev/null +++ b/src/music_kraken/objects/cache.py @@ -0,0 +1,110 @@ +from collections import defaultdict +from typing import Dict, List, Optional +import weakref + +from .parents import DatabaseObject + +""" +This is a cache for the objects, that et pulled out of the database. +This is necessary, to not have duplicate objects with the same id. 
+ +Using a cache that maps the ojects to their id has multiple benefits: + - if you modify the object at any point, all objects with the same id get modified *(copy by reference)* + - less ram usage + - to further decrease ram usage I only store weak refs and not a strong reference, for the gc to still work +""" + + +class ObjectCache: + """ + ObjectCache is a cache for the objects retrieved from a database. + It maps each object to its id and uses weak references to manage its memory usage. + Using a cache for these objects provides several benefits: + + - Modifying an object updates all objects with the same id (due to copy by reference) + - Reduced memory usage + + :attr object_to_id: Dictionary that maps DatabaseObjects to their id. + :attr weakref_map: Dictionary that uses weak references to DatabaseObjects as keys and their id as values. + + :method exists: Check if a DatabaseObject already exists in the cache. + :method append: Add a DatabaseObject to the cache if it does not already exist. + :method extent: Add a list of DatabaseObjects to the cache. + :method remove: Remove a DatabaseObject from the cache by its id. + :method get: Retrieve a DatabaseObject from the cache by its id. """ + object_to_id: Dict[str, DatabaseObject] + weakref_map: Dict[weakref.ref, str] + + def __init__(self) -> None: + self.object_to_id = dict() + self.weakref_map = defaultdict() + + def exists(self, database_object: DatabaseObject) -> bool: + """ + Check if a DatabaseObject with the same id already exists in the cache. + + :param database_object: The DatabaseObject to check for. + :return: True if the DatabaseObject exists, False otherwise. + """ + if database_object.dynamic: + return True + return database_object.id in self.object_to_id + + def on_death(self, weakref_: weakref.ref) -> None: + """ + Callback function that gets triggered when the reference count of a DatabaseObject drops to 0. + This function removes the DatabaseObject from the cache. 
+ + :param weakref_: The weak reference of the DatabaseObject that has been garbage collected. + """ + data_id = self.weakref_map.pop(weakref_) + self.object_to_id.pop(data_id) + + def get_weakref(self, database_object: DatabaseObject) -> weakref.ref: + return weakref.ref(database_object, self.on_death) + + + def append(self, database_object: DatabaseObject) -> bool: + """ + Add a DatabaseObject to the cache. + + :param database_object: The DatabaseObject to add to the cache. + :return: True if the DatabaseObject already exists in the cache, False otherwise. + """ + if self.exists(database_object): + return True + + self.weakref_map[weakref.ref(database_object, self.on_death)] = database_object.id + self.object_to_id[database_object.id] = database_object + + return False + + def extent(self, database_object_list: List[DatabaseObject]): + """ + adjacent to the extent method of list, this appends n Object + """ + for database_object in database_object_list: + self.append(database_object) + + def remove(self, _id: str): + """ + Remove a DatabaseObject from the cache. + + :param _id: The id of the DatabaseObject to remove from the cache. 
+ """ + data = self.object_to_id.get(_id) + if data: + self.weakref_map.pop(weakref.ref(data)) + self.object_to_id.pop(_id) + + def __getitem__(self, item) -> Optional[DatabaseObject]: + """ + this returns the data obj + :param item: the id of the music object + :return: + """ + + return self.object_to_id.get(item) + + def get(self, _id: str) -> Optional[DatabaseObject]: + return self.__getitem__(_id) diff --git a/src/music_kraken/objects/collection.py b/src/music_kraken/objects/collection.py index d70617b..841f47e 100644 --- a/src/music_kraken/objects/collection.py +++ b/src/music_kraken/objects/collection.py @@ -50,7 +50,7 @@ class Collection: self._used_ids.add(element.id) - def append(self, element: DatabaseObject, merge_on_conflict: bool = True): + def append(self, element: DatabaseObject, merge_on_conflict: bool = True) -> DatabaseObject: """ :param element: :param merge_on_conflict: @@ -63,17 +63,20 @@ class Collection: for name, value in element.indexing_values: if value in self._attribute_to_object_map[name]: + existing_object = self._attribute_to_object_map[name][value] + if merge_on_conflict: # if the object does already exist # thus merging and don't add it afterwards - existing_object = self._attribute_to_object_map[name][value] existing_object.merge(element) # in case any relevant data has been added (e.g. 
it remaps the old object) self.map_element(existing_object) - return + return existing_object self._data.append(element) self.map_element(element) + + return element def extend(self, element_list: Iterable[DatabaseObject], merge_on_conflict: bool = True): for element in element_list: diff --git a/src/music_kraken/pages/abstract.py b/src/music_kraken/pages/abstract.py index 83fae1a..dfd0030 100644 --- a/src/music_kraken/pages/abstract.py +++ b/src/music_kraken/pages/abstract.py @@ -15,15 +15,24 @@ from ..objects import ( Target, MusicObject, Options, - SourcePages + SourcePages, + Collection ) +class PageCache(Collection): + def clear(self): + self.__init__(element_type=self.element_type) + + class Page: """ This is an abstract class, laying out the functionality for every other class fetching something """ + SONG_CACHE = PageCache(element_type=Song) + ALBUM_CACHE = PageCache(element_type=Album) + ARTIST_CACHE = PageCache(element_type=Artist) API_SESSION: requests.Session = requests.Session() API_SESSION.proxies = shared.proxies @@ -151,6 +160,10 @@ class Page: tracklist of every album of the artist. 
:return detailed_music_object: IT MODIFIES THE INPUT OBJ """ + + cls.ARTIST_CACHE.clear() + cls.ALBUM_CACHE.clear() + cls.SONG_CACHE.clear() if type(music_object) == Song: song = cls.fetch_song_details(music_object, flat=flat) diff --git a/src/music_kraken/pages/musify.py b/src/music_kraken/pages/musify.py index bdf6f4c..7604dfb 100644 --- a/src/music_kraken/pages/musify.py +++ b/src/music_kraken/pages/musify.py @@ -151,11 +151,11 @@ class Musify(Page): artist_thumbnail = image_soup.get("src") - return Artist( + return cls.ARTIST_CACHE.append(Artist( _id=_id, name=name, source_list=source_list - ) + )) @classmethod def parse_album_contact(cls, contact: BeautifulSoup) -> Album: @@ -257,13 +257,13 @@ class Musify(Page): else: LOGGER.warning("got an unequal ammount than 3 small elements") - return Album( + return cls.ALBUM_CACHE.append(Album( _id=_id, title=title, source_list=source_list, date=ID3Timestamp(year=year), artist_list=artist_list - ) + )) @classmethod def parse_contact_container(cls, contact_container_soup: BeautifulSoup) -> List[Union[Artist, Album]]: @@ -535,14 +535,14 @@ class Musify(Page): else: LOGGER.debug("there is not even 1 footer in the album card") - return Album( + return cls.ALBUM_CACHE.append(Album( _id=_id, title=name, source_list=source_list, date=timestamp, album_type=album_type, album_status=album_status - ) + )) @classmethod def get_discography(cls, url: MusifyUrl, artist_name: str = None, flat=False) -> List[Album]: @@ -700,13 +700,13 @@ class Musify(Page): if note_soup is not None: notes.html = note_soup.decode_contents() - return Artist( + return cls.ARTIST_CACHE.append(Artist( _id=url.musify_id, name=name, country=country, source_list=source_list, notes=notes - ) + )) @classmethod def fetch_artist_from_source(cls, source: Source, flat: bool = False) -> Artist: @@ -842,7 +842,7 @@ class Musify(Page): _artist_name = meta_artist_name_text if _artist_name is not None or _artist_src is not None: - 
artist_list.append(Artist(name=_artist_name, source_list=_artist_src)) + artist_list.append(cls.ARTIST_CACHE.append(Artist(name=_artist_name, source_list=_artist_src))) return Song( title=song_name, From 1f0ae30f02560d61fe67632a9af6a51baa8cd08a Mon Sep 17 00:00:00 2001 From: Hellow2 Date: Wed, 22 Mar 2023 12:58:11 +0100 Subject: [PATCH 41/42] fixed disgusting bug --- src/music_kraken/objects/collection.py | 54 ++++++++++++++------------ src/music_kraken/objects/song.py | 25 ++++++------ src/music_kraken/pages/abstract.py | 13 ------- src/music_kraken/pages/musify.py | 10 ++--- 4 files changed, 47 insertions(+), 55 deletions(-) diff --git a/src/music_kraken/objects/collection.py b/src/music_kraken/objects/collection.py index 841f47e..eb59142 100644 --- a/src/music_kraken/objects/collection.py +++ b/src/music_kraken/objects/collection.py @@ -49,12 +49,25 @@ class Collection: self._attribute_to_object_map[name][value] = element self._used_ids.add(element.id) + + def unmap_element(self, element: DatabaseObject): + for name, value in element.indexing_values: + if value is None: + continue + + if value in self._attribute_to_object_map[name]: + if element is self._attribute_to_object_map[name][value]: + try: + self._attribute_to_object_map[name].pop(value) + except KeyError: + pass - def append(self, element: DatabaseObject, merge_on_conflict: bool = True) -> DatabaseObject: + def append(self, element: DatabaseObject, merge_on_conflict: bool = True, merge_into_existing: bool = True) -> bool: """ :param element: :param merge_on_conflict: - :return: + :param merge_into_existing: + :return did_not_exist: """ # if the element type has been defined in the initializer it checks if the type matches @@ -68,15 +81,24 @@ class Collection: if merge_on_conflict: # if the object does already exist # thus merging and don't add it afterwards - existing_object.merge(element) - # in case any relevant data has been added (e.g. 
it remaps the old object) - self.map_element(existing_object) - return existing_object + if merge_into_existing: + existing_object.merge(element) + # in case any relevant data has been added (e.g. it remaps the old object) + self.map_element(existing_object) + else: + element.merge(existing_object) + + exists_at = self._data.index(existing_object) + self._data[exists_at] = element + + self.unmap_element(existing_object) + self.map_element(element) + return True self._data.append(element) self.map_element(element) - return element + return False def extend(self, element_list: Iterable[DatabaseObject], merge_on_conflict: bool = True): for element in element_list: @@ -104,21 +126,3 @@ class Collection: returns a shallow copy of the data list """ return self._data.copy() - - def insecure_append(self, element: DatabaseObject): - if element.id in self._used_ids: - return False - self._used_ids.add(element.id) - - self._data.append(element) - self.map_element(element) - return True - - def insecure_extend(self, element_list: Iterable[DatabaseObject]): - success = False - - for element in element_list: - if self.insecure_append(element): - success = True - - return success diff --git a/src/music_kraken/objects/song.py b/src/music_kraken/objects/song.py index d323e69..ff2df84 100644 --- a/src/music_kraken/objects/song.py +++ b/src/music_kraken/objects/song.py @@ -99,17 +99,18 @@ class Song(MainObject): def compile(self): album: Album for album in self.album_collection: - if album.song_collection.insecure_append(self): + if album.song_collection.append(self, merge_into_existing=False): album.compile() artist: Artist for artist in self.feature_artist_collection: - if artist.feature_song_collection.insecure_append(self): + if artist.feature_song_collection.append(self, merge_into_existing=False): artist.compile() for artist in self.main_artist_collection: - if artist.main_album_collection.insecure_extend(self.album_collection): - artist.compile() + for album in 
self.album_collection: + if artist.main_album_collection.append(album, merge_into_existing=False): + artist.compile() @property def indexing_values(self) -> List[Tuple[str, object]]: @@ -259,17 +260,17 @@ class Album(MainObject): def compile(self): song: Song for song in self.song_collection: - if song.album_collection.insecure_append(self): + if song.album_collection.append(self, merge_into_existing=False): song.compile() artist: Artist for artist in self.artist_collection: - if artist.main_album_collection.insecure_append(self): + if artist.main_album_collection.append(self, merge_into_existing=False): artist.compile() label: Label for label in self.label_collection: - if label.album_collection.insecure_append(self): + if label.album_collection.append(self, merge_into_existing=False): label.compile() @property @@ -435,17 +436,17 @@ class Artist(MainObject): def compile(self): song: "Song" for song in self.feature_song_collection: - if song.feature_artist_collection.insecure_append(self): + if song.feature_artist_collection.append(self, merge_into_existing=False): song.compile() album: "Album" for album in self.main_album_collection: - if album.artist_collection.insecure_append(self): + if album.artist_collection.append(self, merge_into_existing=False): album.compile() label: Label for label in self.label_collection: - if label.current_artist_collection.insecure_append(self): + if label.current_artist_collection.append(self, merge_into_existing=False): label.compile() @property @@ -579,12 +580,12 @@ class Label(MainObject): def compile(self) -> bool: album: Album for album in self.album_collection: - if album.label_collection.insecure_append(self): + if album.label_collection.append(self, merge_into_existing=False): album.compile() artist: Artist for artist in self.current_artist_collection: - if artist.label_collection.insecure_append(self): + if artist.label_collection.append(self, merge_into_existing=False): artist.compile() @property diff --git 
a/src/music_kraken/pages/abstract.py b/src/music_kraken/pages/abstract.py index dfd0030..24d67f4 100644 --- a/src/music_kraken/pages/abstract.py +++ b/src/music_kraken/pages/abstract.py @@ -20,20 +20,11 @@ from ..objects import ( ) -class PageCache(Collection): - def clear(self): - self.__init__(element_type=self.element_type) - - class Page: """ This is an abstract class, laying out the functionality for every other class fetching something """ - SONG_CACHE = PageCache(element_type=Song) - ALBUM_CACHE = PageCache(element_type=Album) - ARTIST_CACHE = PageCache(element_type=Artist) - API_SESSION: requests.Session = requests.Session() API_SESSION.proxies = shared.proxies TIMEOUT = 5 @@ -160,10 +151,6 @@ class Page: tracklist of every album of the artist. :return detailed_music_object: IT MODIFIES THE INPUT OBJ """ - - cls.ARTIST_CACHE.clear() - cls.ALBUM_CACHE.clear() - cls.SONG_CACHE.clear() if type(music_object) == Song: song = cls.fetch_song_details(music_object, flat=flat) diff --git a/src/music_kraken/pages/musify.py b/src/music_kraken/pages/musify.py index 7604dfb..e874ca7 100644 --- a/src/music_kraken/pages/musify.py +++ b/src/music_kraken/pages/musify.py @@ -151,11 +151,11 @@ class Musify(Page): artist_thumbnail = image_soup.get("src") - return cls.ARTIST_CACHE.append(Artist( + return Artist( _id=_id, name=name, source_list=source_list - )) + ) @classmethod def parse_album_contact(cls, contact: BeautifulSoup) -> Album: @@ -700,13 +700,13 @@ class Musify(Page): if note_soup is not None: notes.html = note_soup.decode_contents() - return cls.ARTIST_CACHE.append(Artist( + return Artist( _id=url.musify_id, name=name, country=country, source_list=source_list, notes=notes - )) + ) @classmethod def fetch_artist_from_source(cls, source: Source, flat: bool = False) -> Artist: @@ -842,7 +842,7 @@ class Musify(Page): _artist_name = meta_artist_name_text if _artist_name is not None or _artist_src is not None: - 
artist_list.append(cls.ARTIST_CACHE.append(Artist(name=_artist_name, source_list=_artist_src))) + artist_list.append(Artist(name=_artist_name, source_list=_artist_src)) return Song( title=song_name, From 5918147ff9913bf5912d8e3f9117293b034bb8c8 Mon Sep 17 00:00:00 2001 From: Hellow2 Date: Thu, 23 Mar 2023 09:00:39 +0100 Subject: [PATCH 42/42] Update collection.py --- src/music_kraken/objects/collection.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/music_kraken/objects/collection.py b/src/music_kraken/objects/collection.py index eb59142..5273d7f 100644 --- a/src/music_kraken/objects/collection.py +++ b/src/music_kraken/objects/collection.py @@ -93,12 +93,13 @@ class Collection: self.unmap_element(existing_object) self.map_element(element) - return True + + return False self._data.append(element) self.map_element(element) - return False + return True def extend(self, element_list: Iterable[DatabaseObject], merge_on_conflict: bool = True): for element in element_list: