Compare commits

...

4 Commits

Author SHA1 Message Date
ae921c3626 feat: cleaned song title from youtube music
All checks were successful
ci/woodpecker/push/woodpecker Pipeline was successful
2024-04-26 14:29:56 +02:00
f52b5e6325 fix: properly stored encoding now 2024-04-26 14:24:14 +02:00
25eceb727b fix: encoding of cache 2024-04-26 14:04:44 +02:00
e77afa584b feat: added caching to youtube 2024-04-26 13:50:17 +02:00
7 changed files with 56 additions and 27 deletions

View File

@@ -6,8 +6,8 @@ logging.getLogger().setLevel(logging.DEBUG)
if __name__ == "__main__":
commands = [
"s: #a Ruffiction",
"d: 8",
"s: #a Crystal F",
"d: 20",
]

View File

@@ -1,6 +1,6 @@
import json
from pathlib import Path
from dataclasses import dataclass
from dataclasses import dataclass, field
from datetime import datetime, timedelta
from typing import List, Optional
from functools import lru_cache
@@ -18,6 +18,8 @@ class CacheAttribute:
created: datetime
expires: datetime
additional_info: dict = field(default_factory=dict)
@property
def id(self):
return f"{self.module}_{self.name}"
@@ -32,6 +34,12 @@ class CacheAttribute:
return self.__dict__ == other.__dict__
@dataclass
class CacheResult:
content: bytes
attribute: CacheAttribute
class Cache:
def __init__(self, module: str, logger: logging.Logger):
self.module = module
@@ -49,13 +57,16 @@ class Cache:
self._time_fields = {"created", "expires"}
with self.index.open("r") as i:
for c in json.loads(i.read()):
for key in self._time_fields:
c[key] = datetime.fromisoformat(c[key])
try:
for c in json.loads(i.read()):
for key in self._time_fields:
c[key] = datetime.fromisoformat(c[key])
ca = CacheAttribute(**c)
self.cached_attributes.append(ca)
self._id_to_attribute[ca.id] = ca
ca = CacheAttribute(**c)
self.cached_attributes.append(ca)
self._id_to_attribute[ca.id] = ca
except json.JSONDecodeError:
pass
@lru_cache()
def _init_module(self, module: str) -> Path:
@@ -100,7 +111,7 @@ class Cache:
return True
def set(self, content: bytes, name: str, expires_in: float = 10, module: str = ""):
def set(self, content: bytes, name: str, expires_in: float = 10, module: str = "", additional_info: dict = None):
"""
:param content:
:param module:
@@ -111,6 +122,7 @@ class Cache:
if name == "":
return
additional_info = additional_info or {}
module = self.module if module == "" else module
module_path = self._init_module(module)
@@ -120,6 +132,7 @@
name=name,
created=datetime.now(),
expires=datetime.now() + timedelta(days=expires_in),
additional_info=additional_info,
)
self._write_attribute(cache_attribute)
@@ -128,7 +141,7 @@
self.logger.debug(f"writing cache to {cache_path}")
content_file.write(content)
def get(self, name: str) -> Optional[bytes]:
def get(self, name: str) -> Optional[CacheResult]:
path = fit_to_file_system(Path(self._dir, self.module, name), hidden_ok=True)
if not path.is_file():
@@ -140,7 +153,7 @@
return
with path.open("rb") as f:
return f.read()
return CacheResult(content=f.read(), attribute=existing_attribute)
def clean(self):
keep = set()

View File

@@ -125,12 +125,17 @@ class Connection:
return headers
def save(self, r: requests.Response, name: str, error: bool = False, **kwargs):
def save(self, r: requests.Response, name: str, error: bool = False, no_update_if_valid_exists: bool = False, **kwargs):
n_kwargs = {}
if error:
n_kwargs["module"] = "failed_requests"
self.cache.set(r.content, name, expires_in=kwargs.get("expires_in", self.cache_expiring_duration), **n_kwargs)
if self.cache.get(name) is not None and no_update_if_valid_exists:
return
self.cache.set(r.content, name, expires_in=kwargs.get("expires_in", self.cache_expiring_duration), additional_info={
"encoding": r.encoding,
}, **n_kwargs)
def request(
self,
@@ -145,6 +150,7 @@ class Connection:
sleep_after_404: float = None,
is_heartbeat: bool = False,
disable_cache: bool = None,
enable_cache_readonly: bool = False,
method: str = None,
name: str = "",
exclude_headers: List[str] = None,
@@ -178,17 +184,23 @@
request_url = parsed_url.geturl() if not raw_url else url
if name != "" and not disable_cache:
if name != "" and (not disable_cache or enable_cache_readonly):
cached = self.cache.get(name)
if cached is not None:
request_trace(f"{trace_string}\t[cached]")
with responses.RequestsMock() as resp:
additional_info = cached.attribute.additional_info
body = cached.content
if "encoding" in additional_info:
body = body.decode(additional_info["encoding"])
resp.add(
method=method,
url=request_url,
body=cached,
body=body,
)
return requests.request(method=method, url=url, timeout=timeout, headers=headers, **kwargs)

View File

@@ -451,7 +451,7 @@ class Page:
source = sources[0]
if not found_on_disc:
r = self.download_song_to_target(source=source, target=temp_target, desc=song.title)
r = self.download_song_to_target(source=source, target=temp_target, desc=song.option_string)
if not r.is_fatal_error:
r.merge(self._post_process_targets(song, temp_target,

View File

@@ -1128,4 +1128,4 @@ class Musify(Page):
self.LOGGER.warning(f"The source has no audio link. Falling back to {endpoint}.")
return self.stream_connection.stream_into(endpoint, target, raw_url=True, exclude_headers=["Host"])
return self.stream_connection.stream_into(endpoint, target, raw_url=True, exclude_headers=["Host"], name=desc)

View File

@@ -2,6 +2,7 @@ from typing import List, Optional
from enum import Enum
from ...utils.config import youtube_settings, logging_settings
from ...utils.string_processing import clean_song_title
from ...objects import Source, DatabaseObject
from ..abstract import Page
from ...objects import (
@@ -59,7 +60,7 @@ def parse_run_element(run_element: dict) -> Optional[DatabaseObject]:
if element_type == PageType.SONG or (element_type == PageType.VIDEO and not youtube_settings["youtube_music_clean_data"]) or (element_type == PageType.OFFICIAL_MUSIC_VIDEO and not youtube_settings["youtube_music_clean_data"]):
source = Source(SOURCE_PAGE, f"https://music.youtube.com/watch?v={element_id}")
return Song(title=element_text, source_list=[source])
return Song(title=clean_song_title(element_text), source_list=[source])
if element_type == PageType.ARTIST or (element_type == PageType.CHANNEL and not youtube_settings["youtube_music_clean_data"]):
source = Source(SOURCE_PAGE, f"https://music.youtube.com/channel/{element_id}")

View File

@@ -171,7 +171,7 @@ class YoutubeMusic(SuperYouTube):
def __init__(self, *args, ydl_opts: dict = None, **kwargs):
self.yt_music_connection: YoutubeMusicConnection = YoutubeMusicConnection(
logger=self.LOGGER,
accept_language="en-US,en;q=0.5"
accept_language="en-US,en;q=0.5",
)
self.credentials: YouTubeMusicCredentials = YouTubeMusicCredentials(
api_key=youtube_settings["youtube_music_api_key"],
@@ -212,7 +212,7 @@ class YoutubeMusic(SuperYouTube):
search for: "innertubeApiKey"
"""
r = self.yt_music_connection.get("https://music.youtube.com/")
r = self.yt_music_connection.get("https://music.youtube.com/", name="youtube_music_index.html", disable_cache=True, enable_cache_readonly=True)
if r is None:
return
@@ -232,7 +232,7 @@ class YoutubeMusic(SuperYouTube):
'set_ytc': 'true',
'set_apyt': 'true',
'set_eom': 'false'
})
}, disable_cache=True)
if r is None:
return
@@ -247,9 +247,9 @@ class YoutubeMusic(SuperYouTube):
# save cookies in settings
youtube_settings["youtube_music_consent_cookies"] = cookie_dict
else:
self.yt_music_connection.save(r, "index.html")
self.yt_music_connection.save(r, "youtube_music_index.html", no_update_if_valid_exists=True)
r = self.yt_music_connection.get("https://music.youtube.com/", name="index.html")
r = self.yt_music_connection.get("https://music.youtube.com/", name="youtube_music_index.html")
if r is None:
return
@@ -374,7 +374,8 @@ class YoutubeMusic(SuperYouTube):
},
headers={
"Referer": get_youtube_url(path=f"/search", query=f"q={urlescaped_query}")
}
},
name=f"search_{search_query}.json"
)
if r is None:
@@ -411,7 +412,8 @@ class YoutubeMusic(SuperYouTube):
json={
"browseId": browse_id,
"context": {**self.credentials.context, "adSignalsInfo": {"params": []}}
}
},
name=f"fetch_artist_{browse_id}.json"
)
if r is None:
return artist
@@ -454,7 +456,8 @@ class YoutubeMusic(SuperYouTube):
json={
"browseId": browse_id,
"context": {**self.credentials.context, "adSignalsInfo": {"params": []}}
}
},
name=f"fetch_album_{browse_id}.json"
)
if r is None:
return album