draft: string processing
All checks were successful
ci/woodpecker/push/woodpecker Pipeline was successful
All checks were successful
ci/woodpecker/push/woodpecker Pipeline was successful
This commit is contained in:
@@ -6,6 +6,7 @@ from functools import lru_cache
|
||||
from transliterate.exceptions import LanguageDetectionError
|
||||
from transliterate import translit
|
||||
from pathvalidate import sanitize_filename
|
||||
from urllib.parse import urlparse, ParseResult, parse_qs
|
||||
|
||||
|
||||
COMMON_TITLE_APPENDIX_LIST: Tuple[str, ...] = (
|
||||
@@ -21,6 +22,7 @@ def unify(string: str) -> str:
|
||||
returns a unified str, to make comparisons easy.
|
||||
a unified string has the following attributes:
|
||||
- is lowercase
|
||||
- is transliterated to Latin characters from e.g. Cyrillic
|
||||
"""
|
||||
|
||||
if string is None:
|
||||
@@ -132,8 +134,27 @@ def unify_punctuation(to_unify: str) -> str:
|
||||
to_unify = to_unify.replace(char, UNIFY_TO)
|
||||
return to_unify
|
||||
|
||||
def hash_url(url: str) -> str:
    """
    returns a normalized form of the url, to make comparisons easy.

    the normalized url has the following attributes:
    - has no surrounding whitespace
    - is lowercase
    - has no leading "https://" or "http://"

    :param url: the url as a string
    :return: the normalized url
    """
    normalized = url.strip().lower()

    # str.lstrip() removes a *set of characters*, not a prefix; it would also
    # eat the start of hosts like "store.example.com", so the scheme is cut
    # off explicitly.
    for scheme in ("https://", "http://"):
        if normalized.startswith(scheme):
            return normalized[len(scheme):]

    return normalized
|
||||
def hash_url(url: Union[str, ParseResult]) -> str:
    """
    returns a normalized key for an url, to make comparisons easy.

    the key has the following attributes:
    - is built from the netloc, the path and the query of the url
    - every "/" in the path is replaced with "_"
    - the query parameters are sorted by their name, so their order in the url doesn't matter
    - is lowercase

    :param url: the url, either as a string or already parsed with ``urlparse``
    :return: the normalized key
    """
    if isinstance(url, str):
        url = urlparse(url)

    # fallback: a query that can't be parsed strictly is appended unchanged
    query = url.query
    try:
        query_dict: Optional[dict] = parse_qs(url.query, strict_parsing=True)
    except ValueError:
        # the query couldn't be parsed
        query_dict = None

    if query_dict is not None:
        # sort keys alphabetically; multiple values of one key are concatenated
        query = "".join(
            f"_{key.strip()}_{''.join(value.strip() for value in values)}"
            for key, values in sorted(query_dict.items(), key=lambda item: item[0])
        )

    return f"{url.netloc}_{url.path.replace('/', '_')}{query}".lower().strip()
|
||||
|
||||
|
||||
def remove_feature_part_from_track(title: str) -> str:
|
||||
|
||||
Reference in New Issue
Block a user