This commit is contained in:
@@ -129,15 +129,27 @@ UNIFY_TO = " "
|
||||
ALLOWED_LENGTH_DISTANCE = 20
|
||||
|
||||
|
||||
def unify_punctuation(to_unify: str, unify_to: str = UNIFY_TO) -> str:
    """Replace every ASCII punctuation character in a string.

    Args:
        to_unify: The text to normalize.
        unify_to: The replacement inserted for each character found in
            ``string.punctuation``. Defaults to the module-level
            ``UNIFY_TO``.

    Returns:
        A copy of ``to_unify`` with each punctuation character replaced by
        ``unify_to``; all other characters are left untouched.
    """
    # A single translate pass maps each original punctuation character
    # exactly once, so replacement text is never re-processed (which a chain
    # of str.replace calls would do if ``unify_to`` itself contained
    # punctuation and was longer than one character).
    table = str.maketrans(dict.fromkeys(string.punctuation, unify_to))
    return to_unify.translate(table)
|
||||
|
||||
def hash_url(url: Union[str, ParseResult]) -> str:
|
||||
if isinstance(url, str):
|
||||
url = urlparse(url)
|
||||
|
||||
unify_to = "-"
|
||||
|
||||
def unify_part(part: str) -> str:
|
||||
nonlocal unify_to
|
||||
return unify_punctuation(part.lower(), unify_to=unify_to).strip(unify_to)
|
||||
|
||||
# netloc
|
||||
netloc = unify_part(url.netloc)
|
||||
if netloc.startswith("www" + unify_to):
|
||||
netloc = netloc[3 + len(unify_to):]
|
||||
|
||||
# query
|
||||
query = url.query
|
||||
query_dict: Optional[dict] = None
|
||||
try:
|
||||
@@ -150,9 +162,9 @@ def hash_url(url: Union[str, ParseResult]) -> str:
|
||||
# sort keys alphabetically
|
||||
query = ""
|
||||
for key, value in sorted(query_dict.items(), key=lambda i: i[0]):
|
||||
query += f"_{key.strip()}_{''.join(i.strip() for i in value)}"
|
||||
query += f"{key.strip()}-{''.join(i.strip() for i in value)}"
|
||||
|
||||
r = f"{url.netloc}_{url.path.replace('/', '_')}{query}"
|
||||
r = f"{netloc}_{unify_part(url.path)}_{unify_part(query)}"
|
||||
r = r.lower().strip()
|
||||
return r
|
||||
|
||||
|
Reference in New Issue
Block a user