Implemented a class for formatted text. It can return text in Markdown, HTML and plaintext, and convert between those formats.

This commit is contained in:
Hellow2 2023-02-03 16:33:22 +01:00
parent bc091807fb
commit 7d570038b3

View File

@ -1,44 +1,16 @@
# https://github.com/Alir3z4/html2text
import html2text
import pandoc import pandoc
""" """
>>> import html2text TODO
>>> implement in setup.py a script to install pandoc
>>> h = html2text.HTML2Text() https://pandoc.org/installing.html
>>> # Ignore converting links from HTML
>>> h.ignore_links = True
>>> print h.handle("<p>Hello, <a href='https://www.google.com/earth/'>world</a>!")
Hello, world!
>>> print(h.handle("<p>Hello, <a href='https://www.google.com/earth/'>world</a>!")) !!!!!!!!!!!!!!!!!!IMPORTANT!!!!!!!!!!!!!!!!!!
Hello, world!
>>> # Don't Ignore links anymore, I like links
>>> h.ignore_links = False
>>> print(h.handle("<p>Hello, <a href='https://www.google.com/earth/'>world</a>!"))
Hello, [world](https://www.google.com/earth/)!
""" """
class FormattedText: class FormattedText:
_plaintext: str
_markdown: str
_html: str
doc = None doc = None
def __new__(cls, **kwargs):
self = object.__new__(cls)
self._plaintext = ""
self._markdown = ""
self._html = ""
return self
def __init__( def __init__(
self, self,
plaintext: str = None, plaintext: str = None,
@ -52,14 +24,12 @@ class FormattedText:
def set_plaintext(self, plaintext: str): def set_plaintext(self, plaintext: str):
if plaintext is None: if plaintext is None:
return return
self.doc = pandoc.read(plaintext)
self._plaintext = plaintext
def set_markdown(self, markdown: str): def set_markdown(self, markdown: str):
if markdown is None: if markdown is None:
return return
self.doc = pandoc.read(markdown, format="markdown")
self._markdown = markdown
def set_html(self, html: str): def set_html(self, html: str):
if html is None: if html is None:
@ -76,13 +46,38 @@ class FormattedText:
return None return None
return pandoc.write(self.doc, format="html").strip() return pandoc.write(self.doc, format="html").strip()
def get_plaintext(self)-> str:
if self.doc is None:
return None
return pandoc.write(self.doc, format="plain").strip()
class NotesAttributes: class NotesAttributes:
def __init__(self) -> None: def __init__(self) -> None:
pass pass
if __name__ == "__main__": if __name__ == "__main__":
markdown = """ _plaintext = """
World of Work
1. The right to help out society, and being paied for it
2. The right to get paied, so you can get along well.
3. The right for every individual to sell their products to provide for
themselfes or for others
4. The right of fair competitions, meaning eg. no monopoles.
5. The right for a home.
6. The right to good healthcare
7. The right of protections against tragedies, be it personal ones, or
global ones.
8. The right to be educated in a way that enables you to work.
3 most important ones
1. The right to get paied, so you can get along well.
2. The right for a home.
3. The right for a good healthcare.
"""
_markdown = """
# World of Work # World of Work
1. The right to help out society, and being paied for it 1. The right to help out society, and being paied for it
@ -100,7 +95,7 @@ if __name__ == "__main__":
2. The right for a home. 2. The right for a home.
3. The right for a good healthcare. 3. The right for a good healthcare.
""" """
html = """ _html = """
<b>Contact:</b> <a href="mailto:ghostbath@live.com">ghostbath@live.com</a><br /> <b>Contact:</b> <a href="mailto:ghostbath@live.com">ghostbath@live.com</a><br />
<br /> <br />
Although the band originally claimed that they were from Chongqing, China, it has been revealed in a 2015 interview with <b>Noisey</b> that they're an American band based in Minot, North Dakota.<br /> Although the band originally claimed that they were from Chongqing, China, it has been revealed in a 2015 interview with <b>Noisey</b> that they're an American band based in Minot, North Dakota.<br />
@ -111,7 +106,12 @@ According to the band, "Ghost Bath" refers to "the act of committing suicide by
- "Luminescence" on <i>Jericho Vol.36 - Nyctophobia</i> (2018) [] - "Luminescence" on <i>Jericho Vol.36 - Nyctophobia</i> (2018) []
""" """
notes = FormattedText(html=html) # notes = FormattedText(html=html)
print(notes.get_html()) # notes = FormattedText(markdown=_markdown)
print("-"*30) notes = FormattedText(plaintext=_plaintext)
# print(notes.get_html())
# print("-"*30)
# print(notes.get_markdown())
print(notes.get_markdown()) print(notes.get_markdown())