Skip to content

Commit

Permalink
minor ebook and html adjustments
Browse files Browse the repository at this point in the history
  • Loading branch information
entorb committed Mar 27, 2024
1 parent 405e4eb commit 68c6922
Show file tree
Hide file tree
Showing 2 changed files with 60 additions and 14 deletions.
40 changes: 36 additions & 4 deletions scripts/ebook/3.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
"""
Modify flattened .tex file.
"""

import datetime as dt
import os
import re
Expand Down Expand Up @@ -70,15 +71,46 @@
cont,
)

# OMakeIV sections
# not used in DE version

# \censor
cont = re.sub(r"\\censor\{.*?\}", r"xxxxxx", cont)


# # remove Deathly_Hallows_Sign.pdf and other pdf images
# # \includegraphics[scale=0.125]{images/Deathly_Hallows_Sign.pdf}
# cont = re.sub(
# # r"\\includegraphics.*?\{images/Deathly_Hallows_Sign.*?\}",
# r"\\includegraphics.*?\.pdf\}",
# "",
# cont,
# )

# remove all images
cont = re.sub(
r"\\censor\{.*?\}",
r"xxxxxx",
r"\\includegraphics\[.*?\]\{.*?\}",
"",
cont,
flags=re.DOTALL,
)

# for spellcheck doc version -> not working, make_ebook-sh runs forever...
# cont = re.sub(r"\\spell\{.*?\}+", "spell", cont)
# remove empty envs
cont = re.sub(
r"\\begin\{([^\}]*)\}\s*\\end\{\1}",
"",
cont,
flags=re.DOTALL,
)

# remove end stuff
cont = re.sub(
r"(.*)\\end\{chapterOpeningAuthorNote\}.*?\\end\{document\}",
r"\1\\end{chapterOpeningAuthorNote}\n\\end{document}",
cont,
flags=re.DOTALL,
count=1,
)

with open(target_file, mode="w", encoding="utf-8", newline="\n") as fhOut:
fhOut.write(cont)
34 changes: 24 additions & 10 deletions scripts/ebook/6.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
"""
HTML modifications.
"""

import os
import re
import sys
Expand All @@ -18,22 +19,13 @@
with open(source_file, encoding="utf-8", newline="\n") as fhIn:
cont = fhIn.read()

# done via pandoc parameter -V lang=de in 5.sh
# # set html lang to de
# cont = re.sub(
# r'(<html .*?) lang="")',
# r'\1 lang="de"',
# cont,
# flags=re.IGNORECASE,
# count=1,
# )

# remove strange leftovers from tex -> html conversion
cont = re.sub(
r"(</header>).*?(<p>Fanfiction von)",
r"\1\n\2",
cont,
flags=re.DOTALL | re.IGNORECASE,
count=1,
)

# remove duplication of author name
Expand All @@ -45,6 +37,28 @@
count=1,
)

# now done via pandoc -V lang=de in 5.sh
# # set language
# cont = re.sub(
# r'(<html [^>]*) lang="" xml:lang=""',
# r'\1 lang="de" xml:lang="de"',
# cont,
# count=1,
# )

# remove trailing slashes to satisfy https://validator.w3.org
cont = cont.replace("<br />", "<br>")
cont = cont.replace("<hr />", "<hr>")

cont = re.sub(
r"(<meta [^>]*) />",
r"\1>",
cont,
)

# remove bad span ids (containing spaces) from newspaper spans
cont = re.sub(r'<span id="[^"]+" label="[^"]+">', r"<span>", cont, count=5)

# doc structure (not needed any more, using calibre --level1-toc flag instead)
# sed -i 's/<h1 /<h1 class="part"/g' $target_file
# sed -i 's/<h2 /<h2 class="chapter"/g' $target_file
Expand Down

0 comments on commit 68c6922

Please sign in to comment.