diff --git a/.gitignore b/.gitignore index d89697e..a772013 100644 --- a/.gitignore +++ b/.gitignore @@ -1,5 +1,6 @@ _*/ - +dist-newstyle/ +site .ds_store .vscode/ *.sage.py diff --git a/bib_style.csl b/bib_style.csl new file mode 100644 index 0000000..732f9cc --- /dev/null +++ b/bib_style.csl @@ -0,0 +1,130 @@ + + diff --git a/css/chao-theorems.css b/css/chao-theorems.css new file mode 100644 index 0000000..a53d009 --- /dev/null +++ b/css/chao-theorems.css @@ -0,0 +1,100 @@ +.theorem-environment { + font-style: italic; + margin-top: 1em; +} + +.theorem-header { + font-weight: bold; + font-style: normal; +} + +.theorem-header .index:before { + content: ' '; +} + +.theorem-header .name:before { + content: ' ('; +} + +.theorem-header .name:after { + content: ')'; +} + +.theorem-header:after { + content: '.\2002\2002'; +} + +.theorem-header+p { + display: inline; +} + +.Proof .type { + font-style: italic; + font-weight: normal; +} + +.Proof { + font-style: normal; + position: relative; +} + +.Proof:after { + content: '∎'; + position: absolute; + right: 0px; + bottom: 0px; +} + +.Proof span.theorem-header span.name { + font-weight: normal; + font-style: italic; +} + +.Proof span.theorem-header span.name:before { + content: ' '; +} + +.Proof span.theorem-header span.name:after { + content: ' '; +} + +table.postindex { + width: 100%; +} + +table.postindex cite { + font-style: normal; +} + +table.postindex td.right { + text-align: right; + width: 11ex; +} + +.header-section-number { + margin-right: 10px; +} + +.header-section-number:after { + content: '.'; +} + +.csl-entry { + display: table; + width: 100%; + table-layout: auto; +} + +.csl-left-margin { + display: table-cell; + padding-right: 0.5em; + white-space: nowrap; + width: 1px; +} + +.csl-right-inline { + display: table-cell; +} +.csl-right-inline a{ + word-break: break-all; +} \ No newline at end of file diff --git a/css/default.css b/css/default.css new file mode 100644 index 0000000..570c049 --- /dev/null +++ 
b/css/default.css @@ -0,0 +1,362 @@ +:root { + --color-text: black; + --color-tag1: gray; + --color-tag2: darkolivegreen; + --color-bg: white; + --color-notice: #fb4f4f; +} + +html { + scrollbar-gutter: stable; + scroll-behavior: smooth; + font-size: 110%; +} + +body { + font-family: 'Lato', -apple-system, BlinkMacSystemFont, 'PingFang SC', 'Microsoft YaHei', sans-serif; + font-optical-sizing: auto; + font-weight: 400; + font-style: normal; + font-size: 1rem; + line-height: 140%; + color: var(--color-text); + background-color: var(--color-bg); + text-rendering: optimizeLegibility; +} + +body a { + text-decoration: none; +} + +body a:hover { + text-decoration: underline; +} +details { + padding-left: 1em; + border: 2px solid var(--color-text); +} +summary:hover { + cursor: pointer; +} + +/*mathML*/ +.htmlmathparagraph, mtext,math { + font-family: Lete Sans Math; +} +#math-container { + display: block; + overflow-x: auto; + overflow-y: hidden; + padding: .5em; +} + +.text-space .langtag { + color: var(--color-tag1); +} + +.sc { + font-variant-caps: small-caps; +} + +a.url { + word-break: break-all; +} + +html body div.text-space main ul.post-list { + list-style-type: none; + padding-left: 1em; +} + +/* top bar */ +header { + font-weight: 400; + font-family: "IosevkaC", sans-serif; +} +nav a { + display: inline-block; + text-decoration: none; +} + +.uri { + word-wrap: break-word; + overflow-wrap: break-word; + word-break: break-all; + white-space: normal; +} + +footer { + color: var(--color-text); + font-size: 0.8rem; + margin-top: 2em; + text-align: right; + padding-right: 1em; +} + +.pagetitle { + font-size: 2rem; + font-weight: normal; + font-style: normal; + text-align: left; + line-height: 100%; +} + +h1 { + margin-top: 1em; + font-size: 1.44rem; + font-weight: bold; + font-style: normal; +} + +h2 { + margin-top: 1em; + font-size: 1.2rem; + font-weight: bold; + font-style: normal +} + +h3 { + margin-top: 1em; + font-size: 1rem; + font-weight: bold; + font-style: 
normal +} + +article .header { + font-size: 1rem; + font-style: normal; + color: var(--color-tag1); + text-align: left; +} + + +.info,.info a { + color: var(--color-tag2); + font-size: 1rem; + font-style: normal; + text-align: left; +} + +.info a:visited { + color: var(--color-tag2); +} + +section.body { + margin-top: 2rem; +} + +.ascii-art { + font-family: monospace; + line-height: normal; +} + +/* table. copied from https://developer.mozilla.org/en-US/docs/Web/HTML/Reference/Elements/table */ +table { + border-collapse: collapse; + border: 2px solid rgb(140 140 140); + font-size: 0.8rem; + letter-spacing: 1px; +} + +caption { + caption-side: bottom; + padding: 10px; + font-weight: bold; +} + +thead, +tfoot { + background-color: rgb(228 240 245); +} + +th, +td { + border: 1px solid rgb(160 160 160); + padding: 8px 10px; +} + +td:last-of-type { + text-align: center; +} + +tbody > tr:nth-of-type(even) { + background-color: rgb(237 238 242); +} + +tfoot th { + text-align: right; +} + +tfoot td { + font-weight: bold; +} + + +figure { + display: flex; + flex-flow: column; + padding: 5px; + margin: auto; + max-width: 80%; +} + +.centerimg img { + margin: 0 auto 0 auto; + display: block; +} + + +div.highlight, +pre code { + margin: auto; + padding: 10px; + overflow: auto; + display: block; +} + +code { + font-family: "IosevkaC", monospace; + margin: 0 auto; + display: inline-block; + padding: 0px 2px; + border-radius: 2px; + font-variant-ligatures: none; + font-kerning: none; + text-rendering: optimizeSpeed; +} + + +.draft-notice { + color: var(--color-notice); + margin: 1em auto; + text-align: center +} + + +.subtitle { + text-align: left; + font-size: 1.2rem; + margin-top: 0 +} +.gallery { + margin-top: 2em; + display: grid; + grid-template-columns: repeat(auto-fit, minmax(250px, 1fr)); + gap: 12px; +} + +.gallery img { + width: 100%; + max-width: 320px; + display: block; +} + +/* phones -- no sidebar no sidenotes*/ +@media (max-width: 768px) { + body { + /* width: 
90%; */ + margin: auto; + padding: 0 5%; + text-align: left; + max-width: 876px; + } + mjx-container[display="true"] + /*, .katex-display */ { + overflow-x: auto; + overflow-y: hidden; + } + + /* .katex-display>.katex>.katex-html>.tag { + display: inline-block; + position: relative; + padding-left: 10pt; + } */ +} + +.toc { + display: none; +} + +/* sidebar. no sidenotes */ +@media (min-width: 769px) { + body { + max-width: 1350px; + display: -webkit-flex; + -webkit-flex-flow: row wrap; + display: -ms-flexbox; + -ms-flex-flow: row wrap; + flex-flow: row wrap; + width: 95%; + padding-right: 5%; + margin: auto; + } + + .toc { + margin-top: 5rem; + margin-left: 0; + margin-right: 0; + width: 33%; + display: inline-block; + } + + div#contents ul, + div#contents-big ul { + margin-top: 0.5em; + margin-bottom: 0.5em; + padding-left: 1em; + line-height: 1.2; + list-style-type: decimal; + margin-left: 0 + } + + div#contents-big ul ul { + list-style-type: none; + } + + div#contents-big li+li { + margin-top: 0.5em + } + + div#contents-big { + font-size: 80%; + padding-top: 0; + padding-left: 1rem; + text-align: left; + max-width: 60%; + clear: both; + margin-right: 4em; + position: sticky; + top: 5rem; + left: 100% + } + + div#contents-big .mini-header { + font-weight: bold; + margin: 0; + font-variant: small-caps; + } + + .text-space { + display: inline-block; + width: 66%; + max-width: 800px; + } +} +/* sidebar+sidenotes */ +@media (min-width: 1200px) { + body { + width: 75%; + padding-right: 25%; + } +} + +@media print { + + .no-print, + .no-print * { + display: none !important; + } + + body { + margin: auto; + } +} \ No newline at end of file diff --git a/css/fonts.css b/css/fonts.css new file mode 100644 index 0000000..e16a175 --- /dev/null +++ b/css/fonts.css @@ -0,0 +1,52 @@ + +/* fonts */ + +@font-face { + font-family: "Lato"; + src: url("/fonts/Lato-Regular.woff2") format("woff2"); + font-weight: normal; + font-style: normal; +} +@font-face { + font-family: "Lato"; + 
src: url("/fonts/Lato-Bold.woff2") format("woff2"); + font-weight: bold; + font-style: normal; +} +@font-face { + font-family: "Lato"; + src: url("/fonts/Lato-Italic.woff2") format("woff2"); + font-weight: normal; + font-style: italic; +} +@font-face { + font-family: "Lato"; + src: url("/fonts/Lato-BoldItalic.woff2") format("woff2"); + font-weight: bold; + font-style: italic; +} +@font-face { + font-family: "Lete Sans Math"; + src: url("/fonts/LeteSansMath.woff2") format("woff2"); + font-weight: normal; + font-style: normal; +} +@font-face { + font-family: "Lete Sans Math"; + src: url("/fonts/LeteSansMath-Bold.woff2") format("woff2"); + font-weight: bold; + font-style: normal; +} + +@font-face { + font-family: "IosevkaC"; + src: url("/fonts/IosevkaCustom-Regular.woff2") format("woff2"); + font-weight: normal; + font-style: normal; +} +@font-face { + font-family: "IosevkaC"; + src: url("/fonts/IosevkaCustom-Bold.woff2") format("woff2"); + font-weight: bold; + font-style: normal; +} \ No newline at end of file diff --git a/css/pygentize.css b/css/pygentize.css new file mode 100644 index 0000000..acc1557 --- /dev/null +++ b/css/pygentize.css @@ -0,0 +1,35 @@ + +code.sourceCode +{ + background: inherit +} +pre > code.sourceCode > span > a:first-child::before { text-decoration: underline; } +code span.al { color: #CB4B16; font-weight: bold; } /* Alert */ +code span.an { color: #60a0b0; font-weight: bold; font-style: italic; } /* Annotation */ +code span.at { color: #7d9029; } /* Attribute */ +code span.bn { color: #D33682; } /* BaseN */ +code span.bu { } /* BuiltIn */ +code span.cf { color: #5F8700; font-weight: bold; } /* ControlFlow */ +code span.ch { color: #16801a; } /* Char */ +code span.cn { color: #880000; } /* Constant */ +code span.co { color: #93A1A1; font-style: italic; } /* Comment */ +code span.cv { color: #60a0b0; font-weight: bold; font-style: italic; } /* CommentVar */ +code span.do { color: #ba2121; font-style: italic; } /* Documentation */ +code 
span.dt { background-color: #f8edff; } /* DataType */ +code span.dv { color: #D33682; } /* DecVal */ +code span.er { color: #D30102; font-weight: bold; } /* Error */ +code span.ex { } /* Extension */ +code span.fl { color: #D33682; } /* Float */ +code span.fu { } /* Function */ +code span.im { color: #D70000} /* Import */ +code span.in { color: #60a0b0; font-weight: bold; font-style: italic; } /* Information */ +code span.kw { font-weight: bold; } /* Keyword */ +code span.op { font-weight: bold; } /* Operator */ +code span.ot { font-weight: bold; } /* Other */ +code span.pp { color: #bc7a00; } /* Preprocessor */ +code span.sc { color: #4070a0; } /* SpecialChar */ +code span.ss { color: #bb6688; } /* SpecialString */ +code span.st { color: #16801a; } /* String */ +code span.va { color: #19177c; } /* Variable */ +code span.vs { color: #4070a0; } /* VerbatimString */ +code span.wa { color: #60a0b0; font-weight: bold; font-style: italic; } /* Warning */ \ No newline at end of file diff --git a/css/sidenotes.css b/css/sidenotes.css new file mode 100644 index 0000000..4468400 --- /dev/null +++ b/css/sidenotes.css @@ -0,0 +1,179 @@ +/* +This file is copied from +https://github.com/slotThe/slotThe.github.io/blob/main/css/sidenotes.css +with minor modifications made by Yu Cong. + +The original author is Tony Zorman. + +Extracted from: + + https://github.com/edwardtufte/tufte-css + + and modified to fit my website's theme. 
+*/ + +body { + counter-reset: sidenote-counter; +} + +.sidenote, +.marginnote, +.marginnote-left { + float: right; + clear: right; + margin-right: -45%; + width: 40%; + margin-top: 0.3rem; + margin-bottom: 0; + font-size: 0.8em; + line-height: 1.2; + vertical-align: baseline; + position: relative; + text-align: left; +} +@media (max-width: 1200px) { + .sidenote, + .marginnote, + .marginnote-left { + margin-right: -40%; + width: 33%; + } +} + +.marginnote-left { + float: left; + clear: left; + margin-left: -32%; + width: 25%; + position: relative; + text-align: right; +} +/* The first condition is for the case of a left-aligned layout (on a + smaller screen), and the second condition for a more centered layout + on a larger screen. It's a bit awkward, sadly :/ */ +@media (max-width: 1349px) or ((min-width: 1367px) and (max-width: 1620px)) { + .marginnote-left { + margin-left: -33%; + width: 30%; + } +} + +.sidenote code { + font-size: 0.94em; +} + +/* For some reason, although only `overflow-x` is set in `default.css`, + block code in side and marginnotes gets a vertical (!) scrollbar no + matter what; disable that. 
+*/ +div .marginnote pre, +div .sidenote pre { + overflow-y: hidden; +} + +.sidenote-number { + counter-increment: sidenote-counter; + color: var(--color-link); +} + +.sidenote-number:after, +.sidenote:before { + position: relative; + vertical-align: baseline; +} + +.sidenote-number:after { + content: counter(sidenote-counter); + font-size: 0.8rem; + top: -0.5rem; +} + +/* Properly position siednote number and adjust position of sidenote + paragraphs: + https://github.com/edwardtufte/tufte-css/issues/93#issuecomment-670695382 +*/ +.sidenote::before { + content: counter(sidenote-counter) " "; + font-size: 0.8rem; + top: -0.55rem; + position: absolute; + right: calc(100% + 0.5em); +} + +.sidenote p { + margin: 1em 0; +} + +.sidenote p:first-child { + margin-top: 0; +} + +.sidenote p:last-child { + margin-bottom: 0; +} + +/* */ + +input.margin-toggle { + display: none; +} + +label.sidenote-number { + display: inline-block; + max-height: 2rem; /* should be less than or equal to paragraph line-height */ +} + +label.margin-toggle:not(.sidenote-number) { + display: none; +} + +.iframe-wrapper { + position: relative; + padding-bottom: 56.25%; /* 16:9 */ + padding-top: 25px; + height: 0; +} + +.iframe-wrapper iframe { + position: absolute; + top: 0; + left: 0; + width: 100%; + height: 100%; +} + +@media (max-width: 1200px) { + label.margin-toggle:not(.sidenote-number) { + display: inline; + } + + .sidenote, + .marginnote, + .marginnote-left { + display: none; + } + + /* Linkify sidenotes iff they are clickable */ + .margin-toggle, + .sidenote-number:after { + color: var(--color-link); + text-decoration: none; + } + + .margin-toggle:checked + .sidenote, + .margin-toggle:checked + .marginnote, + .margin-toggle:checked + .marginnote-left { + display: block; + float: left; + left: 1rem; + clear: both; + width: 95%; + margin: 1rem 2.5%; + position: relative; + text-align: left; + } + + label { + cursor: pointer; + } +} diff --git a/favicon.ico b/favicon.ico new file mode 100644 
index 0000000..778fc71 Binary files /dev/null and b/favicon.ico differ diff --git a/fonts/IosevkaCustom-Bold.woff2 b/fonts/IosevkaCustom-Bold.woff2 new file mode 100644 index 0000000..3018ffc Binary files /dev/null and b/fonts/IosevkaCustom-Bold.woff2 differ diff --git a/fonts/IosevkaCustom-Italic.woff2 b/fonts/IosevkaCustom-Italic.woff2 new file mode 100644 index 0000000..dfe97fe Binary files /dev/null and b/fonts/IosevkaCustom-Italic.woff2 differ diff --git a/fonts/IosevkaCustom-Regular.woff2 b/fonts/IosevkaCustom-Regular.woff2 new file mode 100644 index 0000000..9e420ce Binary files /dev/null and b/fonts/IosevkaCustom-Regular.woff2 differ diff --git a/fonts/Lato-Bold.woff2 b/fonts/Lato-Bold.woff2 new file mode 100644 index 0000000..1e55706 Binary files /dev/null and b/fonts/Lato-Bold.woff2 differ diff --git a/fonts/Lato-BoldItalic.woff2 b/fonts/Lato-BoldItalic.woff2 new file mode 100644 index 0000000..02856a8 Binary files /dev/null and b/fonts/Lato-BoldItalic.woff2 differ diff --git a/fonts/Lato-Italic.woff2 b/fonts/Lato-Italic.woff2 new file mode 100644 index 0000000..abf1203 Binary files /dev/null and b/fonts/Lato-Italic.woff2 differ diff --git a/fonts/Lato-Regular.woff2 b/fonts/Lato-Regular.woff2 new file mode 100644 index 0000000..44d6a45 Binary files /dev/null and b/fonts/Lato-Regular.woff2 differ diff --git a/fonts/LeteSansMath-Bold.otf b/fonts/LeteSansMath-Bold.otf new file mode 100644 index 0000000..e71bfd5 Binary files /dev/null and b/fonts/LeteSansMath-Bold.otf differ diff --git a/fonts/LeteSansMath-Bold.woff2 b/fonts/LeteSansMath-Bold.woff2 new file mode 100644 index 0000000..c640b3b Binary files /dev/null and b/fonts/LeteSansMath-Bold.woff2 differ diff --git a/fonts/LeteSansMath.otf b/fonts/LeteSansMath.otf new file mode 100644 index 0000000..7e4490c Binary files /dev/null and b/fonts/LeteSansMath.otf differ diff --git a/fonts/LeteSansMath.woff2 b/fonts/LeteSansMath.woff2 new file mode 100644 index 0000000..9e85a49 Binary files /dev/null and 
b/fonts/LeteSansMath.woff2 differ
diff --git a/makefile b/makefile
new file mode 100644
index 0000000..64a2c97
--- /dev/null
+++ b/makefile
@@ -0,0 +1,25 @@
+
+COMMANDS := build watch rebuild clean
+# Prerequisites of .PHONY are separated by spaces only. With
+# `$(COMMANDS), publish` the comma stuck to the last word, so the target
+# `clean,` was declared phony and the real `clean` target was not.
+.PHONY: $(COMMANDS) publish
+
+# Set the default goal, so running 'make' without arguments will run 'make build'.
+.DEFAULT_GOAL := build
+
+# ---
+# Each command is forwarded unchanged to the ./site executable. The leading
+# `-` tells make to carry on even if ./site exits with a non-zero status.
+$(COMMANDS): site
+	@echo "Running command: ./site $@"
+	-@./site $@
+
+
+# --- Rules ---
+# using relative symlinks should be fine since everything only works at ./
+
+
+# Every module under src/ is a prerequisite, so touching Pangu.hs or
+# SideNoteHTML.hs also triggers a rebuild of ./site.
+site: src/site.hs src/ChaoDoc.hs src/Pangu.hs src/SideNoteHTML.hs
+	cabal build
+	# `$$(...)` is a shell substitution: it runs when this line runs, i.e.
+	# after `cabal build` has finished.
+	ln -sf "$$(cabal list-bin exe:site)" site
+
+# move from katex to mathjax
+# katex_cli:
+# cd katex_rust_fork && cargo build --release
+# ln -sf ./katex_rust_fork/target/release/katex_cli katex_cli
\ No newline at end of file
diff --git a/math-macros.md b/math-macros.tex
similarity index 100%
rename from math-macros.md
rename to math-macros.tex
diff --git a/AGENT.md b/prompts/ask-question.md
similarity index 100%
rename from AGENT.md
rename to prompts/ask-question.md
diff --git a/paper-review.md b/prompts/paper-review.md
similarity index 100%
rename from paper-review.md
rename to prompts/paper-review.md
diff --git a/proof-review.md b/prompts/proof-review.md
similarity index 100%
rename from proof-review.md
rename to prompts/proof-review.md
diff --git a/src/ChaoDoc.hs b/src/ChaoDoc.hs
new file mode 100644
index 0000000..c6ecf84
--- /dev/null
+++ b/src/ChaoDoc.hs
@@ -0,0 +1,278 @@
+{-# LANGUAGE BlockArguments #-}
+{-# LANGUAGE OverloadedStrings #-}
+
+module ChaoDoc (chaoDocRead, chaoDocWrite, chaoDocPandocCompiler, chaoDocCompiler) where
+
+import Control.Monad.State
+import Data.Either
+import Data.Functor
+import Data.List (intersect)
+import qualified Data.Map as M
+import Data.Maybe
+import Data.Text (Text, pack)
+import qualified Data.Text as T
+import Hakyll
+import Pangu (isCJK, pangu)
+import SideNoteHTML (usingSideNotesHTML)
+import System.IO.Unsafe
+import Text.Pandoc
+-- import
Text.Pandoc.Builder
+import Text.Pandoc.Walk (query, walk, walkM)
+
+-- setMeta key val (Pandoc (Meta ms) bs) = Pandoc (Meta $ M.insert key val ms) bs
+
+-- | Reader options: pandoc's default extension set plus raw TeX, LaTeX
+-- macros and the two backslash-delimited TeX math extensions.
+--
+-- On mac, please do `export LANG=C` before using this thing
+chaoDocRead :: ReaderOptions
+chaoDocRead =
+  def
+    { readerExtensions =
+        enableExtension Ext_tex_math_double_backslash $
+          enableExtension Ext_tex_math_single_backslash $
+            enableExtension Ext_latex_macros $
+              enableExtension Ext_raw_tex pandocExtensions
+    }
+
+-- | Writer options: math is emitted as MathML, sections are numbered and a
+-- table of contents covering two heading levels is requested.
+chaoDocWrite :: WriterOptions
+chaoDocWrite =
+  def
+    { writerHTMLMathMethod = MathML,
+      -- writerHtml5 = True,
+      -- writerHighlightStyle = Just syntaxHighlightingStyle,
+      writerNumberSections = True,
+      writerTableOfContents = True,
+      writerTOCDepth = 2
+    }
+
+-- | Path of the citation style file handed to 'processPandocBiblio'.
+cslFile :: String
+cslFile = "bib_style.csl"
+
+-- | Path of the bibliography database handed to 'processPandocBiblio'.
+bibFile :: String
+bibFile = "reference.bib"
+
+-- | Compile the current resource to a pandoc document.
+--
+-- The body is prefixed with the contents of @math-macros.tex@, parsed with
+-- 'chaoDocRead', passed through 'myFilter' and given the @link-citations@
+-- and @reference-section-title@ metadata; citations are resolved last.
+chaoDocPandocCompiler :: Compiler (Item Pandoc)
+chaoDocPandocCompiler = do
+  macros <- T.pack <$> loadBody "math-macros.tex"
+  csl <- load $ fromFilePath cslFile
+  bib <- load $ fromFilePath bibFile
+  body <- getResourceBody
+  let bodyWithMacros =
+        fmap (T.unpack . prependMacros macros . T.pack) body
+      prepare =
+        addMeta "link-citations" (MetaBool True)
+          . addMeta "reference-section-title" (MetaInlines [Str "References"])
+          . myFilter
+  readPandocWith chaoDocRead bodyWithMacros
+    >>= processPandocBiblio csl bib . fmap prepare
+
+-- | 'chaoDocPandocCompiler' followed by rendering with 'chaoDocWrite'.
+chaoDocCompiler :: Compiler (Item String)
+chaoDocCompiler = chaoDocPandocCompiler <&> writePandocWith chaoDocWrite
+
+-- | Insert (or overwrite) one metadata field of a document.
+addMeta :: T.Text -> MetaValue -> Pandoc -> Pandoc
+addMeta name value (Pandoc meta a) =
+  let prevMap = unMeta meta
+      newMap = M.insert name value prevMap
+      newMeta = Meta newMap
+   in Pandoc newMeta a
+
+-- | All document transformations. Composition runs right to left: display
+-- math wrapping, CJK spacing, theorem environments, then sidenotes.
+myFilter :: Pandoc -> Pandoc
+myFilter = usingSideNotesHTML chaoDocWrite . theoremFilter . panguFilter . displayMathFilter
+
+-- | Inlines of the last 'Plain' or 'Para' block of a document, or @[]@ when
+-- it contains neither.
+pandocToInline :: Pandoc -> [Inline]
+pandocToInline (Pandoc _ blocks) = go (reverse blocks)
+  where
+    go (Plain inlines : _) = inlines
+    go (Para inlines : _) = inlines
+    go (_ : xs) = go xs
+    go [] = []
+
+-- | Div classes that receive a running number.
+incrementalBlock :: [Text]
+incrementalBlock =
+  [ "Theorem",
+    "Conjecture",
+    "Definition",
+    "Example",
+    "Lemma",
+    "Problem",
+    "Proposition",
+    "Corollary",
+    "Observation",
+    "定理",
+    "猜想",
+    "定义",
+    "例",
+    "引理",
+    "问题",
+    "命题",
+    "推论",
+    "观察"
+  ]
+
+-- | Div classes that are typeset as theorem-like blocks but not numbered.
+otherBlock :: [Text]
+otherBlock = ["Proof", "Remark", "证明", "备注"]
+
+-- | Every class recognised as a theorem-like environment.
+theoremClasses :: [Text]
+theoremClasses = incrementalBlock ++ otherBlock
+
+-- create a filter for theorems
+
+-- | Class list of an attribute triple.
+getClass :: Attr -> [Text]
+getClass (_, c, _) = c
+
+-- | Prepend a class to an attribute triple.
+addClass :: Attr -> Text -> Attr
+addClass (a, b, c) d = (a, d : b, c)
+
+-- | Prepend a key/value pair to an attribute triple.
+addAttr :: Attr -> Text -> Text -> Attr
+addAttr (a, b, c) x y = (a, b, (x, y) : c)
+
+-- | Tag a theorem-like Div with a @type@ attribute (its first class that is
+-- listed in 'theoremClasses') and, for numbered kinds, an @index@ attribute
+-- taken from the running counter. Any other block is returned unchanged.
+--
+-- The class lookup is a pattern match, so no partial 'head' is involved.
+preprocessTheorems :: Block -> State Int Block
+preprocessTheorems block@(Div attr xs) =
+  case classes `intersect` theoremClasses of
+    [] -> return block
+    theoremType : _ ->
+      let typed = addAttr attr "type" theoremType
+       in if isIncremental
+            then do
+              curId <- get
+              put (curId + 1)
+              return $ Div (addAttr typed "index" (pack $ show curId)) xs
+            else return $ Div typed xs
+  where
+    classes = getClass attr
+    isIncremental = not (null (classes `intersect` incrementalBlock))
+preprocessTheorems x = return x
+
+-- | Number the theorems (starting at 1), turn citations that name a theorem
+-- into links, then build the visible theorem headers.
+theoremFilter :: Pandoc -> Pandoc
+theoremFilter doc = walk makeTheorem $ autorefFilter $ evalState (walkM preprocessTheorems doc) 1
+
+-- | Cross-reference entry of one numbered theorem:
+-- @(identifier, (type, index))@. Yields @[]@ for anything else, including a
+-- Div whose @type@ is numbered but which carries no @index@ attribute
+-- (previously that case hit 'fromJust' on 'Nothing').
+-- [index, type, idx]
+theoremIndex :: Block -> [(Text, (Text, Text))]
+theoremIndex (Div (idx, _, parm) _) =
+  case (lookup "type" parm, lookup "index" parm) of
+    (Just t, Just index)
+      | t `elem` incrementalBlock -> [(idx, (t, index))]
+    _ -> []
+theoremIndex _ = []
+
+-- | Replace a citation whose first key is the identifier of a numbered
+-- theorem by a link reading "<type> <index>". Citations that do not name a
+-- theorem, and a 'Cite' without any citation, are returned unchanged
+-- (the latter used to crash on 'head').
+autoref :: [(Text, (Text, Text))] -> Inline -> Inline
+autoref x cite@(Cite (citation : _) _) =
+  case lookup citeid x of
+    Just (theoremType, num) ->
+      let linkTitle = theoremType <> " " <> num
+       in Link nullAttr [Str linkTitle] ("#" <> citeid, linkTitle)
+    Nothing -> cite
+  where
+    citeid = citationId citation
+autoref _ y = y
+
+-- | Apply 'autoref' everywhere, using the theorem index collected from the
+-- same document.
+autorefFilter :: Pandoc -> Pandoc
+autorefFilter x = walk (autoref links) x
+  where
+    links = query theoremIndex x
+
+-- processCitations works on AST. If you want to use citations in theorem name,
+-- then you need to convert citations there to AST as well and then use processCitations
+-- Thus one need to apply the theorem filter first.
+-- autoref still does not work.
+
+-- | Contents of @math-macros.tex@, read relative to the working directory.
+--
+-- NOTE(review): this is 'unsafePerformIO' over lazy 'readFile', needed only
+-- because 'thmNamePandoc' is pure. The file is read at most once, when the
+-- value is first forced (hence NOINLINE), and a missing file surfaces as an
+-- exception at that point.
+mathMacros :: Text
+mathMacros = unsafePerformIO (pack <$> readFile "math-macros.tex")
+{-# NOINLINE mathMacros #-}
+
+-- | Put the macro definitions in front of a document body, separated by a
+-- blank line.
+prependMacros :: Text -> Text -> Text
+prependMacros macros body = macros <> "\n\n" <> body
+
+-- | 'prependMacros' specialised to the global 'mathMacros'.
+prependMathMacros :: Text -> Text
+prependMathMacros = prependMacros mathMacros
+
+-- | Parse a theorem title as Markdown (with the math macros available).
+-- A parse failure yields an empty document.
+thmNamePandoc :: Text -> Pandoc
+thmNamePandoc x =
+  fromRight (Pandoc nullMeta []) . runPure $
+    readMarkdown chaoDocRead (prependMathMacros x)
+
+-- | Turn a Div carrying a @type@ attribute into a theorem environment: add
+-- the @theorem-environment@ class and prepend a header made of the type,
+-- the optional index and the optional title. Other blocks are unchanged.
+makeTheorem :: Block -> Block
+makeTheorem block@(Div attr xs) =
+  case lookup "type" parm of
+    Nothing -> block
+    Just t -> Div (addClass attr "theorem-environment") (Plain [header t] : xs)
+  where
+    (_, _, parm) = attr
+    header t = Span (addClass nullAttr "theorem-header") [typetext t, indextext, nametext]
+    typetext t = Span (addClass nullAttr "type") [Str t]
+    -- An absent index or title is rendered as an empty string.
+    indextext =
+      maybe
+        (Str "")
+        (\i -> Span (addClass nullAttr "index") [Str i])
+        (lookup "index" parm)
+    nametext =
+      maybe
+        (Str "")
+        (Span (addClass nullAttr "name") . pandocToInline . thmNamePandoc)
+        (lookup "title" parm)
+makeTheorem x = x
+
+-- pangu filter
+
+-- | Last character of the text an inline ends with, looking into the
+-- supported wrappers; 'Nothing' for empty text and any other inline.
+lastChar :: Inline -> Maybe Char
+lastChar e = case e of
+  Str s -> snd <$> T.unsnoc s
+  Emph is -> lastCharList is
+  Strong is -> lastCharList is
+  Strikeout is -> lastCharList is
+  Link _ is _ -> lastCharList is
+  Span _ is -> lastCharList is
+  Quoted _ is -> lastCharList is
+  _ -> Nothing
+  where
+    lastCharList [] = Nothing
+    lastCharList is = lastChar (last is)
+
+-- | First character of the text an inline starts with; mirror image of
+-- 'lastChar'.
+firstChar :: Inline -> Maybe Char
+firstChar e = case e of
+  Str s -> fst <$> T.uncons s
+  Emph is -> firstCharList is
+  Strong is -> firstCharList is
+  Strikeout is -> firstCharList is
+  Link _ is _ -> firstCharList is
+  Span _ is -> firstCharList is
+  Quoted _ is -> firstCharList is
+  _ -> Nothing
+  where
+    firstCharList [] = Nothing
+    firstCharList (i : _) = firstChar i
+
+-- | Apply 'pangu' to every string inside an inline, recursing through the
+-- same wrappers 'lastChar' and 'firstChar' look into.
+panguInline :: Inline -> Inline
+panguInline e = case e of
+  Str s -> Str (pangu s)
+  Emph is -> Emph (panguInlines is)
+  Strong is -> Strong (panguInlines is)
+  Strikeout is -> Strikeout (panguInlines is)
+  Link at is tg -> Link at (panguInlines is) tg
+  Span at is -> Span at (panguInlines is)
+  Quoted qt is -> Quoted qt (panguInlines is)
+  _ -> e
+
+panguInlines :: [Inline]
-> [Inline]
+panguInlines = foldr (addSpace . panguInline) []
+  where
+    -- Each inline is spaced internally first, then compared with the head
+    -- of the already processed tail.
+    addSpace x [] = [x]
+    addSpace x (y : ys)
+      | shouldSpace x y = x : Space : y : ys
+      | otherwise = x : y : ys
+    -- A 'Space' is inserted when exactly one of the two touching
+    -- characters is CJK.
+    shouldSpace x y = case (lastChar x, firstChar y) of
+      (Just lc, Just fc) -> isCJK lc /= isCJK fc
+      _ -> False
+
+-- | Apply 'panguInlines' to every paragraph. Only 'Para' blocks are
+-- rewritten; every other block is left as it is.
+panguFilter :: Pandoc -> Pandoc
+panguFilter = walk transformBlocks
+  where
+    transformBlocks :: Block -> Block
+    transformBlocks (Para inlines) = Para (panguInlines inlines)
+    transformBlocks x = x
+
+-- display math wrapper for MathML
+
+-- | Wrap every display formula in a 'Span' so it can be styled.
+--
+-- NOTE(review): the first component of the attribute triple is the element
+-- identifier, so every display formula receives the same id
+-- @math-container@. A class would avoid duplicate ids, but the stylesheet
+-- would have to change with it.
+displayMathFilter :: Pandoc -> Pandoc
+displayMathFilter = walk wrapDisplayMath
+  where
+    wrapDisplayMath m@(Math DisplayMath _) =
+      Span ("math-container", [], []) [m]
+    wrapDisplayMath x = x
diff --git a/src/Pangu.hs b/src/Pangu.hs
new file mode 100644
index 0000000..38ee6eb
--- /dev/null
+++ b/src/Pangu.hs
@@ -0,0 +1,227 @@
+{-# LANGUAGE OverloadedStrings #-}
+
+module Pangu (pangu, isCJK) where
+
+import Data.Function (fix)
+import Data.List (foldl')
+import Data.Text (Text)
+import qualified Data.Text as T
+import Data.Void (Void)
+import Replace.Megaparsec (streamEdit)
+import Text.Megaparsec
+import Text.Megaparsec.Char
+
+-------------------------------------------------------------------------------
+type Parser = Parsec Void Text
+
+-- | A rule matches a span of text and returns its replacement.
+type Rule = Parser Text
+
+type RuleSet = [Rule]
+
+-- | Apply one rule over the whole text again and again until a pass leaves
+-- the text unchanged.
+applyUntilFixed :: Rule -> Text -> Text
+applyUntilFixed rule =
+  fix
+    ( \loop current ->
+        let next = streamEdit (try rule) id current
+         in if next == current then next else loop next
+    )
+
+-- | Run every rule to its fixed point, one rule after the other, in list
+-- order. The fold is strict so no chain of thunks is built.
+applyRulesRecursively :: RuleSet -> Text -> Text
+applyRulesRecursively rules input = foldl' (flip applyUntilFixed) input rules
+
+-- | Run every rule exactly once over the text, in list order.
+applyRules :: RuleSet -> Text -> Text
+applyRules rules input = foldl' (flip applyOnce) input rules
+  where
+    applyOnce rule = streamEdit (try rule) id
+
+-------------------------------------------------------------------------------
+-- rules for pangu
+
+-- alphaNumChar from megaparsec matches CJK chars...
+-- need to implement a new one
+--
+-- Accepts ASCII letters and ASCII digits only.
+alphanumericChar :: Parser Char
+alphanumericChar = satisfy $ \c ->
+  (c >= 'a' && c <= 'z')
+    || (c >= 'A' && c <= 'Z')
+    || (c >= '0' && c <= '9')
+
+-- | Check if a character falls within the CJK ranges provided
+isCJK :: Char -> Bool
+isCJK c = any (\(start, end) -> c >= start && c <= end) cjkRanges
+  where
+    -- Inclusive code point ranges.
+    cjkRanges =
+      [ ('\x2e80', '\x2eff'),
+        ('\x2f00', '\x2fdf'),
+        ('\x3040', '\x309f'),
+        ('\x30a0', '\x30fa'),
+        ('\x30fc', '\x30ff'),
+        ('\x3100', '\x312f'),
+        ('\x3200', '\x32ff'),
+        ('\x3400', '\x4dbf'),
+        ('\x4e00', '\x9fff'),
+        ('\xf900', '\xfaff')
+      ]
+
+-- | Map an ASCII punctuation mark to its full-width (CJK) counterpart;
+-- every other character is returned unchanged.
+--
+-- The full-width forms are written as escapes so they cannot be folded
+-- back to ASCII by an editor or a text-normalising tool: as previously
+-- written, six of these mappings sent the character to itself.
+convertToFullwidth :: Char -> Char
+convertToFullwidth c =
+  case c of
+    ':' -> '\xff1a' -- FULLWIDTH COLON
+    '.' -> '。'
+    '~' -> '\xff5e' -- FULLWIDTH TILDE
+    '!' -> '\xff01' -- FULLWIDTH EXCLAMATION MARK
+    '?' -> '\xff1f' -- FULLWIDTH QUESTION MARK
+    ',' -> '\xff0c' -- FULLWIDTH COMMA
+    ';' -> '\xff1b' -- FULLWIDTH SEMICOLON
+    '\"' -> '”'
+    '\'' -> '’'
+    _ -> c
+
+-- A parser that matches a single CJK character
+cjkChar :: Parser Char
+cjkChar = satisfy isCJK
+
+-- use python.py as reference for these rules
+
+-- | CJK, optional spaces, a run of colons or a single dot, optional spaces,
+-- CJK: the spaces are dropped and the punctuation becomes full-width.
+fullwidthCJKsymCJK :: Rule
+fullwidthCJKsymCJK = do
+  lcjk <- cjkChar
+  _ <- many (char ' ')
+  sym <- try (some (char ':')) <|> count 1 (char '.')
+  _ <- many (char ' ')
+  rcjk <- cjkChar
+  let transformedsym = map convertToFullwidth sym
+  return $ T.pack $ [lcjk] ++ transformedsym ++ [rcjk]
+
+-- | CJK followed by a run of @~!?,;@ (spaces around the run allowed): the
+-- spaces are dropped and the punctuation becomes full-width.
+fullwidthCJKsym :: Rule
+fullwidthCJKsym = do
+  cjk <- cjkChar
+  _ <- many (char ' ')
+  sym <- some $ oneOf ("~!?,;" :: [Char])
+  _ <- many (char ' ')
+  let transformedsym = T.pack $ map convertToFullwidth sym
+  return $ T.pack [cjk] <> transformedsym
+
+-- | Insert a space between an ellipsis and a following CJK character.
+dotsCJK :: Rule
+dotsCJK = do
+  dots <- chunk "..." <|> chunk "…"
+  cjk <- cjkChar
+  return $ dots <> T.pack (" " ++ [cjk])
+
+-- | CJK, ASCII colon, ASCII alphanumeric: the colon becomes a full-width
+-- colon. (With an ASCII colon in the result the rule changed nothing.)
+fixCJKcolAN :: Rule
+fixCJKcolAN = do
+  cjk <- cjkChar
+  _ <- char ':'
+  an <- alphanumericChar
+  return $ T.pack $ [cjk] ++ "\xff1a" ++ [an]
+
+-- quotes
+-- seems confusing ...
+-- | Characters treated as quotation marks by the quote rules.
+quotesym :: [Char]
+quotesym = "'`\x05f4\""
+
+-- | CJK directly followed by a quote mark: put one space between them.
+cjkquote :: Rule
+cjkquote = spaced <$> cjkChar <*> oneOf quotesym
+  where
+    spaced c q = T.pack [c, ' ', q]
+
+-- | Quote mark directly followed by CJK: put one space between them.
+quoteCJK :: Rule
+quoteCJK = spaced <$> oneOf quotesym <*> cjkChar
+  where
+    spaced q c = T.pack [q, ' ', c]
+
+-- | A run of quote marks, some content, and the next run of quote marks:
+-- surrounding white space of the content is stripped.
+fixQuote :: Rule
+fixQuote = do
+  opening <- quoteRun
+  inner <- many spaceChar *> someTill anySingle (lookAhead quoteRun)
+  closing <- quoteRun
+  pure (T.pack opening <> T.strip (T.pack inner) <> T.pack closing)
+  where
+    quoteRun :: Parser [Char]
+    quoteRun = some (oneOf quotesym)
+
+-- | CJK followed by an apostrophe that does not start @'s@: a space is put
+-- before the apostrophe.
+cjkpossessivequote :: Rule
+cjkpossessivequote =
+  (\c -> T.pack (c : " '"))
+    <$> cjkChar
+    <* char '\''
+    <* lookAhead (anySingleBut 's')
+
+-- This singlequoteCJK rule will turn '你好' into ' 你好'
+-- which seems not desirable...
+-- however, the behavior is aligned with python version
+singlequoteCJK :: Rule
+singlequoteCJK = (\c -> T.pack ("' " ++ [c])) <$> (char '\'' *> cjkChar)
+
+-- | Remove the white space between a CJK or alphanumeric character and a
+-- following @'s@.
+fixPossessivequote :: Rule
+fixPossessivequote =
+  (\owner -> T.pack (owner : "'s"))
+    <$> (cjkChar <|> alphanumericChar)
+    <* some spaceChar
+    <* chunk "'s"
+
+-- hash
+
+-- | @CJK#CJK...#CJK@: a space goes outside each hash sign.
+hashANSCJKhash :: Rule
+hashANSCJKhash = do
+  before <- cjkChar
+  tag <- char '#' *> some cjkChar <* char '#'
+  after <- cjkChar
+  pure . T.pack $ before : " #" ++ tag ++ "# " ++ [after]
+
+-- | CJK followed by a hash sign that is not followed by a space: a space
+-- goes before the hash sign.
+cjkhash :: Rule
+cjkhash =
+  (\c -> T.pack (c : " #"))
+    <$> cjkChar
+    <* char '#'
+    <* lookAhead (anySingleBut ' ')
+
+-- | Hash sign directly followed by CJK: a space goes after the hash sign.
+hashcjk :: Rule
+hashcjk =
+  char '#'
+    *> lookAhead (anySingleBut ' ')
+    *> fmap (\c -> T.pack ("# " ++ [c])) cjkChar
+
+-- operators
+
+-- | CJK, operator, alphanumeric: the operator gets a space on each side.
+cjkOPTan :: Rule
+cjkOPTan =
+  padded
+    <$> cjkChar
+    <*> oneOf ("+-=*/&|<>%" :: [Char])
+    <*> alphanumericChar
+  where
+    padded l op r = T.pack [l, ' ', op, ' ', r]
+
+-- | Alphanumeric, operator, CJK: the operator gets a space on each side.
+anOPTcjk :: Rule
+anOPTcjk =
+  padded
+    <$> alphanumericChar
+    <*> oneOf ("+-=*/&|<>%" :: [Char])
+    <*> cjkChar
+  where
+    padded l op r = T.pack [l, ' ', op, ' ', r]
+
+-- slash/bracket rules are
not implemented
+
+-- CJK and alphanumeric without space
+
+-- | A CJK character that is immediately followed (checked by look-ahead
+-- only) by an alphanumeric or one of the listed symbols gets a space
+-- appended.
+cjkans :: Rule
+cjkans =
+  (\c -> T.pack [c, ' '])
+    <$> cjkChar
+    <* lookAhead (alphanumericChar <|> oneOf ("@$%^&*-+\\=|/" :: [Char]))
+
+-- | An alphanumeric or one of the listed symbols that is immediately
+-- followed (look-ahead only) by a CJK character gets a space appended.
+anscjk :: Rule
+anscjk =
+  (\c -> T.pack [c, ' '])
+    <$> (alphanumericChar <|> oneOf ("~!$%^&*-+\\=|;:,./?" :: [Char]))
+    <* lookAhead cjkChar
+
+-- rule set, the order matters
+
+-- | Rules that are repeated until the text stops changing.
+recursiveRules :: RuleSet
+recursiveRules = [fullwidthCJKsymCJK, fullwidthCJKsym]
+
+-- | Rules that are applied in a single pass.
+onepassRules :: RuleSet
+onepassRules = [anscjk, cjkans]
+
+-- | Normalise spacing and punctuation between CJK and non-CJK text: the
+-- recursive rules run first, the one-pass rules afterwards.
+pangu :: Text -> Text
+pangu = applyRules onepassRules . applyRulesRecursively recursiveRules
\ No newline at end of file
diff --git a/src/SideNoteHTML.hs b/src/SideNoteHTML.hs
new file mode 100644
index 0000000..21e624b
--- /dev/null
+++ b/src/SideNoteHTML.hs
@@ -0,0 +1,161 @@
+{-# LANGUAGE BangPatterns #-}
+{-# LANGUAGE DerivingStrategies #-}
+{-# LANGUAGE LambdaCase #-}
+{-# LANGUAGE OverloadedStrings #-}
+{-# LANGUAGE ScopedTypeVariables #-}
+{- |
+   Module : Text.Pandoc.SideNoteHTML
+   Description : Convert pandoc footnotes to sidenotes
+   Copyright : (c) Tony Zorman 2023
+   License : MIT
+   Maintainer : Tony Zorman
+   Stability : experimental
+   Portability : non-portable
+-}
+module SideNoteHTML (usingSideNotesHTML) where
+
+import Control.Monad (foldM)
+import Control.Monad.State (State, get, modify', runState)
+import Data.Text (Text)
+import Text.Pandoc (runPure, writeHtml5String)
+import Text.Pandoc.Definition (Block (..), Inline (..), Pandoc (..))
+import Text.Pandoc.Options (WriterOptions)
+import Text.Pandoc.Shared (tshow)
+import Text.Pandoc.Walk (walkM)
+import qualified Data.Text as T
+
+-- type NoteType :: Type
+data NoteType = Sidenote | Marginnote
+  deriving stock (Show, Eq)
+
+-- type SidenoteState :: Type
+data SidenoteState = SNS
+  { _writer :: !WriterOptions
+  , counter :: !Int
+  }
+
+-- type Sidenote :: Type -> Type
+type Sidenote = State SidenoteState
+
+-- | Like 'Text.Pandoc.SideNote.usingSideNotes',
but immediately
+-- pre-render the sidenotes. This has the advantage that sidenotes may
+-- be wrapped in a @\<div\>@ (instead of a 'Span'), which allows arbitrary
+-- blocks to be nested in them. The disadvantage is that one now has to
+-- specify the 'WriterOptions' for the current document, meaning this is
+-- meant to be used as a module and is unlikely to be useful as a
+-- standalone application.
+--
+-- ==== __Example__
+--
+-- Using this function with <https://jaspervdj.be/hakyll/ hakyll> could
+-- look something like the following, defining an equivalent to the
+-- default @pandocCompiler@.  The transformation is pure, so the
+-- non-monadic @pandocCompilerWithTransform@ is the matching entry point.
+--
+-- > myPandocCompiler :: Compiler (Item String)
+-- > myPandocCompiler =
+-- >   pandocCompilerWithTransform
+-- >     defaultHakyllReaderOptions
+-- >     defaultHakyllWriterOptions
+-- >     (usingSideNotesHTML defaultHakyllWriterOptions)
+--
+usingSideNotesHTML :: WriterOptions -> Pandoc -> Pandoc
+usingSideNotesHTML writer (Pandoc meta blocks) =
+  Pandoc meta (dropLeadingDummy (convert initialState blocks))
+  where
+    -- The note counter starts at zero for every document.
+    initialState :: SidenoteState
+    initialState = SNS writer 0
+
+    -- Drop a superfluous paragraph at the start of the document.
+    dropLeadingDummy :: [Block] -> [Block]
+    dropLeadingDummy (Para [Str ""] : rest) = rest
+    dropLeadingDummy other = other
+
+    -- Convert the top-level blocks one at a time, handing the state left
+    -- behind by each block on to the next one.
+    convert :: SidenoteState -> [Block] -> [Block]
+    convert _ [] = []
+    convert st (blk : rest) =
+      let (rendered, st') = runState (walkM mkSidenote [blk]) st
+       in rendered <> convert st' rest
+
+-- Sidenotes can probably appear in more places; this should be
+-- filled-in at some point.
+mkSidenote :: [Block] -> Sidenote [Block]
+mkSidenote = foldM (\acc b -> (acc <>) <$> single b) []
+  where
+    -- Try to find and render a sidenote in a single block.
+    single :: Block -> Sidenote [Block]
+    single = \case
+      -- Simulate a paragraph by inserting a dummy block; this is needed
+      -- in case two consecutive paragraphs have sidenotes, or a paragraph
+      -- doesn't have one at all.
+      Para inlines -> (Para [Str ""] :) <$> renderSidenote [] inlines
+      Plain inlines -> renderSidenote [] inlines
+      OrderedList attrs bs -> (:[]) . OrderedList attrs <$> traverse mkSidenote bs
+      BulletList bs -> (:[]) . 
BulletList <$> traverse mkSidenote bs + block -> pure [block] + +renderSidenote :: [Inline] -> [Inline] -> Sidenote [Block] +renderSidenote !inlines = \case + [] -> pure [plain inlines] + Note bs : xs -> do block <- go bs + mappend [ -- Start gluing before, see [Note Comment]. + plain (RawInline "html" commentStart : inlines) + , block + ] + <$> renderSidenote + [RawInline "html" commentEnd] -- End gluing after + xs + b : xs -> renderSidenote (b : inlines) xs + where + go :: [Block] -> Sidenote Block + go blocks = do + SNS w i <- get <* modify' (\sns -> sns{ counter = 1 + counter sns }) + let (typ, noteText) = getNoteType (render w blocks) + pure . RawBlock "html" $ + mconcat [ commentEnd -- End gluing before + , label typ i <> input i <> note typ noteText + , commentStart -- Start gluing after + ] + + -- The '{-}' symbol differentiates between margin note and side note. + getNoteType :: Text -> (NoteType, Text) + getNoteType t + | "{-} " `T.isPrefixOf` t = (Marginnote, T.drop 4 t) + | otherwise = (Sidenote , t) + + render :: WriterOptions -> [Block] -> Text + render w bs = case runPure (writeHtml5String w (Pandoc mempty bs)) of + Left err -> error $ "Text.Pandoc.SideNoteHTML.writePandocWith: " ++ show err + Right txt -> T.drop 1 (T.dropWhile (/= '\n') txt) + + commentEnd :: T.Text + commentEnd = "-->" + + commentStart :: T.Text + commentStart = "