238 lines
11 KiB
Fennel
238 lines
11 KiB
Fennel
"Script to generate /src/fennel-ls/docs/lua54.fnl and friends automatically"
|
||
|
||
(local fennel (require :fennel))
|
||
(local util (require :tools.util))
|
||
|
||
;; Lua pattern that locates the "Basic Functions" <h2> heading, i.e. the start
;; of the standard-library chapter. The separator between the section number
;; and the anchor is matched loosely (any run without `<` or a newline) so the
;; same pattern accepts a literal en dash, the `&ndash;` entity, or a plain
;; hyphen.
(local index-pattern "<h2>.%.1 [^<\n]- <a name=\".%.1\">Basic Functions.-\n")
|
||
|
||
(fn parse-html [html]
  "Splits the Lua manual into the relevant sections.
Returns three values: a list of module chunks (each starting at an <h2>),
a list of field chunks (each starting at an <hr><h3> heading, preceded by a
few synthetic entries), and the text of the line after the last-update marker."
  (let [header-pattern "<hr><h3>.-\n"
        begin-index (assert (html:find index-pattern) "no basic functions?")
        ;; The chapter following the standard library starts with an <h1>.
        ;; The separator is matched loosely so a literal en dash, `&ndash;`,
        ;; or a hyphen are all accepted.
        end-index (assert (html:find "<h1>. [^<\n]- <a name=\".\".-\n" begin-index)
                          "no chapter heading after the standard library?")
        stdlib (html:sub begin-index (- end-index 1))
        last-update (html:match "Last update:\n(.-)\n")
        fields []
        modules []]

    (fn make-title [name]
      ;; Builds a heading shaped like the manual's own <h3> entries.
      (.. "<hr><h3><a name=\"pdf-" name "\"><code>" name "</code></a></h3>\n"))

    ;; Artificially insert the `arg` docs.
    ;; This isn't present in the "Basic Functions" part of the manual, but is
    ;; still useful to be included in the docs.
    ;;
    ;; The goal here is to extract the smallest string which:
    ;; 1) contains the string "global table called <code>arg</code>"
    ;; 2) ends with "\n<p>" (not inclusive)
    ;; 3) begins with "\n<p>" (not inclusive)
    (let [;; This match fulfills 1 and 2
          up-to-arg (assert (html:match "(.*global table called <code>arg</code>.-)\n<p>")
                            "no description of the arg table?")
          ;; The best way to do 3 is a non-greedy search in reverse
          reversed-desc (assert (: (up-to-arg:reverse) :match "(.-)>p<\n")
                                "arg description does not start a paragraph?")
          desc (reversed-desc:reverse)]
      (table.insert fields (.. (make-title :arg) desc)))
    (table.insert fields (.. (make-title "io.stdin") "stdin file"))
    (table.insert fields (.. (make-title "io.stdout") "stdout file"))
    (table.insert fields (.. (make-title "io.stderr") "stderr file"))

    ;; Walk the chapter one <hr><h3> heading at a time. A chunk that contains
    ;; an <h2> is split there: the part before it is still a field, and the
    ;; part from the <h2> onwards opens the next module.
    (var header (assert (stdlib:find header-pattern)
                        "no <hr><h3> entries in the standard library?"))
    (while header
      (let [next-header (stdlib:find header-pattern (+ header 1))
            ;; with no further header the chunk runs to the end of the chapter
            section (stdlib:sub header (if next-header (- next-header 1)))
            module-start (section:find "<h2>")]
        (if module-start
            (do
              (table.insert fields (section:sub 1 (- module-start 1)))
              (table.insert modules (section:sub module-start)))
            (table.insert fields section))
        (set header next-header)))

    (values modules fields last-update)))
|
||
|
||
(fn html-to-markdown [str]
  "Converts a fragment of Lua-manual HTML into markdown and returns it as a
single string. Raises an error when the leftover-tag check still finds markup
after all the known tags have been converted."
  (let [markdown (-> str
                     ;; delete <p> tags
                     (: :gsub "</?p>" "")
                     ;; quoted <code> first, so the quotes end up inside the backticks
                     (: :gsub "\"<code>(.-)</code>\"" "`\"%1\"`")
                     (: :gsub "\'<code>(.-)</code>\'" "`\"%1\"`")
                     ;; <code> tags for the rest
                     (: :gsub "<code>(.-)</code>" "`%1`")
                     ;; superscript to unicode
                     (: :gsub "<sup>x</sup>" "ˣ")
                     (: :gsub "<sup>e</sup>" "ᵉ")
                     (: :gsub "<sup>y</sup>" "ʸ")
                     (: :gsub "<sup>51</sup>" "⁵¹")
                     (: :gsub "<sup>32</sup>" "³²")
                     ; ᵃᵇᶜᵈᵉᶠᵍʰⁱʲᵏˡᵐⁿᵒᵖ𐞥ʳˢᵗᵘᵛʷˣʸᶻ
                     ;; emphasis to *, bold to **
                     (: :gsub "<em>([^<]+)</em>" "*%1*")
                     (: :gsub "<b>([^<]+)</b>" "**%1**")
                     ;; defeat all the links
                     (: :gsub "<a name=\"pdf%-[^\"]+\">([^<]+)</a>" "%1")
                     (: :gsub "<a href=\"#pdf%-[^\"]+\">([^<]+)</a>" "%1")
                     (: :gsub "<a href=\"#lua_[^\"]+\">([^<]+)</a>" "%1")
                     (: :gsub "See <a href=\"#[^\"]+\">[^<]+</a>[^%.]+%." "")
                     (: :gsub "[ \n]%(see <a href=\"#[^\"]+\">[^<]+</a>%)." "")
                     (: :gsub "[ \n]%(<a href=\"#[^\"]+\">[^<]+</a>%)." "")
                     ;; code blocks
                     (: :gsub "<pre>\n?([^<]-)\n?</pre>" "```lua\n%1\n```")
                     ;; list items to indented * thingies
                     (: :gsub "<li>([^<]+)</li>"
                        #(.. "* " (: ($:match "^\n*(.-)\n*$") :gsub "\n" "\n ")))
                     (: :gsub "</?ul>" ""))
        ;; check to ensure that all the tags have been defeated
        tag (markdown:match "<[^>]+>[^>]+>")]
    (when tag (error (.. "unhandled tag:" tag "\n" markdown)))
    ;; gsub also returns a match count; pick-values keeps only the string.
    (pick-values 1
      (-> markdown
          ;; trim whitespace
          (: :match "^%s*(.-)%s*$")
          ;; html entities; these run after the tag check so that a decoded
          ;; `<` or `>` is never mistaken for leftover markup
          (: :gsub "&nbsp;" " ")
          ;; a literal no-break space (UTF-8 bytes C2 A0) is treated the same way
          (: :gsub "\194\160" " ")
          (: :gsub "&ndash;" "–")
          (: :gsub "&mdash;" "—")
          ;; For some reason, they use an html middot, but we want to use periods.
          (: :gsub "&middot;&middot;&middot;" "...")
          (: :gsub "···" "...")
          (: :gsub "&gt;" ">")
          (: :gsub "&lt;" "<")
          (: :gsub "&pi;" "π")
          ;; `&amp;` goes last so an escaped entity such as `&amp;lt;` is
          ;; decoded exactly once
          (: :gsub "&amp;" "&")
          ;; collapse runs of blank lines
          (: :gsub "\n\n+" "\n\n")))))
|
||
|
||
(fn parse-h3-section [html]
  "Parse a section that starts with an h3 tag. These are individual functions/variables.
Returns three values: the owning module name (nil when the name has no `.` or
`:`), the field key, and a table holding :binding and :metadata."
  (let [;; first line is the heading; everything after the blank run is prose
        (header description) (html:match "^(.-)\n+(.-)\n*$")
        _ (assert header (.. "malformed h3 section: " html))
        optional-args []
        ;; Rewrites `[arg ...]` groups as `?arg ...` and records each arg name.
        mark-optional (fn [sig pattern]
                        (pick-values 1
                          (sig:gsub pattern
                                    (fn [arg rest closing]
                                      (table.insert optional-args arg)
                                      (.. "?" arg rest closing)))))
        bracket-pattern "%[ -([^%[%] ]+)([^%[%]]-)%](%]-%))"
        ;; Lua 5.2 manual has a typo, so this variant makes the `]` optional.
        lenient-bracket-pattern "%[ -([^%[%] ]+)([^%[%]]-)%]?(%]-%))"
        signature (assert (header:match "<code>(.-)</code>")
                          (.. "no <code> signature in heading: " header))
        ;; strip commas
        signature (signature:gsub "," " ")
        ;; Replace `[]`'d args with ?-prefixes
        ;; Three times is enough, as `table.concat` and `load` and `loadfile`
        ;; and `utf8.codepoint` and `utf8.len` have 3 sets of []'s.
        signature (mark-optional signature bracket-pattern)
        signature (mark-optional signature bracket-pattern)
        signature (mark-optional signature lenient-bracket-pattern)
        ;; hide the thread argument in the debug functions
        signature (if (signature:find "debug")
                      (signature:gsub "%[thread -%]" "")
                      signature)
        ;; hide the ?pos argument in table.insert
        signature (if (signature:find "table%.insert")
                      (signature:gsub "%[pos -%]" "")
                      signature)
        ;; a pcall signature whose arg1 was not bracketed: mark it optional by hand
        signature (if (signature:find "pcall .* arg1")
                      (do
                        (table.insert optional-args "arg1")
                        (signature:gsub "arg1" "?arg1"))
                      signature)
        ;; For some reason, they use an html middot, but we want to use periods.
        ;; Both the entity and the literal character are handled.
        signature (signature:gsub "&middot;&middot;&middot;" "...")
        signature (signature:gsub "···" "...")
        ;; fix parens: `name (args)` becomes `(name args)`
        signature (signature:gsub "^(.-) -%(" "(%1 ")
        ;; fix spaces
        signature (signature:gsub " +" " ")
        signature (signature:gsub " +%)" ")")
        ;; argument names only; stays nil when the signature is not a call
        signature-list (case (signature:match "^%((.-)%)$")
                         call (doto (icollect [arg (call:gmatch "[^ ]+")]
                                      arg)
                                (table.remove 1)))
        ;; <code> tags for optional args; the name is escaped so that any
        ;; punctuation in it is matched literally rather than as a pattern
        description (accumulate [desc description _ arg (ipairs optional-args)]
                      (let [literal (arg:gsub "%p" "%%%0")]
                        (pick-values 1
                          (desc:gsub (.. "<code>" literal "</code>")
                                     (.. "`?" arg "`")))))
        ;; trim off the string pattern and string.pack/string.unpack format docs
        description (description:gsub "\n[^\n]*<h3>.*" "")
        description (html-to-markdown description)
        name (signature:match "[^() ]+")
        key (name:match "[^.:]+$")
        ?module (and (name:find "[.:]") (name:match "^[^.:]+"))
        kind (if (signature:find "[()]") :Function :Variable)]

    (values ?module
            key
            {:binding name
             :metadata {:fnl/docstring description
                        :fnl/arglist signature-list
                        :fls/itemKind kind}})))
|
||
|
||
(fn parse-h2-section [html]
  "Parse a section that starts with an h2 tag. These are the main modules.
Returns two values: the module name and its doc table."
  (fn module-for [title]
    ;; The first keyword found in the heading decides the module; the order of
    ;; this list is the order in which the keywords are tried.
    (var found nil)
    (each [_ [keyword module] (ipairs [["Coroutine" "coroutine"]
                                       ["Modules" "package"]
                                       ["String" "string"]
                                       ["UTF" "utf8"]
                                       ["Mathematical" "math"]
                                       ["Input and Output" "io"]
                                       ["Operating System" "os"]
                                       ["Debug" "debug"]
                                       ["Bitwise" "bit32"]
                                       ["Table" "table"]])]
      (when (and (not found) (title:find keyword))
        (set found module)))
    found)

  (let [(title body) (html:match "(.-)\n(.*)")
        module-name (module-for title)
        docstring (html-to-markdown body)]
    ;; an unrecognised heading fails with the heading itself as the message
    (assert module-name title)
    (values module-name
            {:binding module-name
             :fields {}
             :metadata {:fnl/docstring docstring
                        :fls/itemKind :Module}})))
|
||
|
||
(fn parse [input]
  "Turns the manual's HTML into a table with :docs (globals and modules),
:file-fields (methods whose owner is `file`), :last-update and :version."
  (let [version (input:match "Lua .- Reference Manual")
        (module-sections item-sections last-update) (parse-html input)
        docs {}
        file-fields {}]
    ;; modules first, so that items can be attached to them below
    (each [_ section (ipairs module-sections)]
      (let [(name module-doc) (parse-h2-section section)]
        (tset docs name module-doc)))
    (each [_ section (ipairs item-sections)]
      (let [(owner key item) (parse-h3-section section)]
        (if (= owner "file")
            ;; file methods take the file handle as an explicit first argument
            (do
              (table.insert (. item :metadata :fnl/arglist) 1 "self")
              (tset file-fields key item))
            owner
            (let [module-doc (. docs owner)]
              (assert module-doc (.. owner " not found"))
              (tset module-doc.fields key item))
            ;; no owner: a global
            (tset docs key item))))
    {: docs : file-fields : last-update : version}))
|
||
|
||
(fn render [{: docs : file-fields : last-update : version}]
  "Renders the parsed docs as the text of a Fennel module: a header comment,
two `local` definitions, the statements that tie the tables together, and a
final `docs` expression."
  (fn local-form [name value]
    ;; textual `(local <name> <value>)`
    (fennel.view (fennel.list (fennel.sym :local) (fennel.sym name) value)))
  (let [banner (.. ";; Lua " version " last updated " last-update "\n")
        wiring (.. "(set docs._G.fields docs)\n"
                   "(set docs.io.fields.stdin.fields file-fields)\n"
                   "(set docs.io.fields.stdout.fields file-fields)\n"
                   "(set docs.io.fields.stderr.fields file-fields)\n")]
    (.. banner
        (local-form :docs docs) "\n"
        (local-form :file-fields file-fields) "\n"
        wiring
        "docs")))
|
||
|
||
(fn derive-docs-for [version]
  "Fetches the reference manual for `version` (a string such as lua54, i.e.
`lua5` followed by one digit), converts it, and prints the generated module to
stdout. Raises `Invalid Lua version.` for any other string."
  (let [;; require a digit, so a bad version is rejected here instead of
        ;; failing later inside string.format
        minor (assert (version:match "^lua5(%d)$") "Invalid Lua version.")
        url (string.format "https://www.lua.org/manual/5.%d/manual.html"
                           (tonumber minor))
        doc-tbl (render (parse (with-open [file (util.curl-cached url)]
                                 (file:read :*a))))]
    (print ";; auto-generated by `make docs` from fennel-ls. Contents come from")
    (print ";;" url)
    (print doc-tbl)))
|
||
|
||
;; Script entry point: the first command-line argument is the version to
;; generate docs for; with no argument, print usage and exit with status 1.
(let [version (if (= (type arg) :table) (. arg 1))]
  (if (not= version nil)
      (derive-docs-for version)
      (do
        (io.stderr:write "USAGE: get-docs.fnl $VERSION\n")
        (os.exit 1))))
|