;; get-docs.fnl: builds the Lua standard-library documentation tables used by
;; fennel-ls from the HTML text of the Lua reference manual.
;;
;; Usage: get-docs.fnl $VERSION
;;   $VERSION must be `lua5` followed by a single digit. That digit selects
;;   https://www.lua.org/manual/5.N/manual.html, which is read through
;;   util.curl-cached. The generated Fennel source is printed to stdout.
;;   With any other argument list the usage line is written to stderr and the
;;   script exits with status 1.
;;
;; Definitions, in file order:
;;   index-pattern     pattern locating the "Basic Functions" heading.
;;   parse-html        splits the manual text into module sections and field
;;                     sections; returns (values modules fields last-update).
;;                     It also appends entries for `arg`, io.stdin, io.stdout
;;                     and io.stderr to the field sections.
;;   html-to-markdown  rewrites one HTML fragment as markdown and raises
;;                     "unhandled tag:" when markup is still present afterwards.
;;   parse-h3-section  parses one function/variable section; returns
;;                     (values ?module key doc), where doc carries :binding and
;;                     :metadata (docstring, arglist, item kind).
;;   parse-h2-section  parses one module section; returns
;;                     (values module-name doc) and asserts that the title maps
;;                     to a known module name.
;;   parse             combines the above into
;;                     {: docs : file-fields : last-update : version}; entries
;;                     whose module is "file" get "self" prepended to their
;;                     arglist and are stored in file-fields.
;;   render            serializes that table to Fennel source text.
;;   derive-docs-for   validates the version, downloads and parses the manual,
;;                     and prints the rendered result. The version digit is
;;                     matched with %d so that anything else fails the assert
;;                     with "Invalid Lua version." instead of failing later
;;                     inside string.format.
;;
;; NOTE(review): in this copy many string patterns look like they have lost
;; their HTML markup (for example the gsub whose pattern is the empty string
;; and the header match that is only a bare capture), part of parse-html
;; appears to be missing, and several `;;` comments share a physical line with
;; code. None of that is repaired here -- confirm against the upstream script
;; before running.
"Script to generate /src/fennel-ls/docs/lua54.fnl and friends automatically" (local fennel (require :fennel)) (local util (require :tools.util)) (local index-pattern "

.%.1 – Basic Functions.-\n") (fn parse-html [html] "splits the lua manual into the relevant sections" (let [begin-index (assert (html:find index-pattern) "no basic functions?") end-index (assert (html:find "

. –

.-\n" (+ prev 1)) section (stdlib:sub prev (if header (- header 1)))] (let [index (section:find "

")] (if index (do (table.insert fields (section:sub 1 (- index 1))) (table.insert modules (section:sub index))) (table.insert fields section))) (when header (loop header)))) (fn make-title [name] (.. "

" name "

\n")) ;; Artificially insert the `arg` docs. ;; This isn't present in the "Basic Functions" part of the manual, but is ;; still useful to be included in the docs. (let [arg-signature (make-title :arg) desc (-> html ;; The goal here is to extract the smallest string which: ;; 1) contains the string "global table called arg" ;; 2) ends with "\n

" (not inclusive) ;; 3) begins with "\n

" (not inclusive) ;; This match fulfills 1 and 2 (: :match "(.*global table called arg.-)\n

") ;; trust me, I tried everything. ;; The best way to do 3 is a non-greedy search in reverse (: :reverse) (: :match "(.-)>p<\n") (: :reverse))] (table.insert fields (.. arg-signature desc))) (table.insert fields (.. (make-title "io.stdin") "stdin file")) (table.insert fields (.. (make-title "io.stdout") "stdout file")) (table.insert fields (.. (make-title "io.stderr") "stderr file")) (loop (stdlib:find "


.-\n")) (values modules fields last-update))) (fn html-to-markdown [str] (let [str (-> str ;; delete

tags (: :gsub "" "") ;; tags for the rest (: :gsub "\"(.-)\"" "`\"%1\"`") (: :gsub "\'(.-)\'" "`\"%1\"`") (: :gsub "(.-)" "`%1`") ;; supremum to unicode (: :gsub "x" "ˣ") (: :gsub "e" "ᵉ") (: :gsub "y" "ʸ") (: :gsub "51" "⁵¹") (: :gsub "32" "³²") ; ᵃᵇᶜᵈᵉᶠᵍʰⁱʲᵏˡᵐⁿᵒᵖ𐞥ʳˢᵗᵘᵛʷˣʸᶻ ;; bold to ** (: :gsub "([^<]+)" "*%1*") (: :gsub "([^<]+)" "**%1**") ;; defeat all the links (: :gsub "([^<]+)" "%1") (: :gsub "([^<]+)" "%1") (: :gsub "([^<]+)" "%1") (: :gsub "See [^<]+[^%.]+%." "") (: :gsub "[ \n]%(see [^<]+%)." "") (: :gsub "[ \n]%([^<]+%)." "") ;; code blocks (: :gsub "

\n?([^<]-)\n?
" "```lua\n%1\n```") ;; list items to indented * thingies (: :gsub "
  • ([^<]+)
  • " #(.. "* " (: ($:match "^\n*(.-)\n*$") :gsub "\n" "\n "))) (: :gsub "" "")) ;; check to ensure that all the tags have been defeated tag (str:match "<[^>]+>[^>]+>")] (when tag (error (.. "unhandled tag:" tag "\n" str))) (-> str ;; trim whitespace (: :match "^%s*(.-)%s*$") ;; html things (: :gsub " " " ") (: :gsub "–" "–") (: :gsub "—" "—") ;; For some reason, they use an html middot, but we want to use periods. (: :gsub "···" "...") (: :gsub ">" ">") (: :gsub "<" "<") (: :gsub "&" "<") (: :gsub "π" "π") (: :gsub "\n\n+" "\n\n")))) (fn parse-h3-section [html] "parse a section that starts with an h3 tag. These are individual functions/variables." (let [(header description) (html:match "^(.-)\n+(.-)\n*$") optional-args [] signature (header:match "(.-)") ;; strip commas signature (signature:gsub "," " ") ;; Replace `[]`'d args with ?-prefixes ;; Three times is enough, as `table.concat` and `load` and `loadfile` ;; and `utf8.codepoint` and `utf8.len` have 3 sets of []'s. ;; Lua 5.2 manual has a typo, so the last pass makes the `]` optional. signature (signature:gsub "%[ -([^%[%] ]+)([^%[%]]-)%](%]-%))" #(do (table.insert optional-args $1) (.. :? $1 $2 $3))) signature (signature:gsub "%[ -([^%[%] ]+)([^%[%]]-)%](%]-%))" #(do (table.insert optional-args $1) (.. :? $1 $2 $3))) signature (signature:gsub "%[ -([^%[%] ]+)([^%[%]]-)%]?(%]-%))" #(do (table.insert optional-args $1) (.. :? $1 $2 $3))) ;; hide the thread argument in the debug functions signature (if (signature:find "debug") (signature:gsub "%[thread -%]" "") signature) ;; hide the ?pos argument in table.insert signature (if (signature:find "table%.insert") (signature:gsub "%[pos -%]" "") signature) signature (if (signature:find "pcall .* arg1") (do (table.insert optional-args "arg1") (signature:gsub "arg1" "?arg1")) signature) ;; For some reason, they use an html middot, but we want to use periods. 
signature (signature:gsub "···" "...") ;; fix parens signature (signature:gsub "^(.-) -%(" "(%1 ") ;; fix spaces signature (signature:gsub " +" " ") signature (signature:gsub " +%)" ")") signature-list (case (signature:match "^%((.-)%)$") call (doto (icollect [arg (call:gmatch "[^ ]+")] arg) (table.remove 1))) ;; tags for optional args description (accumulate [desc description _ arg (ipairs optional-args)] (desc:gsub (.. "" arg "") (.. "`?" arg "`"))) ;; trim off the string pattern and string.pack/string.unpack format docs description (description:gsub "\n[^\n]*

    .*" "") description (html-to-markdown description) name (signature:match "[^() ]+") key (name:match "[^.:]+$") ?module (and (name:find "[.:]") (name:match "^[^.:]+")) kind (if (signature:find "[()]") :Function :Variable)] (values ?module key {:binding (signature:match "[^() ]+") :metadata {:fnl/docstring description :fnl/arglist signature-list :fls/itemKind kind}}))) (fn parse-h2-section [html] "parse a section that starts with an h2 tag. These are the main modules." (let [(title description) (html:match "(.-)\n(.*)") module-name (if (title:find "Coroutine") "coroutine" (title:find "Modules") "package" (title:find "String") "string" (title:find "UTF") "utf8" (title:find "Mathematical") "math" (title:find "Input and Output") "io" (title:find "Operating System") "os" (title:find "Debug") "debug" (title:find "Bitwise") "bit32" (title:find "Table") "table") description (html-to-markdown description)] (assert module-name title) (values module-name {:binding module-name :fields {} :metadata {:fnl/docstring description :fls/itemKind :Module}}))) (fn parse [input] (let [version (input:match "Lua .- Reference Manual") (modules module-items last-update) (parse-html input) docs (collect [_ module (ipairs modules)] (parse-h2-section module)) file-fields {}] (each [_ section (ipairs module-items)] (let [(mod k v) (parse-h3-section section)] (if (not mod) (tset docs k v) (= mod "file") (do (table.insert v.metadata.fnl/arglist 1 "self") (tset file-fields k v)) (let [module-doc (. docs mod)] (assert module-doc (.. mod " not found")) (tset (. module-doc :fields) k v))))) {: docs : file-fields : last-update : version})) (fn render [{: docs : file-fields : last-update : version}] (.. 
";; Lua " version " last updated " last-update "\n" (fennel.view (fennel.list (fennel.sym :local) (fennel.sym :docs) docs)) "\n" (fennel.view (fennel.list (fennel.sym :local) (fennel.sym :file-fields) file-fields)) "\n" "(set docs._G.fields docs)\n" "(set docs.io.fields.stdin.fields file-fields)\n" "(set docs.io.fields.stdout.fields file-fields)\n" "(set docs.io.fields.stderr.fields file-fields)\n" "docs")) (fn derive-docs-for [version] (let [v (assert (version:match "^lua5(%d)$") "Invalid Lua version.") url (string.format "https://www.lua.org/manual/5.%d/manual.html" v) doc-tbl (render (parse (with-open [file (util.curl-cached url)] (file:read :*a))))] (print ";; auto-generated by `make docs` from fennel-ls. Contents come from") (print ";;" url) (print doc-tbl))) (case arg [version] (derive-docs-for version) _ (do (io.stderr:write "USAGE: get-docs.fnl $VERSION\n") (os.exit 1)))