Provides the parsing facilities for Marginalia.
This file contains the complete Marginalia parser. It leverages the Clojure reader instead of implementing a complete Clojure parsing solution.
And seems to be in need of some more documentation and cleanup for readability.
(ns sidenotes.parser
(:require
[clojure.string :as string]
[clojure.java.io :as io]
[clojure.tools.namespace :as ctn]
[cljs.tagged-literals :as ctl]))
Access to private or protected field. field-name is a symbol or keyword.
Extracted from clojure.contrib.reflect
(defn get-field
[klass field-name obj]
(-> klass (.getDeclaredField (name field-name))
(doto (.setAccessible true))
(.get obj)))
Calls a private or protected method.
params is a vector of classes which correspond to the arguments to the method e
obj is nil for static methods, the instance object otherwise.
The method-name is given a symbol or a keyword (something Named).
Extracted from clojure.contrib.reflect
(defn call-method
[klass method-name params obj & args]
(-> klass (.getDeclaredMethod (name method-name)
(into-array Class params))
(doto (.setAccessible true))
(.invoke obj (into-array Object args))))
A simple record for holding comments.
(defrecord Comment [content])
Attach a method for printing to Comment records.
(defmethod print-method Comment [comment ^String out] (.write out (str \\" (.content comment) \\")))
Hold comments that are defined at top level.
(def top-level-comments (atom []))
Hold comments that are defined inside a form.
(def sub-level-comments (atom []))
(def ^{:dynamic true} *comments* nil)
(def ^{:dynamic true} *lift-inline-comments* nil)
(def ^{:dynamic true} *delete-lifted-comments* nil)
Remeber if adding comments is currently enabled or disabled.
(def comments-enabled? (atom true))
Marginalia can be given directives in comments. A directive is a comment line containing a directive name, in the form ;; @DirectiveName. Directives change the behavior of the parser within the files that contain them.
The following directives are defined:
@MSidenotesDisable suppresses subsequent comments from the docs@MSidenotesEnable includes subsequent comments in the docs(def directives
{"SidenotesDisable" (fn [] (swap! comments-enabled? false))
"SidenotesEnable" (fn [] (swap! comments-enabled? true))})
If the given line is a directive, applies it. Returns a value indicating whether the line should be included in the comments list.
(defn process-directive!
[line]
(let [directive (->> (re-find #"^;+\\s*@(\\w+)" line)
(last)
(get directives))]
(when directive
(directive))
(not directive)))
Read a comment from the reader.
(defn read-comment
([reader semicolon]
(let [sb (StringBuilder.)]
(.append sb semicolon)
(loop [c (.read reader)]
(let [ch (char c)]
(if (or (= ch \\newline)
(= ch \\return))
(let [line (dec (.getLineNumber reader))
text (.toString sb)
include? (process-directive! text)]
(when (and include? @comments-enabled?)
(swap! *comments* conj {:form (Comment. text)
:text [text]
:start line
:end line}))
reader)
(do
(.append sb (Character/toString ch))
(recur (.read reader))))))))
([reader semicolon opts pending]
(read-comment reader semicolon)))
Set the given reader as handler for lines starting with ;
(defn set-comment-reader
[reader]
(aset (get-field clojure.lang.LispReader :macros nil)
(int \\;)
reader))
Skip forward until something besides a comment or whitespace shows up.
(defn skip-spaces-and-comments
[rdr]
(loop [c (.read rdr)]
(cond (= c -1) nil
(= (char c) \\;) (do (read-comment rdr \\;)
(recur (.read rdr)))
(#{\\space \\tab \\return \\newline \\,} (char c)) (recur (.read rdr))
:else (.unread rdr c))))
(defrecord DoubleColonKeyword [content])
(defmethod print-method DoubleColonKeyword [dck ^java.io.Writer out] (.write out (str \\: (.content dck))))
(defmethod print-dup DoubleColonKeyword [dck ^java.io.Writer out] (print-method dck out))
(defn ^:private read-token [reader c]
(call-method clojure.lang.LispReader :readToken
[java.io.PushbackReader Character/TYPE]
nil reader c))
Clojure 1.9 changed the signature of LispReader/matchSymbol, taking a new parameter of type LispReader\$Resolver. Conveniently, we can test for the existence of the reader-resolver var to detect running under 1.9.
We must take care to use the correct overload for the project's runtime, else we will crash and fail people's builds.
(if-let [resolver-var (resolve '*reader-resolver*)]
(defn ^:private match-symbol [s]
(call-method clojure.lang.LispReader :matchSymbol
[String, (Class/forName "clojure.lang.LispReader\$Resolver")]
nil s (deref resolver-var)))
(defn ^:private match-symbol [s]
(call-method clojure.lang.LispReader :matchSymbol
[String]
nil s)))
Read a keyword from reader.
(defn read-keyword
([reader colon]
(let [c (.read reader)]
(if (= (int \\:) c)
(-> (read-token reader (char c))
match-symbol
DoubleColonKeyword.)
(do (.unread reader c)
(-> (read-token reader colon)
match-symbol)))))
([reader colon opts pending]
(read-keyword reader colon)))
Set the given reader as handler for keywords.
(defn set-keyword-reader
[reader]
(aset (get-field clojure.lang.LispReader :macros nil)
(int \\:)
reader))
Check if two sections are adjacent.
(defn adjacent? [f s] (= (-> f :end) (-> s :start dec)))
Convert a section to a string.
(defn- ->str
[m]
(-> (-> m :form .content)
(string/replace #"^;+\\s(\\s*)" "\$1")
(string/replace #"^;+" "")))
Merge two comment sections into one.
(defn merge-comments
[f s]
{:form (Comment. (str (->str f) "\\n" (->str s)))
:text (into (:text f) (:text s))
:start (:start f)
:end (:end s)})
Parse one form. Throw exception with start line number included when an error is encountered.
(defn parse-form
[reader start]
(binding [*comments* sub-level-comments]
(try (. clojure.lang.LispReader
(read reader {:read-cond :preserve
:eof :_eof}))
(catch Exception ex
(let [msg (str "Problem parsing near line " start
" <" (.readLine reader) ">"
" original reported cause is "
(.getCause ex) " -- "
(.getMessage ex))
e (RuntimeException. msg)]
(.setStackTrace e (.getStackTrace ex))
(throw e))))))
An empty comment that can be injected between other comments.
(def paragraph-comment
{:form (Comment. ";;") :text [";;"]})
Merge adjacent comments together
We optionally lift inline comments to the top of the form. This monstrosity ensures that each consecutive group of inline comments is treated as a mergable block, but with a fake blank comment between non-adjacent inline comments. When merged and converted to markdown, this will produce a paragraph for each separate block of inline comments.
(defn merge-inline-comments
[cs c]
(if (re-find #"^;(\\s|\$)" (.content (:form c)))
cs
(if-let [t (peek cs)]
(if (adjacent? t c)
(conj cs c)
(conj cs paragraph-comment c))
(conj cs c))))
Parse all inline comments and merge them if appropriate.
(defn parse-inline-comments
[start]
(when (and *lift-inline-comments*
(seq @sub-level-comments))
(cond->> (reduce merge-inline-comments
[]
@sub-level-comments)
(seq @top-level-comments)
(into [paragraph-comment])
true
(mapv #(assoc % :start start :end (dec start))))))
Parse all contents in reader.
(defn parse*
[reader]
(take-while
#(not= :_eof (:form %))
(flatten
(repeatedly
(fn []
(binding [*comments* top-level-comments]
(skip-spaces-and-comments reader))
(let [start (.getLineNumber reader)
form (parse-form reader start)
end (.getLineNumber reader)
code {:form form :start start :end end}
inline-comments (parse-inline-comments start)
comments (concat @top-level-comments inline-comments)]
(swap! top-level-comments (constantly []))
(swap! sub-level-comments (constantly []))
(if (empty? comments)
[code]
(vec (concat comments [code])))))))))
Remove the docstring from a form.
(defn strip-docstring
[docstring raw]
(-> raw
(string/replace (str \\" (-> docstring
str
(string/replace "\\"" "\\\\\\""))
\\")
"")
(string/replace #"#?\\^\\{\\s*:doc\\s*\\}" "")
(string/replace #"\\n\\s*\\n" "\\n")
(string/replace #"\\n\\s*\\)" ")")))
(defn get-var-docstring
[nspace-sym sym]
(let [s (if nspace-sym
(symbol (str nspace-sym) (str sym))
(symbol (str sym)))]
(try
(-> `(var ~s) eval meta :doc)
;; HACK: to handle types
(catch Exception _))))
(defn- extract-common-docstring
[form raw nspace-sym]
(let [sym (second form)]
(if (symbol? sym)
(let [maybe-metadocstring (:doc (meta sym))]
(let [nspace (find-ns sym)
[maybe-ds remainder] (let [[_ _ ? & more?] form] [? more?])
docstring (if (and (string? maybe-ds) remainder)
maybe-ds
(if (= (first form) 'ns)
(if (not maybe-metadocstring)
(when (string? maybe-ds) maybe-ds)
maybe-metadocstring)
(if-let [ds maybe-metadocstring]
ds
(when nspace
(-> nspace meta :doc)
(get-var-docstring nspace-sym sym)))))]
[(when docstring
;; Exclude flush left docstrings from adjustment:
(if (re-find #"\\n[^\\s]" docstring)
docstring
(string/replace docstring #"\\n " "\\n")))
(strip-docstring docstring raw)
(if (or (= 'ns (first form)) nspace) sym nspace-sym)]))
[nil raw nspace-sym])))
(defn- extract-impl-docstring [fn-body] (filter string? (rest fn-body)))
(defn- extract-internal-docstrings
[body]
(mapcat #(extract-impl-docstring %)
body))
(defmulti dispatch-form
(fn [form _ _]
(if (seq? form) (first form) form)))
(defmethod dispatch-form 'defprotocol
[form raw nspace-sym]
(let [[ds r s] (extract-common-docstring form raw nspace-sym)]
(let [internal-dses (if ds
(extract-internal-docstrings (nthnext form 3))
(extract-internal-docstrings (nthnext form 2)))]
(with-meta
[ds r s]
{:internal-docstrings internal-dses}))))
(defmethod dispatch-form 'ns [form raw nspace-sym] (extract-common-docstring form raw nspace-sym))
(defmethod dispatch-form 'def [form raw nspace-sym] (extract-common-docstring form raw nspace-sym))
(defmethod dispatch-form 'defn [form raw nspace-sym] (extract-common-docstring form raw nspace-sym))
(defmethod dispatch-form 'defn- [form raw nspace-sym] (extract-common-docstring form raw nspace-sym))
(defmethod dispatch-form 'defmulti [form raw nspace-sym] (extract-common-docstring form raw nspace-sym))
(defmethod dispatch-form 'defmethod [form raw nspace-sym] [nil raw nspace-sym])
(defn dispatch-inner-form
[form raw nspace-sym]
(conj
(reduce (fn [[adoc araw] inner-form]
(if (seq? inner-form)
(let [[d r] (dispatch-form inner-form
araw
nspace-sym)]
[(str adoc d) r])
[adoc araw]))
[nil raw]
form)
nspace-sym))
(defn- dispatch-literal [form raw nspace-sym] [nil raw])
(defn- literal-form? [form]
(or (string? form) (number? form) (keyword? form) (symbol? form)
(char? form) (true? form) (false? form) (instance? java.util.regex.Pattern form)))
(defmethod dispatch-form :default
[form raw nspace-sym]
(cond (literal-form? form)
(dispatch-literal form raw nspace-sym)
(and (first form)
(.isInstance clojure.lang.Named (first form))
(re-find #"^def" (-> form first name)))
(extract-common-docstring form raw nspace-sym)
:else
(dispatch-inner-form form raw nspace-sym)))
If the given form is a reader conditional return the inner form.
(defn unpack-reader-conditional
[form]
(if (instance? clojure.lang.ReaderConditional form)
(.form form)
form))
(defn extract-docstring [m raw nspace-sym]
(let [raw (string/join "\\n" (subvec raw (-> m :start dec) (:end m)))
form (unpack-reader-conditional (:form m))]
(dispatch-form form raw nspace-sym)))
Check if o is a comment.
(defn comment? [o] (->> o :form (instance? Comment)))
Check if o is code.
(defn code?
[o]
(and (->> o :form (instance? Comment) not)
(->> o :form nil? not)))
(defn arrange-in-sections
[parsed-code raw-code]
(loop [sections []
f (first parsed-code)
s (second parsed-code)
nn (nnext parsed-code)
nspace nil]
(if f
(cond
;; ignore comments with only one semicolon
(and (comment? f) (re-find #"^;(\\s|\$)" (-> f :form .content)))
(recur sections s (first nn) (next nn) nspace)
;; merging comments block
(and (comment? f) (comment? s) (adjacent? f s))
(recur sections (merge-comments f s)
(first nn) (next nn)
nspace)
;; merging adjacent code blocks
(and (code? f) (code? s) (adjacent? f s))
(let [[fdoc fcode nspace] (extract-docstring f raw-code nspace)
[sdoc scode _] (extract-docstring s raw-code nspace)]
(recur sections (assoc s
:type :code
:raw (str (or (:raw f) fcode) "\\n" scode)
:docstring (str (or (:docstring f) fdoc) "\\n\\n" sdoc))
(first nn) (next nn) nspace))
;; adjacent comments are added as extra documentation to code block
(and (comment? f) (code? s) (adjacent? f s))
(let [[doc code nspace] (extract-docstring s raw-code nspace)]
(recur sections (assoc s
:type :code
:raw (if *delete-lifted-comments*
;; this is far from perfect but should work
;; for most cases: erase matching comments
;; and then remove lines that are blank
(-> (reduce (fn [raw comment]
(string/replace raw
(str comment "\\n")
"\\n"))
code
(:text f))
(string/replace #"\\n\\s+\\n" "\\n"))
code)
:docstring (str doc "\\n\\n" (->str f)))
(first nn) (next nn) nspace))
;; adding comment section
(comment? f)
(recur (conj sections (assoc f :type :comment :raw (->str f)))
s
(first nn) (next nn)
nspace)
;; adding code section
:else
(let [[doc code nspace] (extract-docstring f raw-code nspace)]
(recur (conj sections (if (= (:type f) :code)
f
{:type :code
:raw code
:docstring doc}))
s (first nn) (next nn) nspace)))
sections)))
Make a buffered string reader for given string.
(defn buffered-string-reader
[source-string]
(java.io.BufferedReader.
(java.io.StringReader.
(str source-string "\\n"))))
Read the string into vector of lines.
(defn read-lines [source-string] (vec (line-seq (buffered-string-reader source-string))))
Create a line numbering reader for given string.
(defn line-numbering-reader [source-string] (clojure.lang.LineNumberingPushbackReader. (buffered-string-reader source-string)))
Handle setting all readers and parse the given source.
(defn parse
[source-string]
(let [reader (line-numbering-reader source-string)
old-cmt-rdr (aget (get-field clojure.lang.LispReader :macros nil) (int \\;))]
(try
(set-comment-reader read-comment)
(set-keyword-reader read-keyword)
(let [parsed-code (-> reader parse* doall)]
(set-comment-reader old-cmt-rdr)
(set-keyword-reader nil)
parsed-code)
(catch Exception e
(set-comment-reader old-cmt-rdr)
(set-keyword-reader nil)
(throw e)))))
Parse the given source and prepare sections.
(defn parse-into-sections [source-string] (arrange-in-sections (parse source-string) (read-lines source-string)))
Check if a file ends with cljs
(defn cljs-file? [filepath] (.endsWith (string/lower-case filepath) "cljs"))
Check if a file ends with cljx
(defn cljx-file? [filepath] (.endsWith (string/lower-case filepath) "cljx"))
Check if a file ends with cljc
(defn cljc-file? [filepath] (.endsWith (string/lower-case filepath) "cljc"))
(def cljx-data-readers {'+clj identity
'+cljc identity
'+cljs identity})
Bind the data readers to ones that match the file name and execute the body.
(defmacro with-readers-for
[file & body]
`(let [readers# (merge {}
(when (cljs-file? ~file) ctl/*cljs-data-readers*)
(when (cljx-file? ~file) cljx-data-readers)
(when (cljc-file? ~file) cljx-data-readers)
default-data-readers)]
(binding [*data-readers* readers#]
~@body)))
Parse the given file into a list of forms.
(defn parse-file
[filename]
(try
(with-readers-for filename
(parse-into-sections (slurp filename)))
(catch Exception e [{:type :error
:error (.getMessage e)
:exception e}])))
Get the namespace from a file.
(defn parse-ns
[filename]
(let [file (io/file filename)
filename (.getName file)]
(with-readers-for filename
(or (not-empty (-> file
(ctn/read-file-ns-decl)
(second)
(str)))
filename))))