Provides the parsing facilities for Marginalia.
This file contains the complete Marginalia parser. It leverages the Clojure reader instead of implementing a complete Clojure parsing solution.
And seems to be in need of some more documentation and cleanup for readability.
(ns sidenotes.parser (:require [clojure.string :as string] [clojure.java.io :as io] [clojure.tools.namespace :as ctn] [cljs.tagged-literals :as ctl]))
Access to private or protected field. field-name is a symbol or keyword.
Extracted from clojure.contrib.reflect
(defn get-field [klass field-name obj] (-> klass (.getDeclaredField (name field-name)) (doto (.setAccessible true)) (.get obj)))
Calls a private or protected method.
params is a vector of classes which correspond to the arguments to the method e
obj is nil for static methods, the instance object otherwise.
The method-name is given a symbol or a keyword (something Named).
Extracted from clojure.contrib.reflect
(defn call-method [klass method-name params obj & args] (-> klass (.getDeclaredMethod (name method-name) (into-array Class params)) (doto (.setAccessible true)) (.invoke obj (into-array Object args))))
A simple record for holding comments.
(defrecord Comment [content])
Attach a method for printing to Comment records.
(defmethod print-method Comment [comment ^String out] (.write out (str \\" (.content comment) \\")))
Hold comments that are defined at top level.
(def top-level-comments (atom []))
Hold comments that are defined inside a form.
(def sub-level-comments (atom []))
(def ^{:dynamic true} *comments* nil) (def ^{:dynamic true} *lift-inline-comments* nil) (def ^{:dynamic true} *delete-lifted-comments* nil)
Remeber if adding comments is currently enabled or disabled.
(def comments-enabled? (atom true))
Marginalia can be given directives in comments. A directive is a comment line containing a directive name, in the form ;; @DirectiveName
. Directives change the behavior of the parser within the files that contain them.
The following directives are defined:
@MSidenotesDisable
suppresses subsequent comments from the docs@MSidenotesEnable
includes subsequent comments in the docs(def directives {"SidenotesDisable" (fn [] (swap! comments-enabled? false)) "SidenotesEnable" (fn [] (swap! comments-enabled? true))})
If the given line is a directive, applies it. Returns a value indicating whether the line should be included in the comments list.
(defn process-directive! [line] (let [directive (->> (re-find #"^;+\\s*@(\\w+)" line) (last) (get directives))] (when directive (directive)) (not directive)))
Read a comment from the reader.
(defn read-comment ([reader semicolon] (let [sb (StringBuilder.)] (.append sb semicolon) (loop [c (.read reader)] (let [ch (char c)] (if (or (= ch \\newline) (= ch \\return)) (let [line (dec (.getLineNumber reader)) text (.toString sb) include? (process-directive! text)] (when (and include? @comments-enabled?) (swap! *comments* conj {:form (Comment. text) :text [text] :start line :end line})) reader) (do (.append sb (Character/toString ch)) (recur (.read reader)))))))) ([reader semicolon opts pending] (read-comment reader semicolon)))
Set the given reader as handler for lines starting with ;
(defn set-comment-reader [reader] (aset (get-field clojure.lang.LispReader :macros nil) (int \\;) reader))
Skip forward until something besides a comment or whitespace shows up.
(defn skip-spaces-and-comments [rdr] (loop [c (.read rdr)] (cond (= c -1) nil (= (char c) \\;) (do (read-comment rdr \\;) (recur (.read rdr))) (#{\\space \\tab \\return \\newline \\,} (char c)) (recur (.read rdr)) :else (.unread rdr c))))
(defrecord DoubleColonKeyword [content])
(defmethod print-method DoubleColonKeyword [dck ^java.io.Writer out] (.write out (str \\: (.content dck))))
(defmethod print-dup DoubleColonKeyword [dck ^java.io.Writer out] (print-method dck out))
(defn ^:private read-token [reader c] (call-method clojure.lang.LispReader :readToken [java.io.PushbackReader Character/TYPE] nil reader c))
Clojure 1.9 changed the signature of LispReader/matchSymbol, taking a new parameter of type LispReader\$Resolver. Conveniently, we can test for the existence of the reader-resolver var to detect running under 1.9.
We must take care to use the correct overload for the project's runtime, else we will crash and fail people's builds.
(if-let [resolver-var (resolve '*reader-resolver*)] (defn ^:private match-symbol [s] (call-method clojure.lang.LispReader :matchSymbol [String, (Class/forName "clojure.lang.LispReader\$Resolver")] nil s (deref resolver-var))) (defn ^:private match-symbol [s] (call-method clojure.lang.LispReader :matchSymbol [String] nil s)))
Read a keyword from reader.
(defn read-keyword ([reader colon] (let [c (.read reader)] (if (= (int \\:) c) (-> (read-token reader (char c)) match-symbol DoubleColonKeyword.) (do (.unread reader c) (-> (read-token reader colon) match-symbol))))) ([reader colon opts pending] (read-keyword reader colon)))
Set the given reader as handler for keywords.
(defn set-keyword-reader [reader] (aset (get-field clojure.lang.LispReader :macros nil) (int \\:) reader))
Check if two sections are adjacent.
(defn adjacent? [f s] (= (-> f :end) (-> s :start dec)))
Convert a section to a string.
(defn- ->str [m] (-> (-> m :form .content) (string/replace #"^;+\\s(\\s*)" "\$1") (string/replace #"^;+" "")))
Merge two comment sections into one.
(defn merge-comments [f s] {:form (Comment. (str (->str f) "\\n" (->str s))) :text (into (:text f) (:text s)) :start (:start f) :end (:end s)})
Parse one form. Throw exception with start line number included when an error is encountered.
(defn parse-form [reader start] (binding [*comments* sub-level-comments] (try (. clojure.lang.LispReader (read reader {:read-cond :preserve :eof :_eof})) (catch Exception ex (let [msg (str "Problem parsing near line " start " <" (.readLine reader) ">" " original reported cause is " (.getCause ex) " -- " (.getMessage ex)) e (RuntimeException. msg)] (.setStackTrace e (.getStackTrace ex)) (throw e))))))
An empty comment that can be injected between other comments.
(def paragraph-comment {:form (Comment. ";;") :text [";;"]})
Merge adjacent comments together
We optionally lift inline comments to the top of the form. This monstrosity ensures that each consecutive group of inline comments is treated as a mergable block, but with a fake blank comment between non-adjacent inline comments. When merged and converted to markdown, this will produce a paragraph for each separate block of inline comments.
(defn merge-inline-comments [cs c] (if (re-find #"^;(\\s|\$)" (.content (:form c))) cs (if-let [t (peek cs)] (if (adjacent? t c) (conj cs c) (conj cs paragraph-comment c)) (conj cs c))))
Parse all inline comments and merge them if appropriate.
(defn parse-inline-comments [start] (when (and *lift-inline-comments* (seq @sub-level-comments)) (cond->> (reduce merge-inline-comments [] @sub-level-comments) (seq @top-level-comments) (into [paragraph-comment]) true (mapv #(assoc % :start start :end (dec start))))))
Parse all contents in reader.
(defn parse* [reader] (take-while #(not= :_eof (:form %)) (flatten (repeatedly (fn [] (binding [*comments* top-level-comments] (skip-spaces-and-comments reader)) (let [start (.getLineNumber reader) form (parse-form reader start) end (.getLineNumber reader) code {:form form :start start :end end} inline-comments (parse-inline-comments start) comments (concat @top-level-comments inline-comments)] (swap! top-level-comments (constantly [])) (swap! sub-level-comments (constantly [])) (if (empty? comments) [code] (vec (concat comments [code])))))))))
Remove the docstring from a form.
(defn strip-docstring [docstring raw] (-> raw (string/replace (str \\" (-> docstring str (string/replace "\\"" "\\\\\\"")) \\") "") (string/replace #"#?\\^\\{\\s*:doc\\s*\\}" "") (string/replace #"\\n\\s*\\n" "\\n") (string/replace #"\\n\\s*\\)" ")")))
(defn get-var-docstring [nspace-sym sym] (let [s (if nspace-sym (symbol (str nspace-sym) (str sym)) (symbol (str sym)))] (try (-> `(var ~s) eval meta :doc) ;; HACK: to handle types (catch Exception _))))
(defn- extract-common-docstring [form raw nspace-sym] (let [sym (second form)] (if (symbol? sym) (let [maybe-metadocstring (:doc (meta sym))] (let [nspace (find-ns sym) [maybe-ds remainder] (let [[_ _ ? & more?] form] [? more?]) docstring (if (and (string? maybe-ds) remainder) maybe-ds (if (= (first form) 'ns) (if (not maybe-metadocstring) (when (string? maybe-ds) maybe-ds) maybe-metadocstring) (if-let [ds maybe-metadocstring] ds (when nspace (-> nspace meta :doc) (get-var-docstring nspace-sym sym)))))] [(when docstring ;; Exclude flush left docstrings from adjustment: (if (re-find #"\\n[^\\s]" docstring) docstring (string/replace docstring #"\\n " "\\n"))) (strip-docstring docstring raw) (if (or (= 'ns (first form)) nspace) sym nspace-sym)])) [nil raw nspace-sym])))
(defn- extract-impl-docstring [fn-body] (filter string? (rest fn-body)))
(defn- extract-internal-docstrings [body] (mapcat #(extract-impl-docstring %) body))
(defmulti dispatch-form (fn [form _ _] (if (seq? form) (first form) form)))
(defmethod dispatch-form 'defprotocol [form raw nspace-sym] (let [[ds r s] (extract-common-docstring form raw nspace-sym)] (let [internal-dses (if ds (extract-internal-docstrings (nthnext form 3)) (extract-internal-docstrings (nthnext form 2)))] (with-meta [ds r s] {:internal-docstrings internal-dses}))))
(defmethod dispatch-form 'ns [form raw nspace-sym] (extract-common-docstring form raw nspace-sym))
(defmethod dispatch-form 'def [form raw nspace-sym] (extract-common-docstring form raw nspace-sym))
(defmethod dispatch-form 'defn [form raw nspace-sym] (extract-common-docstring form raw nspace-sym))
(defmethod dispatch-form 'defn- [form raw nspace-sym] (extract-common-docstring form raw nspace-sym))
(defmethod dispatch-form 'defmulti [form raw nspace-sym] (extract-common-docstring form raw nspace-sym))
(defmethod dispatch-form 'defmethod [form raw nspace-sym] [nil raw nspace-sym])
(defn dispatch-inner-form [form raw nspace-sym] (conj (reduce (fn [[adoc araw] inner-form] (if (seq? inner-form) (let [[d r] (dispatch-form inner-form araw nspace-sym)] [(str adoc d) r]) [adoc araw])) [nil raw] form) nspace-sym))
(defn- dispatch-literal [form raw nspace-sym] [nil raw])
(defn- literal-form? [form] (or (string? form) (number? form) (keyword? form) (symbol? form) (char? form) (true? form) (false? form) (instance? java.util.regex.Pattern form)))
(defmethod dispatch-form :default [form raw nspace-sym] (cond (literal-form? form) (dispatch-literal form raw nspace-sym) (and (first form) (.isInstance clojure.lang.Named (first form)) (re-find #"^def" (-> form first name))) (extract-common-docstring form raw nspace-sym) :else (dispatch-inner-form form raw nspace-sym)))
If the given form is a reader conditional return the inner form.
(defn unpack-reader-conditional [form] (if (instance? clojure.lang.ReaderConditional form) (.form form) form))
(defn extract-docstring [m raw nspace-sym] (let [raw (string/join "\\n" (subvec raw (-> m :start dec) (:end m))) form (unpack-reader-conditional (:form m))] (dispatch-form form raw nspace-sym)))
Check if o is a comment.
(defn comment? [o] (->> o :form (instance? Comment)))
Check if o is code.
(defn code? [o] (and (->> o :form (instance? Comment) not) (->> o :form nil? not)))
(defn arrange-in-sections [parsed-code raw-code] (loop [sections [] f (first parsed-code) s (second parsed-code) nn (nnext parsed-code) nspace nil] (if f (cond ;; ignore comments with only one semicolon (and (comment? f) (re-find #"^;(\\s|\$)" (-> f :form .content))) (recur sections s (first nn) (next nn) nspace) ;; merging comments block (and (comment? f) (comment? s) (adjacent? f s)) (recur sections (merge-comments f s) (first nn) (next nn) nspace) ;; merging adjacent code blocks (and (code? f) (code? s) (adjacent? f s)) (let [[fdoc fcode nspace] (extract-docstring f raw-code nspace) [sdoc scode _] (extract-docstring s raw-code nspace)] (recur sections (assoc s :type :code :raw (str (or (:raw f) fcode) "\\n" scode) :docstring (str (or (:docstring f) fdoc) "\\n\\n" sdoc)) (first nn) (next nn) nspace)) ;; adjacent comments are added as extra documentation to code block (and (comment? f) (code? s) (adjacent? f s)) (let [[doc code nspace] (extract-docstring s raw-code nspace)] (recur sections (assoc s :type :code :raw (if *delete-lifted-comments* ;; this is far from perfect but should work ;; for most cases: erase matching comments ;; and then remove lines that are blank (-> (reduce (fn [raw comment] (string/replace raw (str comment "\\n") "\\n")) code (:text f)) (string/replace #"\\n\\s+\\n" "\\n")) code) :docstring (str doc "\\n\\n" (->str f))) (first nn) (next nn) nspace)) ;; adding comment section (comment? f) (recur (conj sections (assoc f :type :comment :raw (->str f))) s (first nn) (next nn) nspace) ;; adding code section :else (let [[doc code nspace] (extract-docstring f raw-code nspace)] (recur (conj sections (if (= (:type f) :code) f {:type :code :raw code :docstring doc})) s (first nn) (next nn) nspace))) sections)))
Make a buffered string reader for given string.
(defn buffered-string-reader [source-string] (java.io.BufferedReader. (java.io.StringReader. (str source-string "\\n"))))
Read the string into vector of lines.
(defn read-lines [source-string] (vec (line-seq (buffered-string-reader source-string))))
Create a line numbering reader for given string.
(defn line-numbering-reader [source-string] (clojure.lang.LineNumberingPushbackReader. (buffered-string-reader source-string)))
Handle setting all readers and parse the given source.
(defn parse [source-string] (let [reader (line-numbering-reader source-string) old-cmt-rdr (aget (get-field clojure.lang.LispReader :macros nil) (int \\;))] (try (set-comment-reader read-comment) (set-keyword-reader read-keyword) (let [parsed-code (-> reader parse* doall)] (set-comment-reader old-cmt-rdr) (set-keyword-reader nil) parsed-code) (catch Exception e (set-comment-reader old-cmt-rdr) (set-keyword-reader nil) (throw e)))))
Parse the given source and prepare sections.
(defn parse-into-sections [source-string] (arrange-in-sections (parse source-string) (read-lines source-string)))
Check if a file ends with cljs
(defn cljs-file? [filepath] (.endsWith (string/lower-case filepath) "cljs"))
Check if a file ends with cljx
(defn cljx-file? [filepath] (.endsWith (string/lower-case filepath) "cljx"))
Check if a file ends with cljc
(defn cljc-file? [filepath] (.endsWith (string/lower-case filepath) "cljc"))
(def cljx-data-readers {'+clj identity '+cljc identity '+cljs identity})
Bind the data readers to ones that match the file name and execute the body.
(defmacro with-readers-for [file & body] `(let [readers# (merge {} (when (cljs-file? ~file) ctl/*cljs-data-readers*) (when (cljx-file? ~file) cljx-data-readers) (when (cljc-file? ~file) cljx-data-readers) default-data-readers)] (binding [*data-readers* readers#] ~@body)))
Parse the given file into a list of forms.
(defn parse-file [filename] (try (with-readers-for filename (parse-into-sections (slurp filename))) (catch Exception e [{:type :error :error (.getMessage e) :exception e}])))
Get the namespace from a file.
(defn parse-ns [filename] (let [file (io/file filename) filename (.getName file)] (with-readers-for filename (or (not-empty (-> file (ctn/read-file-ns-decl) (second) (str))) filename))))