« Home

Source. common/highlight.rkt [source-BMV5]

(require ffi/unsafe
         racket/file
         racket/list
         racket/promise
         racket/string
         scribble/html/html
         scribble/html/xml
         (only-in scribble/text disable-prefix))

(provide codeblock
         highlight-code
         code-grammars-dir
         tree-sitter-lib-path
         highlight-available?)

(define code-grammars-dir (make-parameter "ts-grammars"))

(define tree-sitter-lib-path
  (make-parameter
   (or (getenv "TREE_SITTER_LIB")
       ; for MacOS homebrew
       "/opt/homebrew/opt/tree-sitter/lib/libtree-sitter")))

(define ts-lib
  (delay/sync (ffi-lib (tree-sitter-lib-path) '("0" #f))))

(define-syntax-rule (ts name type)
  (get-ffi-obj 'name (force ts-lib) type))

(define-cstruct _TSNode
  ([context (_array _uint32 4)]
   [id      _pointer]
   [tree    _pointer]))

(define-cstruct _TSQueryCapture
  ([node  _TSNode]
   [index _uint32]))

(define-cstruct _TSQueryMatch
  ([id            _uint32]
   [pattern-index _uint16]
   [capture-count _uint16]
   [captures      _pointer]))   ; const TSQueryCapture*

(define (ts-parser-new)            ((ts ts_parser_new (_fun -> _pointer))))
(define (ts-parser-delete p)       ((ts ts_parser_delete (_fun _pointer -> _void)) p))
(define (ts-parser-set-language p l)
  ((ts ts_parser_set_language (_fun _pointer _pointer -> _bool)) p l))
(define (ts-parser-parse-string p bs len)
  ((ts ts_parser_parse_string (_fun _pointer _pointer _bytes _uint32 -> _pointer))
   p #f bs len))
(define (ts-tree-delete t)         ((ts ts_tree_delete (_fun _pointer -> _void)) t))
(define (ts-tree-root-node t)      ((ts ts_tree_root_node (_fun _pointer -> _TSNode)) t))
(define (ts-node-start-byte n)     ((ts ts_node_start_byte (_fun _TSNode -> _uint32)) n))
(define (ts-node-end-byte n)       ((ts ts_node_end_byte (_fun _TSNode -> _uint32)) n))

(define (ts-query-new lang src)
  (define-values (q err-off err-type)
    ((ts ts_query_new
         (_fun _pointer _bytes _uint32 (eo : (_ptr o _uint32)) (et : (_ptr o _int))
               -> (q : _pointer) -> (values q eo et)))
     lang src (bytes-length src)))
  (unless q
    (error 'highlight "query compile failed (offset ~a, type ~a)" err-off err-type))
  q)
(define (ts-query-delete q)        ((ts ts_query_delete (_fun _pointer -> _void)) q))
(define (ts-query-capture-name q idx)
  (define-values (p len)
    ((ts ts_query_capture_name_for_id
         (_fun _pointer _uint32 (l : (_ptr o _uint32)) -> (p : _pointer) -> (values p l)))
     q idx))
  (define bs (make-bytes len))
  (memcpy bs p len)
  (bytes->string/utf-8 bs))

(define (ts-query-cursor-new)      ((ts ts_query_cursor_new (_fun -> _pointer))))
(define (ts-query-cursor-delete c) ((ts ts_query_cursor_delete (_fun _pointer -> _void)) c))
(define (ts-query-cursor-exec c q node)
  ((ts ts_query_cursor_exec (_fun _pointer _pointer _TSNode -> _void)) c q node))
(define (ts-query-cursor-next-match c)
  (define-values (ok m)
    ((ts ts_query_cursor_next_match
         (_fun _pointer (m : (_ptr o _TSQueryMatch)) -> (ok : _bool) -> (values ok m)))
     c))
  (and ok m))

(define lang-cache (make-hash))

(define (load-lang lang)
  (hash-ref!
   lang-cache lang
   (lambda ()
     (define dir (build-path (code-grammars-dir) lang))
     (define gram (ffi-lib (path->string (build-path dir lang)) '(#f)))
     (define lang-ptr ((get-ffi-obj (format "tree_sitter_~a" lang) gram (_fun -> _pointer))))
     (define query-src (file->bytes (build-path dir "highlights.scm")))
     (cons lang-ptr (ts-query-new lang-ptr query-src)))))

(define (highlight-available? lang)
  (with-handlers ([exn:fail? (lambda (_) #f)])
    (load-lang lang)
    #t))

(define (capture->class name)
  (cond
    [(string=? name "keyword") "kw"]
    [(member name '("function" "function.method" "function.builtin")) "fn"]
    [(string=? name "type") "type"]
    [(string=? name "string") "str"]
    [(string=? name "escape") "escape"]
    [(string=? name "number") "num"]
    [(string=? name "comment") "comment"]
    [(string=? name "operator") "op"]
    [(string=? name "constant.builtin") "const"]
    [(string=? name "property") "prop"]
    [(string=? name "variable") "var"]
    [else (string-replace name "." "-")]))

(define (html-escape s)
  (string-replace
   (string-replace
    (string-replace s "&" "&")
    "<" "<")
   ">" ">"))

(define (highlight-code lang code)
  (cond
    [(not (highlight-available? lang)) (html-escape code)]
    [else
     (define code-bytes (string->bytes/utf-8 code))
     (define n (bytes-length code-bytes))
     (define entry (load-lang lang))
     (define lang-ptr (car entry))
     (define query (cdr entry))
     (define parser (ts-parser-new))
     (ts-parser-set-language parser lang-ptr)
     (define tree (ts-parser-parse-string parser code-bytes n))
     (define root (ts-tree-root-node tree))
     (define cursor (ts-query-cursor-new))
     (ts-query-cursor-exec cursor query root)
     (define name-cache (make-hash))
     (define (idx->class idx)
       (hash-ref! name-cache idx (lambda () (capture->class (ts-query-capture-name query idx)))))
     (define caps
       (let loop ([acc '()])
         (define m (ts-query-cursor-next-match cursor))
         (cond
           [(not m) acc]
           [else
            (define cnt (TSQueryMatch-capture-count m))
            (define base (TSQueryMatch-captures m))
            (loop
             (for/fold ([acc acc]) ([i (in-range cnt)])
               (define cap (ptr-ref base _TSQueryCapture i))
               (define node (TSQueryCapture-node cap))
               (cons (list (ts-node-start-byte node)
                           (ts-node-end-byte node)
                           (idx->class (TSQueryCapture-index cap)))
                     acc)))])))
     (define owner (make-vector n #f))
     (define (varprio cls) (if (string=? cls "var") 1 0))
     (for ([c (in-list (sort caps (lambda (a b)
                                    (define wa (- (cadr a) (car a)))
                                    (define wb (- (cadr b) (car b)))
                                    (cond
                                      [(not (= wa wb)) (< wa wb)]
                                      [(not (= (car a) (car b))) (< (car a) (car b))]
                                      [else (< (varprio (caddr a)) (varprio (caddr b)))]))))])
       (for ([b (in-range (car c) (cadr c))])
         (unless (vector-ref owner b) (vector-set! owner b (caddr c)))))
     (ts-query-cursor-delete cursor)
     (ts-tree-delete tree)
     (ts-parser-delete parser)
     (define out (open-output-string))
     (let loop ([i 0])
       (when (< i n)
         (define cls (vector-ref owner i))
         (define j (let scan ([j i])
                     (if (and (< j n) (equal? (vector-ref owner j) cls)) (scan (add1 j)) j)))
         (define text (html-escape (bytes->string/utf-8 (subbytes code-bytes i j))))
         (if cls
             (begin (display "" out)
                    (display text out) (display "" out))
             (display text out))
         (loop j)))
     (get-output-string out)]))

(define (codeblock lang . content)
  (define code
    (apply string-append (for/list ([x (in-list (flatten content))]) (format "~a" x))))
  (disable-prefix (pre (literal (highlight-code lang code)))))