#lang racket
;; (char-numeric?) includes other unicode numerals.
;; We only use American Numerals™ here.
(define (char-digit? c)
(and (char? c)
(char>=? c #\0)
(char<=? c #\9)))
;; A simple wrapper for (char-whitespace?) with a type check.
(define (char-ws? c)
(and (char? c)
(char-whitespace? c)))
(define (char->token c)
(match c
[#\[ 'LBRACK]
[#\] 'RBRACK]
[#\{ 'LCURLY]
[#\} 'RCURLY]
[#\: 'KSEP]
[#\, 'VSEP]
[#\" 'QUOTE]
[#\' 'APOS]
[x x]))
;; Simple parser continuation.
;; lst: remaining tokens.
;; res: result upon return.
(struct cont (lst res))
(define (unwrap cn)
(match cn
[(cont _ r) (car r)]))
(define (bad-brack brack ex)
(eprintf "Unexpected bracket: ~v. Expected: ~v.\n" brack ex))
(define (begin-arr tokens)
(define (end-arr lst res)
(cont lst `((arr ,(reverse res)))))
(let loop ([tokens tokens]
[bs '()]
[res '()])
(match tokens
['() (cont '() `(,(reverse res)))]
[`(RBRACK ,_ ...) (end-arr (cdr tokens) res)]
[`(VSEP ,_ ...) (loop (cdr tokens) bs (cons (void) res))]
[_ (let ([lres (parse-json/r tokens)])
(match (cont-lst lres)
[`(VSEP ,r ...) (loop r bs (cons (unwrap lres) res))]
[`(RBRACK ,_ ...) (loop (cont-lst lres) bs (cons (unwrap lres) res))]
[x (eprintf "Expected separator or closing bracket after value. Found ~v\n" x)]))])))
(define (char->digit c)
(- (char->integer c) 48))
(define (begin-numeric tokens)
(define (get-base n)
(if (zero? n) 1
(add1 (floor (log n 10)))))
(define (end-numeric lst res)
(cont lst `(,res)))
(let loop ([tokens tokens]
[dec #f]
[res 0])
(match tokens
['() (cont '() '(res))]
[`(,(? char-digit?) ,_ ...) (loop (cdr tokens)
dec
(+ (* res 10)
(char->digit (car tokens))))]
;; Decimals.
;; Process RHS as its own int value and then add it to res after scaling.
;; dec acts as a flag, helping to avoid processing additional decimal points.
[`(#\. ,_ ...) (if (not dec) (let* ([rhs (loop (cdr tokens) #t 0)]
[ex (get-base (unwrap rhs))])
(end-numeric (cont-lst rhs)
(+ res (/ (unwrap rhs)
(expt 10. ex)))))
(error "Unexpected additional decimal."))]
[_ (end-numeric tokens res)])))
(define (begin-str tokens)
(define (quote-match? q qs)
(and (not (null? qs))
(symbol=? q (car qs))))
(define (end-str lst res)
(cont lst `(,(list->string (reverse res)))))
(let loop ([tokens tokens]
[qs '()]
[res '()])
(match tokens
['() (error "Reached EOF while parsing string value.")]
[`(,(or 'APOS 'QUOTE) ,_ ...) (cond
[(null? qs) (loop (cdr tokens) (cons (car tokens) qs) res)]
[(quote-match? (car tokens) qs) (end-str (cdr tokens) res)]
[else (loop (cdr tokens) qs (cons (car tokens) res))])]
[`(#\\ ,(or #\' #\" #\\) ,r ...) (loop r qs (cons (cadr tokens) res))]
[_ (loop (cdr tokens) qs (cons (car tokens) res))])))
;; key-value pair.
(struct kvp (k v))
(define (begin-obj tokens)
(define (end-obj lst res)
(cont lst `((obj ,(reverse res)))))
(define (parse-kvp tokens)
(match tokens
[`(,(or 'APOS 'QUOTE) ,_ ...) (let* ([kres (parse-json/r tokens)] ; parse key
[vres (parse-json/r (cdr (cont-lst kres)))]) ; parse value
(cont (cont-lst vres)
`(,(kvp (unwrap kres)
(unwrap vres)))))]
[_ (eprintf ("Expected key-value pair. Found ~v\n" tokens))]))
(let loop ([tokens tokens]
[bs '()]
[res '()])
(match tokens
['() (end-obj tokens res)]
[`(RCURLY ,_ ...) (end-obj (cdr tokens) res)]
[`(RBRACK ,_ ...) (bad-brack (car tokens) 'NONE)]
[_ (let ([lres (parse-kvp tokens)])
(match (cont-lst lres)
[`(VSEP ,r ...) (loop r bs (cons (unwrap lres) res))]
[`(RCURLY ,_ ...) (loop (cont-lst lres) bs (cons (unwrap lres) res))]
[x (eprintf "Expected separator or closing brace after kvp. Found ~v\n" x)]))])))
(define (parse-json/r tokens)
;; Check for special keyword values like Infinity, NaN, and bools.
(define (parse-special tokens)
(match tokens
;; Infinity
[`(#\I #\n #\f #\i #\n #\i #\t #\y ,r ...) (cont r '(+inf.f))]
;; -Infinity
[`(#\- #\I #\n #\f #\i #\n #\i #\t #\y ,r ...) (cont r '(-inf.f))]
;; NaN
[`(#\N #\a #\N ,r ...) (cont r '(+nan.f))]
;; true
[`(#\t #\r #\u #\e ,r ...) (cont r '(#t))]
;; false
[`(#\f #\a #\l #\s #\e ,r ...) (cont r '(#f))]
[x (eprintf "Unexpected token: ~v\n" x)]))
(match tokens
['() (cont '() '())]
;; Arrays.
[`(LBRACK ,_ ...) (begin-arr (cdr tokens))]
;; Objects.
[`(LCURLY ,_ ...) (begin-obj (cdr tokens))]
;; Strings.
[`(,(or 'QUOTE 'APOS) ,_ ...) (begin-str tokens)]
;; Positive numbers.
[`(,(? char-digit?) ,_ ...) (begin-numeric tokens)]
;; Negative numbers.
[`(#\- ,(? char-digit?) ,_ ...) (let ([lres (begin-numeric (cdr tokens))]) ; cdr to drop negative sign.
(cont (cont-lst lres) `(,(* (unwrap lres) -1))))]
[_ (parse-special tokens)]))
(define (parse-json tokens)
;; Unwrap parse result.
(unwrap (parse-json/r tokens)))
(define (tokenize str)
(define (quote-match? q qs)
(and (not (null? qs))
(char=? q (car qs))))
(let loop ([chars (string->list str)]
[qs '()]
[res '()])
(match chars
['() (reverse res)]
;; Avoid tokenizing string contents.
[`(,(or #\" #\') ,_ ...) (cond
[(null? qs) (loop (cdr chars)
(cons (car chars) qs)
(cons (char->token (car chars)) res))]
[(quote-match? (car chars) qs) (loop (cdr chars)
(cdr qs) ; drop opening quote from stack.
(cons (char->token (car chars)) res))]
[else (loop (cdr chars) qs (cons (car chars) res))])]
;; Drop whitespace if not in string.
[`(,(? char-ws?) ..1 ,r ...) (if (null? qs) (loop r qs res)
(loop (cdr chars) qs (cons (car chars) res)))]
[_ (if (null? qs) (loop (cdr chars) qs (cons (char->token (car chars)) res))
(loop (cdr chars) qs (cons (car chars) res)))])))
(define (print-ast ast)
(define (print-inner ner)
(define (print-ws ner)
(unless (null? (cdr ner))
(printf " ")))
(match ner
['() (void)]
[`(,(? pair?) ,_ ...) (print-ast/r (car ner))
(print-ws ner)
(print-inner (cdr ner))]
[`(,(kvp k v) ,_ ...) (printf "(~v: " k)
(print-inner `(,v))
(printf ")")
(print-ws ner)
(print-inner (cdr ner))]
[_ (printf "~v" (car ner))
(print-ws ner)
(print-inner (cdr ner))]))
(define (print-ast/r res)
(match res
['() (void)]
[`(arr (,r ...)) (printf "(arr (")
(print-inner r)
(printf "))")]
[`(obj (,r ...)) (printf "(obj (")
(print-inner r)
(printf "))")]
[_ (printf "~v" res)]))
(print-ast/r ast)
(printf "\n"))
;; Tests
(print-ast (parse-json (tokenize "[12.4, 2,3,, 4, 5]")))
(print-ast (parse-json (tokenize "[1, 2,'ab[c, NaN]ef' , -Infinity, 5, true]")))
(print-ast (parse-json (tokenize "[1, 2,'ab\\\"cd\\\"ef' , 4, 5]")))
(print-ast (parse-json (tokenize "['true\"false\"',, [1,false, -2.14 ,3] ,88]")))
(print-ast (parse-json (tokenize "[1, 2,'ab[c\\\\d]ef' , {\"Inf'in'ity\": NaN}, ['true', [1,false, 2.148 ,NaN],88], 5, true]")))
(print-ast (parse-json (tokenize "'some_text'")))
(print-ast (parse-json (tokenize "[{'some_key': 14,\"another_key\": [8, 1, 2], 'yet_another_key': {'abc': 123}}, 420,{'wt':false}]")))