2011/04/03

DCG で JSON をパースする

"Paradigms of Artificial Intelligence Programming"(PAIP, 実用 Common Lisp) の 20 Unification Grammars にある definite clause grammer(DCG) が面白かったので、それを使って JSON をパースしてみた。

DCG とは私の勝手な解釈では Prolog による文法解析かな。 DCG の何がいいかは "Artificial Intelligence: A Modern Approach"(AIMA, エージェントアプローチ人工知能) の方に書いてあるw とりあえず、全部宣言的に書けるのはいい。 PAIP と AIMA 面方読むと面白いね。よく理解できないげど。

数値はめんどうになって整数だけの対応にした。

(eval-when (:compile-toplevel :load-toplevel :execute)
(ql:quickload "unifgram")
(ql:quickload "info.read-eval-print.mecab"))

(defpackage :unifgram-json
(:use :cl :paiprolog :unifgram :info.read-eval-print.mecab))

(in-package :unifgram-json)

;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;;; Syntax

;;; 空白
(<-- (whitespace-p #\Space))
(<- (whitespace-p #\Tab))
(<- (whitespace-p #\Newline))
(<- (whitespace-p #\Return))
(<- (whitespace-p #\Page))

(<-- (whitespace (?c . ?s) ?s)
(whitespace-p ?c))

(rule (s*) ---> (whitespace) (s*) !)
(rule (s*) -->)

;;; JSON
(rule (json ?x) ---> (s*) (value ?x) (s*))

(rule (value ?x) ---> (string ?x))
(rule (value ?x) --> (number ?x))
(rule (value ?x) --> (object ?x))
(rule (value ?x) --> (array ?x))
(rule (value true) --> #.`(:word ,@(coerce "true" 'list)))
(rule (value false) --> #.`(:word ,@(coerce "false" 'list)))
(rule (value null) --> #.`(:word ,@(coerce "null" 'list)))

(rule (object (new-object)) ---> (:word #\{) (s*) (:word #\}))
(rule (object (new-object . ?x)) --> (:word #\{) (s*) (members ?x) (s*) (:word #\}))

(rule (members (?k ?v . ?xs)) ---> (pair ?k ?v) (s*) (:word #\,) (s*) (members ?xs))
(rule (members (?k ?v)) --> (pair ?k ?v))

(rule (pair ?k ?v) ---> (string ?k) (s*) (:word #\:) (s*) (value ?v))

(rule (array (new-array)) ---> (:word #\[) (s*) (:word #\]))
(rule (array (new-array . ?x)) --> (:word #\[) (s*) (elements ?x) (s*) (:word #\]))

(rule (elements (?x . ?xs)) ---> (value ?x) (s*) (:word #\,) (s*) (elements ?xs))
(rule (elements (?x)) --> (value ?x))

(rule (string (new-string)) ---> (:word #\") (:word #\"))
(rule (string (new-string . ?x)) --> (:word #\") (chars ?x) (:word #\"))

(rule (chars (?x . ?xs)) ---> (char ?x) (chars ?xs))
(rule (chars (?x)) --> (char ?x))

(<-- (char ?x (?x . ?xs) ?xs)
(\\= ?x #\")
(\\= ?x #\\))
(<- (char #\" (#\\ #\" . ?xs) ?xs))
(<- (char #\\ (#\\ #\\ . ?xs) ?xs))
(<- (char #\/ (#\\ #\/ . ?xs) ?xs))
(<- (char #\Backspace (#\\ #\b . ?xs) ?xs))
(<- (char #\Page (#\\ #\f . ?xs) ?xs))
(<- (char #\Newline (#\\ #\n . ?xs) ?xs))
(<- (char #\Return (#\\ #\r . ?xs) ?xs))
(<- (char #\Tab (#\\ #\t . ?xs) ?xs))
(<- (char (unicode ?c1 ?c2 ?c3 ?c4) (#\\ #\u ?c1 ?c2 ?c3 ?c4 . ?xs) ?xs))

;; number は手抜きで整数のみ
(rule (number (new-number . ?x)) ---> (int ?x))

(rule (int (?x)) ---> (digit ?x))
(rule (int (?x . ?xs)) --> (digit1-9 ?x) (digits ?xs))
(rule (int ('- ?x)) --> (:word #\-) (digit ?x))
(rule (int ('- ?x . ?xs)) --> (:word #\-) (digit1-9 ?x) (digits ?xs))

(rule (digits (?x . ?xs)) ---> (digit ?x) (digits ?xs))
(rule (digits (?x)) --> (digit ?x))

(rule (digit 0) ---> (:word #\0))
(rule (digit ?x) --> (digit1-9 ?x))
(rule (digit1-9 1) ---> (:word #\1))
(rule (digit1-9 2) --> (:word #\2))
(rule (digit1-9 3) --> (:word #\3))
(rule (digit1-9 4) --> (:word #\4))
(rule (digit1-9 5) --> (:word #\5))
(rule (digit1-9 6) --> (:word #\6))
(rule (digit1-9 7) --> (:word #\7))
(rule (digit1-9 8) --> (:word #\8))
(rule (digit1-9 9) --> (:word #\9))

;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;;; Semantic

(defun new-object (&rest args)
(loop for (k v) on args by #'cddr
collect (cons k v)))

(defun new-array (&rest args)
args)

(defun new-string (&rest args)
(coerce args 'string))

(defun new-number (&rest args)
(let* ((minus-p (eq '- (car args)))
(int (if minus-p (cdr args) args)))
(loop for i in int
for sum = i then (+ (* sum 10) i)
finally (return (if minus-p (* -1 sum) sum)))))

(defun hex-char-to-decimal (hex-char)
(if (digit-char-p hex-char)
(- (char-code hex-char) #.(char-code #\0))
(- (char-code (char-downcase hex-char)) #.(char-code #\a) -10)))

(defun unicode (a b c d)
(code-char (+ (ash (hex-char-to-decimal a) 12)
(ash (hex-char-to-decimal b) 8)
(ash (hex-char-to-decimal c) 4)
(hex-char-to-decimal d))))

(defconstant true t)
(defconstant false nil)
(defconstant null 'null)

;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;;; ユーティリティ

(defmacro from-json (json-string)
`(eval (prolog-first (?x)
(json ?x ,(coerce json-string 'list) ()))))

;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;;; 動作確認

(from-json "true")
(from-json "[]")
(from-json "[true]")
(from-json "[true,false]")
(from-json "[true,false,null]")
(from-json "\"\"")
(from-json "\"a\"")
(from-json "\"Hello\\\"\\\\\\b\\f\\n\\r\\t\\u308c\"")
(from-json "{}")
(from-json "{\"foo\":true}")
(from-json "0")
(from-json "1")
(from-json "10")
(from-json "12")
(from-json "123")
(from-json "12345")
(from-json "-1")
(from-json "-10")
(from-json "-123")


(from-json "[
{
\"Person\" : {
\"name\" : \"quek\",
\"weight\" : 55,
\"foo\" : [ true, false ]
}
},
{
\"Person\" : {
\"name\" : \"zumu\",
\"weight\" : 123,
\"foo\" : []
}
}
]"
)
;;=> ((("Person" ("name" . "quek") ("weight" . 55) ("foo" T NIL)))
;; (("Person" ("name" . "zumu") ("weight" . 123) ("foo"))))

0 件のコメント: