json-stream-parser.sml 4.6 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165
  1. (* json-stream-parser.sml
  2. *
  3. * COPYRIGHT (c) 2020 The Fellowship of SML/NJ (http://www.smlnj.org)
  4. * All rights reserved.
  5. *
  6. * TODO: use the same "source" abstraction supported by the `JSONParser`
  7. * structure.
  8. *)
  9. structure JSONStreamParser :> sig
  10. (* abstract type of JSON input *)
  11. type source = JSONSource.source
  12. (* open a text input stream as a source *)
  13. val openStream : TextIO.instream -> source
  14. (* open a text file as a source *)
  15. val openFile : string -> source
  16. (* open a string as a source *)
  17. val openString : string -> source
  18. (* close a source *)
  19. val close : source -> unit
  20. (* callback functions for the different parsing events *)
  21. type 'ctx callbacks = {
  22. null : 'ctx -> 'ctx,
  23. boolean : 'ctx * bool -> 'ctx,
  24. integer : 'ctx * IntInf.int -> 'ctx,
  25. float : 'ctx * real -> 'ctx,
  26. string : 'ctx * string -> 'ctx,
  27. startObject : 'ctx -> 'ctx,
  28. objectKey : 'ctx * string -> 'ctx,
  29. endObject : 'ctx -> 'ctx,
  30. startArray : 'ctx -> 'ctx,
  31. endArray : 'ctx -> 'ctx,
  32. error : 'ctx * string -> unit
  33. }
  34. val parse : 'ctx callbacks -> (source * 'ctx) -> 'ctx
  35. val parseFile : 'ctx callbacks -> (string * 'ctx) -> 'ctx
  36. end = struct
  37. structure Lex = JSONLexer
  38. structure T = JSONTokens
  39. datatype source = datatype JSONSource.source
  40. val openStream = JSONSource.openStream
  41. val openFile = JSONSource.openFile
  42. val openString = JSONSource.openString
  43. val close = JSONSource.close
  44. (* callback functions for the different parsing events *)
  45. type 'ctx callbacks = {
  46. null : 'ctx -> 'ctx,
  47. boolean : 'ctx * bool -> 'ctx,
  48. integer : 'ctx * IntInf.int -> 'ctx,
  49. float : 'ctx * real -> 'ctx,
  50. string : 'ctx * string -> 'ctx,
  51. startObject : 'ctx -> 'ctx,
  52. objectKey : 'ctx * string -> 'ctx,
  53. endObject : 'ctx -> 'ctx,
  54. startArray : 'ctx -> 'ctx,
  55. endArray : 'ctx -> 'ctx,
  56. error : 'ctx * string -> unit
  57. }
  58. fun error (cb : 'a callbacks, ctx, msg) = (
  59. #error cb (ctx, msg);
  60. raise Fail "error")
  61. fun parse (cb : 'a callbacks) (src as Src{srcMap, strm, ...}, ctx) = let
  62. val lexer = Lex.lex srcMap
  63. val errorMsg = JSONSource.errorMsg src
  64. fun err (ctx, span, msg, tok) = error (cb, ctx, errorMsg (span, msg, tok))
  65. fun parseValue (strm : Lex.strm, ctx) = let
  66. val (tok, span, strm) = lexer strm
  67. in
  68. case tok
  69. of T.LB => parseArray (strm, ctx)
  70. | T.LCB => parseObject (strm, ctx)
  71. | T.KW_null => (strm, #null cb ctx)
  72. | T.KW_true => (strm, #boolean cb (ctx, true))
  73. | T.KW_false => (strm, #boolean cb (ctx, false))
  74. | T.INT n => (strm, #integer cb (ctx, n))
  75. | T.FLOAT f => (strm, #float cb (ctx, f))
  76. | T.STRING s => (strm, #string cb (ctx, s))
  77. | _ => err (ctx, span, "error parsing value", tok)
  78. (* end case *)
  79. end
  80. and parseArray (strm : Lex.strm, ctx) = (case lexer strm
  81. of (T.RB, _, strm) => (strm, #endArray cb (#startArray cb ctx))
  82. | _ => let
  83. fun loop (strm, ctx) = let
  84. val (strm, ctx) = parseValue (strm, ctx)
  85. (* expect either a "," or a "]" *)
  86. val (tok, span, strm) = lexer strm
  87. in
  88. case tok
  89. of T.RB => (strm, ctx)
  90. | T.COMMA => loop (strm, ctx)
  91. | _ => err (ctx, span, "error parsing array", tok)
  92. (* end case *)
  93. end
  94. val ctx = #startArray cb ctx
  95. val (strm, ctx) = loop (strm, #startArray cb ctx)
  96. in
  97. (strm, #endArray cb ctx)
  98. end
  99. (* end case *))
  100. and parseObject (strm : Lex.strm, ctx) = let
  101. fun parseField (strm, ctx) = (case lexer strm
  102. of (T.STRING s, span, strm) => let
  103. val ctx = #objectKey cb (ctx, s)
  104. in
  105. case lexer strm
  106. of (T.COLON, _, strm) => parseValue (strm, ctx)
  107. | (tok, span, _) =>
  108. err (ctx, span, "error parsing field", tok)
  109. (* end case *)
  110. end
  111. | _ => (strm, ctx)
  112. (* end case *))
  113. fun loop (strm, ctx) = let
  114. val (strm, ctx) = parseField (strm, ctx)
  115. in
  116. (* expect either "," or "}" *)
  117. case lexer strm
  118. of (T.RCB, span, strm) => (strm, ctx)
  119. | (T.COMMA, span, strm) => loop (strm, ctx)
  120. | (tok, span, _) =>
  121. err (ctx, span, "error parsing object", tok)
  122. (* end case *)
  123. end
  124. val ctx = #startObject cb ctx
  125. val (strm, ctx) = loop (strm, #startObject cb ctx)
  126. in
  127. (strm, #endObject cb ctx)
  128. end
  129. val (inStrm, cxt) = parseValue (!strm, ctx)
  130. in
  131. strm := inStrm;
  132. ctx
  133. end
  134. fun parseFile cb = let
  135. val parse = parse cb
  136. fun parser (fileName, ctx) = let
  137. val inStrm = openFile fileName
  138. val ctx = parse (inStrm, ctx)
  139. handle ex => (close inStrm; raise ex)
  140. in
  141. close inStrm;
  142. ctx
  143. end
  144. in
  145. parser
  146. end
  147. end