(* $Id$ *)

let hash_add_assoc (key,val as pair) v =
  let i = (hashtbl__hash key) mod (vect_length v) in
  v.(i) <- pair::v.(i); ()
and hash_remove_assoc (key,val as pair) v =
  let i = (hashtbl__hash key) mod (vect_length v) in
  v.(i) <- except pair v.(i); ()
and hash_assoc key v =
  assoc key v.((hashtbl__hash key) mod (vect_length v))
;;

let keywords =
  let t = make_vect 79 [] in
  (* pure keywords *)
  do_list (fun (s,tok) -> hash_add_assoc (s,tok) t) [
    ("and", Tand); ("as", Tas); ("begin", Tbegin); ("do", Tdo);
    ("done", Tdone); ("else", Telse); ("end", Tend); ("exception", Texception);
    ("for", Tfor); ("fun", Tfun); ("function", Tfunction); ("if", Tif);
    ("in", Tin); ("let", Tlet); ("match", Tmatch); ("mutable", Tmutable);
    ("of", Tof); ("or", Tor); ("prefix", Tprefix); ("rec", Trec);
    ("then", Tthen); ("to", Tto); ("try", Ttry); ("type", Ttype);
    ("value", Tvalue); ("where", Twhere); ("while", Twhile); ("with", Twith)
  ];
  (* infixes *)
  do_list (fun (s,tok) -> hash_add_assoc (s,tok) t) [
    ("mod", Tinfix "mod"); ("quo", Tinfix "quo")
  ];
  t
;;

let add_infix name =
  hash_add_assoc (name, Tinfix name) keywords
and remove_infix name =
  hash_remove_assoc (name, Tinfix name) keywords
;;

let (add_in_buff, get_buff, fget_buff) =
  let buff = ref(create_string 80) in (
  (* add_in_buff *) (
    fun i x ->
      let len = string_length !buff in
      if i >= len then (buff := !buff ^ (create_string len); ());
      set_nth_char !buff i x;
      succ i
  ),
  (* get_buff *) (
    fun len ->
      sub_string !buff 0 len
  ),
  (* fget_buff *) (
    fun len ->
      set_nth_char !buff len `\000`;
      !buff
  )
)
;;

let string_of_token = function
  Tident x -> x | Tinfix x -> x
| Tat -> "@" | Tbang -> "!" | Tbangequal -> "!=" | Tcaret -> "^"
| Tcolonequal -> ":=" | Tequal -> "=" | Tequalequal -> "==" | Tgreater -> ">"
| Tgreaterequal -> ">=" | Tless -> "<" | Tlessequal -> "<="
| Tlessgreater -> "<>" | Tminus -> "-" | Tplus -> "+" | Tslash -> "/"
| Tstar -> "*"
| _ -> raise Parse_error
;;

let rec spec_char = function
  [< '`n` >] -> `\n` | [< '`t` >] -> `\t`
| [< '`b` >] -> `\b` | [< '`r` >] -> `\r`
| [< '`0`..`9` as c; (spec1 (int_of_char c-int_of_char `0`)) v >] ->
    char_of_int v
| [< 'x >] -> x

and spec1 v = function
  [< '`0`..`9` as c; s >] -> spec1 (10*v+int_of_char c-int_of_char `0`) s
| [< >] -> v
;;

let rec string len = function
  [< '`"` >] -> len
| [< '`\\`;
     begin function
       [< '`\n`; s >] -> len
     | [< spec_char c >] -> add_in_buff len c
     end len;
     s
  >] -> string len s
| [< 'x; s >] -> string (add_in_buff len x) s

and char = function
  [< '`\\`; spec_char c >] -> c
| [< 'x >] -> x

and skip_comm = function
  [< '`*`; s >] -> skip1 s
| [< '`(`; skip2 _; s >] -> skip_comm s
| [< '`"`; (string 0) _; s >] -> skip_comm s
| [< '```; char _; s >] -> skip_comm s
| [< '_; s >] -> skip_comm s

and skip1 = function
  [< '`)` >] -> ()
| [< s >] -> skip_comm s

and skip2 = function
  [< '`*`; s >] -> skip_comm s
| [< >] -> ()
;;

let rec number len = function
  [< '`0`..`9` as d; s >] -> number (add_in_buff len d) s
| [< '`.`; (float (add_in_buff len `.`)) f >] -> Tfloat f
| [< '`e` | `E`; (exp (add_in_buff len `e`)) f >] -> Tfloat f
| [< >] -> Tint(int_of_string(fget_buff len))

and float len = function
  [< '`0`..`9` as d; s >] -> float (add_in_buff len d) s
| [< '`e` | `E`; (exp (add_in_buff len `e`)) f >] -> f
| [< >] -> float_of_string(fget_buff len)

and exp len = function
  [< '`+` | `-` as c; (exp2 (add_in_buff len c)) f >] -> f
| [< (exp2 len) f >] -> f

and exp2 len = function
  [< '`0`..`9` as d; s >] -> exp2 (add_in_buff len d) s
| [< >] -> float_of_string(fget_buff len)
;;

let rec next_tok = function
  [< '`a`..`z` | `A`..`Z` as c; (ident (add_in_buff 0 c)) i >] -> i
| [< '`0`..`9` as d; (number (add_in_buff 0 d)) n >] -> n
| [< '` ` | `\n` | `\t`; s >] -> next_tok s
| [< '`"`; (string 0) len >] -> Tstring (get_buff len)
| [< '```; char c; '``` >] -> Tchar c
| [< '`(`; s >] -> lparen s
| [< '`!`; (function [< '`=` >] -> Tbangequal | [< >] -> Tbang) t >] -> t
| [< '`|`; (function [< '`]` >] -> Tbarrbracket | [< >] -> Tbar) t >] -> t
| [< '`:`; (function [< '`:` >] -> Tcoloncolon | [< '`=` >] -> Tcolonequal
                   | [< >] -> Tcolon) t >] -> t
| [< '`.`; (function [< '`.` >] -> Tdotdot | [< '`(` >] -> Tdotlparen
                   | [< >] -> Tdot) t >] -> t
| [< '`=`; (function [< '`=` >] -> Tequalequal | [< >] -> Tequal) t >] -> t
| [< '`>`; (function [< '`=` >] -> Tgreaterequal
                   | [< '`]` >] -> Tgreaterrbracket
                   | [< >] -> Tgreater) t >] -> t
| [< '`[`; (function [< '`|` >] -> Tlbracketbar | [< '`<` >] -> Tlbracketless
                   | [< >] -> Tlbracket) t >] -> t
| [< '`<`; (function [< '`=` >] -> Tlessequal | [< '`>` >] -> Tlessgreater
                   | [< '`-` >] -> Tlessminus | [< >] -> Tless) t >] -> t
| [< '`-`; (function [< '`>` >] -> Tminusgreater | [< >] -> Tminus) t >] -> t
| [< '`;`; (function [< '`;` >] -> Tsemisemi | [< >] -> Tsemi) t >] -> t
| [< '`&` >] -> Tampersand
| [< '`@` >] -> Tat
| [< '`^` >] -> Tcaret
| [< '`,` >] -> Tcomma
| [< '`{` >] -> Tlbrace
| [< '`+` >] -> Tplus
| [< '`'` >] -> Tquote
| [< '`}` >] -> Trbrace
| [< '`]` >] -> Trbracket
| [< '`)` >] -> Trparen
| [< '`#` >] -> Tsharp
| [< '`/` >] -> Tslash
| [< '`*` >] -> Tstar
| [< '`_` >] -> Tunderscore
| [< end_of_stream ()  >] -> raise Parse_failure
| [< >] -> raise Parse_error

and lparen = function
  [< '`*`; skip_comm _; s >] -> next_tok s
| [< >] -> Tlparen

and ident len = function
  [< '`a`..`z` | `A`..`Z` | `0`..`9` | `'` as c; s >] ->
      ident (add_in_buff len c) s
| [< '`_`; s >] -> ident1 len s
| [< s >] ->
    let str = get_buff len in
    match try hash_assoc str keywords with _ -> Tident str with
      Tprefix -> prefix_id s
    | x -> x

and ident1 len = function
  [< '`_`; s >] ->
    let str = get_buff len in
    Tqualid(str, Tident(string_of_token(next_tok s)))
| [< s >] ->
    ident (add_in_buff len `_`) s

and prefix_id = function
  [< next_tok t >] -> Tident(string_of_token t)
;;

let next_token cs =
  try next_tok cs with Parse_error -> raise Lex_error
;;
