package dbutils;

import java.util.*;
import java.io.*;

/**
 * Splits the text of an SQL script into integer token codes.
 *
 * <p>The tokenizer recognizes a fixed set of case-insensitive keywords (see
 * the constants below), single-quoted strings, numbers, identifiers
 * ({@link #WORD}) and a handful of one-character delimiters. Line ends are
 * skipped, and a <code>--</code> sequence starts a comment that runs to the end
 * of the line. Any other character makes {@link #getNextToken()} return
 * <code>-1</code>.
 *
 * <p>Typical use: call {@link #getNextToken()} until it returns {@link #EOF}
 * (or <code>-1</code>), reading {@link #getStringValue()} / {@link #getNumber()}
 * for the tokens that carry a value, then call {@link #shutdown()}.
 *
 * @author I. Burak Ozyurt
 * @version $Id: DMLTokenizer.java,v 1.1.1.1 2005/10/25 18:32:59 bozyurt Exp $
 */
public class DMLTokenizer {
  /** Underlying character-level tokenizer. */
  StreamTokenizer stok;
  /** Lower-case keyword text to token code. */
  Map<String, Integer> keywordMap = new HashMap<String, Integer>();
  /** Token code to lower-case keyword text (reverse of {@link #keywordMap}). */
  Map<Integer, String> tokenCodeMap = new HashMap<Integer, String>();
  /** Reader the tokens are pulled from; closed by {@link #shutdown()}. */
  BufferedReader in = null;

  // --- tokens that carry a value or are plain punctuation -------------------
  /** Single-quoted string; its content is in {@link #getStringValue()}. */
  public final static int STRING = 1;
  public final static int INSERT = 2;
  public final static int INTO = 3;
  public final static int VALUES = 4;
  /** The keyword <code>null</code>. */
  public final static int NULL_TOK = 5;
  public final static int TIMESTAMP = 6;
  public final static int LEFT_PAR = 7;
  public final static int RIGHT_PAR = 8;
  public final static int SEMICOL = 9;
  public final static int COMMA = 10;
  /** A word that is not a registered keyword; text is in {@link #getStringValue()}. */
  public final static int WORD = 11;
  /** A numeric literal; value is in {@link #getNumber()}. */
  public final static int NUMBER = 12;

  // --- keywords ---------------------------------------------------------------
  public final static int CREATE = 20;
  public final static int TABLE = 21;
  /** The keyword <code>number</code> (as opposed to a numeric literal, {@link #NUMBER}). */
  public final static int NUMBER_TOK = 22;
  public final static int VARCHAR2 = 23;
  public final static int CLOB = 24;
  public final static int DATE = 25;
  public final static int CHAR = 26;
  public final static int DEFAULT = 27;
  public final static int NOT = 28;
  public final static int UNIQUE = 29;
  public final static int CONSTRAINT = 30;

  public final static int COMMENT = 31;
  public final static int ON = 32;
  public final static int COLUMN = 33;
  public final static int IS = 34;
  public final static int ALTER = 35;
  public final static int ADD = 36;
  public final static int FOREIGN = 37;
  public final static int PRIMARY = 38;
  public final static int KEY = 39;
  public final static int REFERENCES = 40;
  public final static int USING = 41;
  public final static int INDEX = 42;
  public final static int TABLESPACE = 43;
  public final static int CHECK = 44;
  public final static int IN = 45;
  public final static int DOUBLE = 46;
  public final static int PRECISION = 47;
  public final static int DELETE = 48;
  public final static int CASCADE = 49;

  /** End of input. */
  public final static int EOF = 100;

  // --- comparison operators ---------------------------------------------------
  public final static int GREATER = 101;
  public final static int LESS = 102;
  public final static int EQUAL = 103;

  /**
   * Creates a tokenizer that reads the given file.
   *
   * @param filename path of the script to tokenize
   * @throws IOException if the file cannot be opened
   */
  public DMLTokenizer(String filename) throws IOException {
    this(new FileReader(filename));
  }

  /**
   * Creates a tokenizer that reads from an arbitrary character source.
   * The reader is wrapped in a {@link BufferedReader} unless it already is one,
   * and is closed by {@link #shutdown()}.
   *
   * @param reader source of the script text
   */
  public DMLTokenizer(Reader reader) {
    this.in = (reader instanceof BufferedReader)
        ? (BufferedReader) reader
        : new BufferedReader(reader);
    this.stok = new StreamTokenizer(this.in);
    configureSyntax();
    registerKeywords();
  }

  /** Sets up the character classes of the underlying StreamTokenizer. */
  private void configureSyntax() {
    // Start from a blank table: every character is "ordinary".
    stok.resetSyntax();
    // Marks 0-9, '.' and '-' as numeric characters.
    stok.parseNumbers();
    stok.eolIsSignificant(true);
    stok.quoteChar('\'');
    stok.wordChars('a', 'z');
    stok.wordChars('A', 'Z');
    stok.wordChars('_', '_');
    stok.ordinaryChar('(');
    stok.ordinaryChar(')');
    stok.ordinaryChar(',');
    stok.ordinaryChar(';');
    stok.ordinaryChar('<');
    stok.ordinaryChar('>');
    stok.ordinaryChar('=');

    stok.whitespaceChars(' ', ' ');
    stok.whitespaceChars('\t', '\t');
    // Line terminators have to be whitespace for StreamTokenizer to report
    // them as TT_EOL. Left ordinary, '\n' only works because its char value
    // equals TT_EOL, and '\r' (CRLF / CR files) comes back as an unknown
    // character, which ends tokenizing at the first line break.
    stok.whitespaceChars('\r', '\r');
    stok.whitespaceChars('\n', '\n');
  }

  /** Fills the keyword lookup tables (both directions). */
  private void registerKeywords() {
    addKeyword("insert", INSERT);
    addKeyword("into", INTO);
    addKeyword("values", VALUES);
    addKeyword("timestamp", TIMESTAMP);

    addKeyword("null", NULL_TOK);
    addKeyword("create", CREATE);
    addKeyword("table", TABLE);
    addKeyword("number", NUMBER_TOK);
    addKeyword("varchar2", VARCHAR2);
    addKeyword("clob", CLOB);
    addKeyword("date", DATE);
    addKeyword("char", CHAR);
    addKeyword("default", DEFAULT);
    addKeyword("not", NOT);
    addKeyword("unique", UNIQUE);
    addKeyword("constraint", CONSTRAINT);
    addKeyword("comment", COMMENT);
    addKeyword("on", ON);
    addKeyword("is", IS);
    addKeyword("column", COLUMN);
    addKeyword("alter", ALTER);
    addKeyword("add", ADD);
    addKeyword("foreign", FOREIGN);
    addKeyword("primary", PRIMARY);
    addKeyword("key", KEY);
    addKeyword("references", REFERENCES);
    addKeyword("using", USING);
    addKeyword("index", INDEX);
    addKeyword("tablespace", TABLESPACE);
    addKeyword("check", CHECK);
    addKeyword("in", IN);
    addKeyword("double", DOUBLE);
    addKeyword("precision", PRECISION);
    addKeyword("delete", DELETE);
    addKeyword("cascade", CASCADE);
  }

  /** Registers one keyword under its token code in both lookup tables. */
  private void addKeyword(String keyword, int tokCode) {
    Integer boxed = Integer.valueOf(tokCode);
    keywordMap.put(keyword, boxed);
    tokenCodeMap.put(boxed, keyword);
  }

  /**
   * Closes the underlying reader. Failures while closing are ignored, so this
   * is safe to call from a <code>finally</code> block.
   */
  public void shutdown() {
    if (in == null) {
      return;
    }
    try {
      in.close();
    } catch (IOException ignored) {
      // Best-effort cleanup: nothing useful can be done if close fails.
    }
  }

  /**
   * Looks up the token code of a keyword, ignoring case.
   *
   * @param word candidate keyword
   * @return the keyword's token code, or <code>-1</code> if the word is not a keyword
   */
  protected int getKeywordCode(String word) {
    // Locale.ROOT keeps the lookup independent of the default locale; with a
    // Turkish default locale "INSERT".toLowerCase() does not yield "insert".
    Integer code = keywordMap.get(word.toLowerCase(Locale.ROOT));
    return (code != null) ? code.intValue() : -1;
  }

  /**
   * Returns the lower-case keyword text for a token code.
   *
   * @param tokCode a token code
   * @return the keyword, or <code>null</code> if the code does not denote a keyword
   */
  public String getKeyword(int tokCode) {
    return tokenCodeMap.get(Integer.valueOf(tokCode));
  }

  /**
   * Returns the text of the most recently read token, as held by the
   * underlying tokenizer. Meaningful after {@link #WORD}, {@link #STRING} or a
   * keyword token.
   *
   * @return the current string value, possibly <code>null</code>
   */
  public String getStringValue() {
    return stok.sval;
  }

  /**
   * Maps a delimiter character to its token code.
   *
   * @param c character (token type) reported by the underlying tokenizer
   * @return the delimiter's token code, or <code>-1</code> if it is not a known delimiter
   */
  protected int getDelimiter(int c) {
    switch (c) {
      case '(':
        return LEFT_PAR;
      case ')':
        return RIGHT_PAR;
      case ',':
        return COMMA;
      case ';':
        return SEMICOL;
      case '>':
        return GREATER;
      case '<':
        return LESS;
      case '=':
        return EQUAL;
      default:
        return -1;
    }
  }

  /**
   * Returns the value of the most recently read {@link #NUMBER} token.
   *
   * @return an {@link Integer} when the value is within 1e-10 of its
   *         truncated int value, otherwise a {@link Double}
   */
  public Number getNumber() {
    double value = stok.nval;
    int truncated = (int) value;
    // Deliberately an if/else and not a ternary: mixing Integer and Double in
    // a conditional expression would unbox both and always yield a Double.
    if (Math.abs(value - truncated) < 1e-10) {
      return Integer.valueOf(truncated);
    }
    return Double.valueOf(value);
  }

  /**
   * Discards tokens up to and including the next end of line. Also stops at
   * end of input, so a comment on the last line of a script that lacks a
   * trailing line break cannot loop forever.
   *
   * @throws IOException if reading fails
   */
  protected void eatComments() throws IOException {
    int ttype;
    do {
      ttype = stok.nextToken();
    } while (ttype != StreamTokenizer.TT_EOL && ttype != StreamTokenizer.TT_EOF);
  }

  /**
   * Reads the next token, skipping line ends and <code>--</code> comments.
   *
   * @return one of the token code constants of this class, {@link #EOF} at end
   *         of input, or <code>-1</code> for a character that is neither a
   *         keyword, word, number, string nor a known delimiter
   * @throws IOException if reading fails
   */
  public int getNextToken() throws IOException {
    while (true) {
      int ttype = stok.nextToken();
      switch (ttype) {
        case StreamTokenizer.TT_EOL:
          continue;
        case StreamTokenizer.TT_WORD: {
          int keywordCode = getKeywordCode(stok.sval);
          return (keywordCode == -1) ? WORD : keywordCode;
        }
        case StreamTokenizer.TT_NUMBER:
          return NUMBER;
        case StreamTokenizer.TT_EOF:
          return EOF;
        case '\'':
          return STRING;
        case '-':
          // Two consecutive '-' tokens start a comment; a lone '-' is not a
          // known delimiter, so the look-ahead token is put back and the
          // usual "unknown character" result is returned.
          if (stok.nextToken() == '-') {
            eatComments();
            continue;
          }
          stok.pushBack();
          return getDelimiter(ttype);
        default:
          return getDelimiter(ttype);
      }
    }
  }

  /**
   * Small driver that prints the words, strings, numbers and keywords of a
   * script to standard output.
   *
   * @param args optional: <code>args[0]</code> is the script to read; without
   *             it the original hard-coded sample path is used
   */
  public static void main(String[] args) {
    String filename =
        (args.length > 0) ? args[0] : "/home/bozyurt/tmp/morph_pk_create.sql";
    DMLTokenizer tokenizer = null;
    try {
      tokenizer = new DMLTokenizer(filename);

      int tc;
      while ((tc = tokenizer.getNextToken()) != -1 && tc != DMLTokenizer.EOF) {
        switch (tc) {
          case WORD:
            System.out.println("word: " + tokenizer.getStringValue());
            break;
          case STRING:
            System.out.println("string: " + tokenizer.getStringValue());
            break;
          case NUMBER:
            System.out.println("number: " + tokenizer.getNumber());
            break;
          default:
            // Delimiters have no keyword text and are not printed.
            String keyword = tokenizer.getKeyword(tc);
            if (keyword != null) {
              System.out.println("keyword: " + keyword);
            }
        }
      }
    } catch (Exception x) {
      x.printStackTrace();
    } finally {
      if (tokenizer != null) {
        tokenizer.shutdown();
      }
    }
  }
}