ionflux.org | Impressum

Tokenizer.hpp

Go to the documentation of this file.
00001 #ifndef IONFLUX_TOOLS_TOKENIZER
00002 #define IONFLUX_TOOLS_TOKENIZER
00003 /* ==========================================================================
00004  * Ionflux Tools
00005  * Copyright (c) 2004 Joern P. Meier
00006  * mail@ionflux.org
00007  * --------------------------------------------------------------------------
00008  * Tokenizer.hpp                    Generic byte string tokenizer.
00009  * ==========================================================================
00010  * 
00011  * This file is part of Ionflux Tools.
00012  * 
00013  * Ionflux Tools is free software; you can redistribute it and/or modify it 
00014  * under the terms of the GNU General Public License as published by the Free
00015  * Software Foundation; either version 2 of the License, or (at  your option)
00016  * any later version.
00017  * 
00018  * Ionflux Tools is distributed in the hope that it will be useful, but 
00019  * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
00020  * or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
00021  * for more details.
00022  * 
00023  * You should have received a copy of the GNU General Public License
00024  * along with Ionflux Tools; if not, write to the Free Software Foundation, 
00025  * Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
00026  * 
00027  * ========================================================================== */
00028 
00029 #include <string>
00030 #include <iostream>
00031 #include <vector>
00032 #include "ionflux/tools.hpp"
00033 #include "ionflux/TokenTypeMap.hpp"
00034 
00035 namespace Ionflux
00036 {
00037 
00038 namespace Tools
00039 {
00040 
00048 
00049 struct TokenType
00050 {
          // Plain record describing one token type. Tokenizer accepts these
          // individually (addTokenType) or as std::vector<TokenType>.

          // Numeric identifier of this token type.
00052     int typeID;
          // Characters associated with this token type. Presumably the set of
          // characters a token of this type may consist of -- confirm against
          // the implementation.
00054     std::string validChars;
          // NOTE(review): presumably, when true, the match is inverted so that
          // characters NOT listed in validChars match (compare the invert
          // argument of Tokenizer::isOneOf) -- confirm.
00060     bool invert;
          // Presumably the maximum number of characters in one token of this
          // type. Whether some value means "no limit" is not visible in this
          // header -- TODO confirm.
00065     int maxChars;
00066     
          // Declared here only; the value is defined outside this header.
          // Presumably the base ID for user-defined token types -- confirm.
00073     static const int USERTYPE_ID;
00074 };
00075 
00077 struct Token
00078 {
          // A single token as returned by value from the Tokenizer accessors
          // (nextToken, getNextToken, getCurrentToken).

          // Numeric type ID of the token (presumably matches the typeID of the
          // TokenType that produced it -- confirm).
00080     int typeID;
          // String value of the token (presumably the text extracted from the
          // input -- confirm).
00082     std::string value;
00083 };
00084 
00085 class TokenTypeMap;
00086 
00103 class Tokenizer
00104 {
          // Generic byte string tokenizer (see the file header). This header
          // only declares the interface; all member functions and static
          // constants are defined elsewhere, so behavioral notes below that are
          // marked "presumably" are assumptions to be confirmed there.
00105     protected:
              // The input string being tokenized (see setInput).
00107         std::string theInput;
              // Current position in the input (exposed via getCurrentPos).
00109         unsigned int currentPos;
              // Position of the current token (exposed via getCurrentTokenPos).
00111         unsigned int currentTokenPos;
              // The current token (exposed via getCurrentToken).
00113         Token currentToken;
              // Flag controlled by setExtractQuoted.
00115         bool extractQuoted;
              // Quote character state (exposed via getQuoteChar).
00117         char currentQuoteChar;
              // Flag controlled by setExtractEscaped.
00119         bool extractEscaped;
              // Token type map used by this tokenizer.
              // NOTE(review): raw pointer; ownership/lifetime is not visible in
              // this header -- confirm who allocates and frees it.
00121         TokenTypeMap *typeMap;
00122         
00123     public:
              // Unlike the other TT_* members this one is a non-static,
              // non-const data member, i.e. each Tokenizer has its own copy.
              // Presumably adjusted by setTokenTypeAnything() -- confirm.
00125         TokenType TT_ANYTHING;
              // Predefined token types shared by all instances. Only declared
              // here; their contents are defined outside this header.
00127         static const TokenType TT_INVALID;
00129         static const TokenType TT_NONE;
00131         static const TokenType TT_QUOTED;
00133         static const TokenType TT_ESCAPED;
00135         static const TokenType TT_WHITESPACE;
00137         static const TokenType TT_LINETERM;
00139         static const TokenType TT_NUMBER;
00141         static const TokenType TT_ALPHA;
00143         static const TokenType TT_DEFAULT_SEP;
00145         static const TokenType TT_IDENTIFIER;
              // Predefined tokens (presumably the values isValid() tests
              // against -- confirm).
00147         static const Token TOK_INVALID;
00149         static const Token TOK_NONE;
              // Type ID constant associated with TT_ANYTHING.
00151         static const int TT_ANYTHING_TYPE_ID;
              // Quote characters and escape character used by the tokenizer
              // (presumably relevant to setExtractQuoted / setExtractEscaped).
00153         static const std::string QUOTE_CHARS;
00155         static const char ESCAPE_CHAR;
00156         
              // Default constructor.
00161         Tokenizer();
00162         
              // Construct with an initial input string.
              // NOTE(review): not marked explicit, so a std::string converts
              // implicitly to a Tokenizer.
00169         Tokenizer(const std::string& initInput);
00170         
              // Construct with an initial set of token types.
              // NOTE(review): not marked explicit, so a std::vector<TokenType>
              // converts implicitly to a Tokenizer.
00177         Tokenizer(const std::vector<TokenType>& initTokenTypes);
00178         
              // Construct with an initial set of token types and an initial
              // input string.
00186         Tokenizer(const std::vector<TokenType>& initTokenTypes, 
00187             const std::string& initInput);
00188         
              // Virtual destructor; the class is meant to be usable as a
              // polymorphic base (all operations below are virtual).
00193         virtual ~Tokenizer();
00194         
              // Clear the configured token types.
00205         virtual void clearTokenTypes();
00206         
              // Switch to the default token types (presumably some of the TT_*
              // constants above -- which ones is not visible here).
00212         virtual void useDefaultTokenTypes();
00213         
              // Set the token types from newTokenTypes (presumably replacing any
              // existing ones, in contrast to addTokenTypes -- confirm).
00223         virtual void setTokenTypes(const std::vector<TokenType>& newTokenTypes);
00224         
              // Add a single token type.
00232         virtual void addTokenType(const TokenType& newTokenType);
00233         
              // Add several token types.
00241         virtual void addTokenTypes(const std::vector<TokenType>& newTokenTypes);
00242         
              // Set the input string to tokenize. Whether this also resets the
              // parsing position is not visible here -- see reset().
00249         virtual void setInput(const std::string& newInput);
00250         
              // Return a token by value.
              // NOTE(review): how this differs from getNextToken() cannot be
              // determined from this header -- confirm before choosing one.
00263         virtual Token nextToken();
00264         
              // Return the next token, taking a caller-supplied token type map
              // (presumably used instead of the tokenizer's own typeMap for
              // this call -- confirm).
00274         virtual Token getNextToken(const TokenTypeMap &otherMap);
00275         
              // Return the next token (overload without an explicit map).
00282         virtual Token getNextToken();
00283         
              // Return the current token by value. Not declared const, so it
              // cannot be called on a const Tokenizer.
00290         virtual Token getCurrentToken();
00291         
              // Return the type ID of the current token. Not declared const.
00298         virtual int getCurrentTokenType();
00299         
              // Reset the tokenizer (presumably the parsing position/state; the
              // exact fields affected are not visible here).
00305         virtual void reset();
00306         
              // Configure the TT_ANYTHING token type (presumably derived from
              // the currently configured token types -- confirm).
00316         virtual void setTokenTypeAnything();
00317         
              // Enable or disable extraction of quoted strings.
00330         virtual void setExtractQuoted(bool newExtractQuoted);
00331         
              // Enable or disable extraction of escaped characters.
00344         virtual void setExtractEscaped(bool newExtractEscaped);
00345         
              // Return the current parsing position. Not declared const.
00353         virtual unsigned int getCurrentPos();
00354         
              // Return the position of the current token. Not declared const.
00362         virtual unsigned int getCurrentTokenPos();
00363         
              // Return the quote character (presumably that of the current
              // quoted token, if any -- confirm). Not declared const.
00371         virtual char getQuoteChar();
00372         
              // Static helper: test character c against the set testChars.
              // Presumably returns true if c occurs in testChars, with the
              // result negated when invert is true -- confirm.
00393         static bool isOneOf(char c, const std::string& testChars, bool invert);
00394         
              // Static helper: report whether a token is valid (presumably not
              // TOK_INVALID / TOK_NONE -- confirm).
              // NOTE(review): takes a non-const reference, so temporaries and
              // const tokens cannot be passed directly.
00402         static bool isValid(Token& token);
00403 };
00404 
00406 
00407 }
00408 
00409 }
00410 
00414 #endif

Generated on Tue Mar 14 20:58:30 2006 for Ionflux Tools Class Library (iftools) by  doxygen 1.4.6