• Main Page
  • Namespaces
  • Classes
  • Files
  • File List
  • File Members

regexp.h

Go to the documentation of this file.
00001 #ifndef WIBBLE_REGEXP_H
00002 #define WIBBLE_REGEXP_H
00003 
00004 /*
00005  * OO wrapper for regular expression functions
00006  * 
00007  * Copyright (C) 2003--2006  Enrico Zini <enrico@debian.org>
00008  *
00009  * This library is free software; you can redistribute it and/or
00010  * modify it under the terms of the GNU Lesser General Public
00011  * License as published by the Free Software Foundation; either
00012  * version 2.1 of the License, or (at your option) any later version.
00013  *
00014  * This library is distributed in the hope that it will be useful,
00015  * but WITHOUT ANY WARRANTY; without even the implied warranty of
00016  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
00017  * Lesser General Public License for more details.
00018  *
00019  * You should have received a copy of the GNU Lesser General Public
00020  * License along with this library; if not, write to the Free Software
00021  * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307  USA
00022  */
00023 
00024 #include <wibble/exception.h>
00025 #include <sys/types.h>
00026 #include <regex.h>
00027 
00028 namespace wibble {
00029 namespace exception {
00030 
00032 
/// Exception type for failures of the regular expression functions.
///
/// Carries an integer error code and a message string. The constructor is
/// only declared here; its definition is not part of this header.
00033 class Regexp : public wibble::exception::Generic
00034 {
00035 protected:
          /// Value returned by code().
          /// NOTE(review): presumably the `code` constructor argument -- confirm in the .cpp.
00036     int m_code;
          /// Value returned by desc().
          /// NOTE(review): presumably built from regerror() output and `context` -- confirm in the .cpp.
00037     std::string m_message;
00038 
00039 public:
          /// Builds the exception from a regex object, an error code and a
          /// caller-supplied context string. Declared not to throw.
          /// NOTE(review): how the three arguments are combined is defined out
          /// of line and cannot be verified from this header.
00040     Regexp(const regex_t& re, int code, const std::string& context)
00041         throw ();
00042     ~Regexp() throw () {}
00043 
          /// Returns the stored error code (m_code).
00045     virtual int code() const throw () { return m_code; }
00046 
          /// Returns the fixed type name "Regexp".
00047     virtual const char* type() const throw () { return "Regexp"; }
          /// Returns a copy of the stored message (m_message).
00048     virtual std::string desc() const throw () { return m_message; }
00049 };
00050 
00051 }
00052 
/// Object-oriented wrapper around a POSIX regex_t, with storage for the
/// offsets of the last match.
///
/// Every member function is defined out of line; this header only declares
/// the interface.
///
/// NOTE(review): the class declares a destructor but no copy constructor or
/// copy assignment operator, so the compiler-generated ones copy `re` and the
/// raw `pmatch` pointer member-wise. If the destructor releases either of
/// them (likely -- confirm in the .cpp), copying a Regexp, or any class that
/// holds one by value, is unsafe.
///
/// NOTE(review): the `throw (Type)` dynamic exception specifications used
/// below were deprecated in C++11 and removed in C++17.
00053 class Regexp
00054 {
00055 protected:
          // Regex object passed to the POSIX regex API.
00056     regex_t re;
          // Storage for match offsets.
          // NOTE(review): presumably an array of `nmatch` entries allocated by
          // the constructor when match_count > 0 -- confirm in the .cpp.
00057     regmatch_t* pmatch;
          // NOTE(review): presumably the match_count given to the constructor -- confirm.
00058     int nmatch;
          // NOTE(review): presumably the string most recently passed to match(),
          // kept so operator[] can return substrings of it -- confirm.
00059     std::string lastMatch;
00060 
00061 public:
00062         /* Note that match_count is required to be >1 to enable
00063            sub-regexp capture. The maximum *INCLUDES* the whole-regexp
00064            match (indexed 0). [TODO we may want to fix this to be more
00065            friendly?] */
          // expr:        the regular expression text
          // match_count: number of match slots to keep, counting the whole match (see note above)
          // flags:       passed through by ERegexp as `flags | REG_EXTENDED`;
          //              NOTE(review): presumably regcomp() cflags -- confirm.
          // Declared to throw wibble::exception::Regexp.
00066     Regexp(const std::string& expr, int match_count = 0, int flags = 0) throw (wibble::exception::Regexp);
00067     ~Regexp() throw ();
00068 
          // Tests `str` against the expression; declared to throw wibble::exception::Regexp.
          // NOTE(review): presumably returns true on a match and records the
          // offsets read by operator[] / matchStart / matchEnd / matchLength -- confirm.
00069     bool match(const std::string& str, int flags = 0) throw (wibble::exception::Regexp);
00070     
00071         /* Indexing is from 1 for capture matches, like perl's $0,
00072            $1... 0 is whole-regexp match, not a capture. TODO
00073            the range is miscalculated (an off-by-one, wrt. the
00074            counterintuitive match counting). */
          // Returns match number `idx` as a string; declared to throw OutOfRange.
00075     std::string operator[](int idx) throw (wibble::exception::OutOfRange);
00076 
          // Position accessors for match number `idx`, same indexing as operator[];
          // each is declared to throw OutOfRange.
          // NOTE(review): presumably offsets into the string last given to match() -- confirm.
00077     size_t matchStart(int idx) throw (wibble::exception::OutOfRange);
00078     size_t matchEnd(int idx) throw (wibble::exception::OutOfRange);
00079     size_t matchLength(int idx) throw (wibble::exception::OutOfRange);
00080 };
00081 
/// Regexp variant that always adds REG_EXTENDED to the flags handed to the
/// Regexp constructor (REG_EXTENDED is the POSIX <regex.h> flag selecting
/// extended regular expression syntax).
00082 class ERegexp : public Regexp
00083 {
00084 public:
          // Same parameters and defaults as Regexp's constructor; only the
          // flags differ, with REG_EXTENDED OR-ed in.
00085     ERegexp(const std::string& expr, int match_count = 0, int flags = 0) throw (wibble::exception::Regexp)
00086         : Regexp(expr, match_count, flags | REG_EXTENDED) {}
00087 };
00088 
/// Iterable view over the pieces of a string selected by a regular expression.
///
/// NOTE(review): const_iterator::operator++ is defined out of line;
/// presumably each step moves [beg, end) to the next match of `re` in `str`
/// -- confirm in the .cpp.
/// NOTE(review): std::forward_iterator_tag and ptrdiff_t are used below, but
/// this header does not include <iterator> or <cstddef> itself.
00089 class Tokenizer
00090 {
          // Held by reference, not copied: the string given to the constructor
          // must outlive the Tokenizer and every iterator obtained from it.
          // Passing a temporary leaves this reference dangling.
00091     const std::string& str;
          // Built in the constructor with match_count 1 and the caller's flags.
00092     wibble::Regexp re;
00093 
00094 public:
          // Iterator whose current token is the half-open offset range
          // [beg, end) into tok.str.
00095     class const_iterator
00096     {
00097         Tokenizer& tok;
00098         size_t beg, end;
00099     public:
              // Note: `reference` is value_type&, while operator* below
              // returns a std::string by value.
00100         typedef std::string value_type;
00101         typedef ptrdiff_t difference_type;
00102         typedef value_type *pointer;
00103         typedef value_type &reference;
00104         typedef std::forward_iterator_tag iterator_category;
00105 
              // Begin iterator: starts with beg == end == 0 and advances once
              // right away.
00106         const_iterator(Tokenizer& tok) : tok(tok), beg(0), end(0) { operator++(); }
              // End iterator: beg and end both equal tok.str.size(). The
              // unnamed bool argument is never read; it only selects this
              // overload.
00107         const_iterator(Tokenizer& tok, bool) : tok(tok), beg(tok.str.size()), end(tok.str.size()) {}
00108 
              // Advances the iterator; defined out of line.
00109         const_iterator& operator++();
00110 
              // Returns a copy of the characters of tok.str in [beg, end).
00111         std::string operator*() const
00112         {
00113             return tok.str.substr(beg, end-beg);
00114         }
              // Iterators are equal when both offsets match; the Tokenizer
              // they refer to is not compared.
00115         bool operator==(const const_iterator& ti) const
00116         {
00117             return beg == ti.beg && end == ti.end;
00118         }
00119         bool operator!=(const const_iterator& ti) const
00120         {
00121             return beg != ti.beg || end != ti.end;
00122         }
00123     };
00124 
          // The parameters shadow the members of the same name: in the
          // initializer list the name before the parenthesis is the member,
          // the name inside is the parameter.
          //   str:   string to tokenize, kept by reference (see the member comment)
          //   re:    regular expression text
          //   flags: forwarded to the Regexp constructor
00125     Tokenizer(const std::string& str, const std::string& re, int flags)
00126         : str(str), re(re, 1, flags) {}
00127 
          // begin() yields an iterator already advanced once; end() yields
          // the iterator positioned at str.size().
00128     const_iterator begin() { return const_iterator(*this); }
00129     const_iterator end() { return const_iterator(*this, false); }
00130 };
00131 
/// Produces the pieces of a string through a forward iterator driven by a
/// regular expression.
///
/// NOTE(review): const_iterator::operator++ is defined out of line;
/// presumably the expression matches the separators between pieces --
/// confirm in the .cpp.
00145 class Splitter
00146 {
          // Built in the constructor with match_count 1 and the caller's flags.
00147     wibble::Regexp re;
00148 
00149 public:
00154     // TODO: add iterator_traits (the nested typedefs below already provide
          // the member types std::iterator_traits reads -- confirm whether this
          // TODO is still needed)
00155     class const_iterator
00156     {
              // Refers to the Regexp owned by the Splitter, so an iterator
              // must not outlive the Splitter that created it.
00157         wibble::Regexp& re;
              // Value returned by operator* and operator->.
00158         std::string cur;
              // Initialized with a copy of the input string by the begin constructor.
              // NOTE(review): presumably the not-yet-consumed remainder,
              // updated by operator++ -- confirm in the .cpp.
00159         std::string next;
00160 
00161     public:
00162         typedef std::string value_type;
00163         typedef ptrdiff_t difference_type;
00164         typedef value_type *pointer;
00165         typedef value_type &reference;
00166         typedef std::forward_iterator_tag iterator_category;
00167 
              // Begin iterator: copies `str` into `next`, leaves `cur` empty,
              // then advances once.
00168         const_iterator(wibble::Regexp& re, const std::string& str) : re(re), next(str) { ++*this; }
              // End iterator: `cur` and `next` are both default-constructed (empty).
00169         const_iterator(wibble::Regexp& re) : re(re) {}
00170 
              // Advances the iterator; defined out of line.
00171         const_iterator& operator++();
00172 
              // Both accessors expose `cur` without copying; the reference
              // or pointer refers to a member of this iterator object.
00173         const std::string& operator*() const
00174         {
00175             return cur;
00176         }
00177         const std::string* operator->() const
00178         {
00179             return &cur;
00180         }
              // Equality compares the contents of `cur` and `next` only; the
              // Regexp reference is not compared. An iterator therefore equals
              // end() exactly when both of its strings are empty.
00181         bool operator==(const const_iterator& ti) const
00182         {
00183             return cur == ti.cur && next == ti.next;
00184         }
00185         bool operator!=(const const_iterator& ti) const
00186         {
00187             return cur != ti.cur || next != ti.next;
00188         }
00189     };
00190 
          // The parameter `re` (expression text) shadows the member `re`; the
          // member is constructed from it with match_count 1 and `flags`.
00194     Splitter(const std::string& re, int flags)
00195         : re(re, 1, flags) {}
00196 
          // begin() copies `str` into the returned iterator, so the caller's
          // string does not need to stay alive. end() returns the iterator
          // with both strings empty.
00200     const_iterator begin(const std::string& str) { return const_iterator(re, str); }
00201     const_iterator end() { return const_iterator(re); }
00202 };
00203 
00204 }
00205 
00206 // vim:set ts=4 sw=4:
00207 #endif

Generated on Tue May 10 2011 16:51:50 for wibble by doxygen 1.7.1