libept
0.5.25
|
00001 #ifndef EPT_DEBTAGS_VOCABULARY_H 00002 #define EPT_DEBTAGS_VOCABULARY_H 00003 00009 /* 00010 * Copyright (C) 2003,2004,2005,2006,2007 Enrico Zini <enrico@debian.org> 00011 * 00012 * This program is free software; you can redistribute it and/or modify 00013 * it under the terms of the GNU General Public License as published by 00014 * the Free Software Foundation; either version 2 of the License, or 00015 * (at your option) any later version. 00016 * 00017 * This program is distributed in the hope that it will be useful, 00018 * but WITHOUT ANY WARRANTY; without even the implied warranty of 00019 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 00020 * GNU General Public License for more details. 00021 * 00022 * You should have received a copy of the GNU General Public License 00023 * along with this program; if not, write to the Free Software 00024 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA 00025 */ 00026 00027 #include <ept/debtags/tag.h> 00028 #include <tagcoll/diskindex/mmap.h> 00029 00030 #include <string> 00031 #include <vector> 00032 #include <map> 00033 00034 namespace ept { 00035 namespace debtags { 00036 00037 class Vocabulary 00038 { 00039 public: 00040 class FacetIndex : public tagcoll::diskindex::MMap 00041 { 00042 protected: 00043 // Layout of the data in the index 00044 struct Item { 00045 int offset; 00046 int size; 00047 int firsttag; 00048 int lasttag; 00049 const char name[]; 00050 }; 00051 inline Item* item(int id) const 00052 { 00053 if (id >= 0 && (unsigned)id < size()) 00054 return (Item*)(m_buf + ((int*)m_buf)[id]); 00055 return NULL; 00056 } 00057 00058 public: 00059 FacetIndex() : tagcoll::diskindex::MMap() {} 00060 FacetIndex(const tagcoll::diskindex::MasterMMap& master, size_t idx) 00061 : tagcoll::diskindex::MMap(master, idx) {} 00062 00064 size_t size() const { return m_size == 0 ? 0 : *(int*)m_buf / sizeof(int); } 00066 size_t offset(int id) const { Item* i = item(id); return i == NULL ? 0 : i->offset; } 00068 size_t size(int id) const { Item* i = item(id); return i == NULL ? 0 : i->size; } 00070 int firsttag(int id) const { Item* i = item(id); return i == NULL ? -1 : i->firsttag; } 00072 int lasttag(int id) const { Item* i = item(id); return i == NULL ? -1 : i->lasttag; } 00074 const char* name(int id) const { Item* i = item(id); return i == NULL ? "" : i->name; } 00076 int id(const char* name) const; 00077 int id(const std::string& name) const { return id(name.c_str()); } 00078 }; 00079 00080 class TagIndex : public tagcoll::diskindex::MMap 00081 { 00082 protected: 00083 // Layout of the data in the index 00084 struct Item { 00085 int offset; 00086 int size; 00087 int facet; 00088 const char name[]; 00089 }; 00090 inline Item* item(int id) const 00091 { 00092 if (id >= 0 && (unsigned)id < size()) 00093 return (Item*)(m_buf + ((int*)m_buf)[id]); 00094 return NULL; 00095 } 00096 00097 public: 00098 TagIndex() : tagcoll::diskindex::MMap() {} 00099 TagIndex(const tagcoll::diskindex::MasterMMap& master, size_t idx) 00100 : tagcoll::diskindex::MMap(master, idx) {} 00101 00103 size_t size() const { return m_size == 0 ? 0 : *(int*)m_buf / sizeof(int); } 00105 size_t offset(int id) const { Item* i = item(id); return i == NULL ? 0 : i->offset; } 00107 size_t size(int id) const { Item* i = item(id); return i == NULL ? 0 : i->size; } 00109 int facet(int id) const { Item* i = item(id); return i == NULL ? -1 : i->facet; } 00111 const char* name(int id) const { Item* i = item(id); return i == NULL ? "" : i->name; } 00113 int id(const char* name) const; 00114 int id(const std::string& name) const { return id(name.c_str()); } 00115 }; 00116 00117 protected: 00118 // Master MMap index container 00119 tagcoll::diskindex::MasterMMap mastermmap; 00120 00121 time_t m_timestamp; 00122 00123 // Mmapped vocabulary file 00124 std::string voc_fname; 00125 int voc_fd; 00126 size_t voc_size; 00127 const char* voc_buf; 00128 00129 // Facet and tag indexes 00130 FacetIndex findex; 00131 TagIndex tindex; 00132 00133 // Cached parsed facet and tag records 00134 mutable std::vector< std::map<std::string, std::string> > m_facetData; 00135 mutable std::vector< std::map<std::string, std::string> > m_tagData; 00136 // Empty parsed data to return when data is asked for IDs == -1 00137 std::map<std::string, std::string> emptyData; 00138 00139 void parseVocBuf(std::map<std::string, std::string>& res, size_t ofs, size_t len) const; 00140 00141 public: 00142 Vocabulary(); 00143 ~Vocabulary(); 00144 00146 time_t timestamp() const { return m_timestamp; } 00147 00149 bool hasData() const { return m_timestamp != 0; } 00150 00151 const FacetIndex& facetIndex() const { return findex; } 00152 const TagIndex& tagIndex() const { return tindex; } 00153 00157 bool hasFacet(const std::string& name) const 00158 { 00159 return findex.id(name.c_str()) != -1; 00160 } 00161 00165 bool hasTag(const std::string& fullname) const 00166 { 00167 return tindex.id(fullname.c_str()) != -1; 00168 } 00169 00173 Facet facetByID(int id) const; 00174 00178 Tag tagByID(int id) const; 00179 00180 template<typename IDS> 00181 std::set<Tag> tagsByID(const IDS& ids) const 00182 { 00183 std::set<Tag> res; 00184 for (typename IDS::const_iterator i = ids.begin(); 00185 i != ids.end(); ++i) 00186 res.insert(tagByID(*i)); 00187 return res; 00188 } 00189 00193 Facet facetByTag(int id) const { return facetByID(tindex.facet(id)); } 00194 00198 Facet facetByName(const std::string& name) const { return facetByID(findex.id(name)); } 00199 00203 Tag tagByName(const std::string& fullname) const { return tagByID(tindex.id(fullname)); } 00204 00208 std::set< Facet > facets() const 00209 { 00210 std::set< Facet > res; 00211 for (size_t i = 0; i < findex.size(); i++) 00212 res.insert(facetByID(i)); 00213 return res; 00214 } 00215 00219 std::set< Tag > tags() const 00220 { 00221 std::set< Tag > res; 00222 for (size_t i = 0; i < tindex.size(); i++) 00223 res.insert(tagByID(i)); 00224 return res; 00225 } 00226 00230 std::set< Tag > tags(int facet) const 00231 { 00232 std::set< Tag > res; 00233 for (int i = findex.firsttag(facet); i != -1 && i <= findex.lasttag(facet); i++) 00234 res.insert(tagByID(i)); 00235 return res; 00236 } 00237 00238 std::set< Tag > tags(const std::string& facetName) const 00239 { 00240 return tags(findex.id(facetName)); 00241 } 00242 00243 std::set< Tag > tags(const Facet& facet) const 00244 { 00245 return tags(facet.id()); 00246 } 00247 00248 #if 0 00249 00250 const DerivedTagList& getEquations() const throw () { return equations; } 00251 00253 FacetSet facets(const FacetMatcher& filter) const throw () { return getFiltered(filter); } 00254 #endif 00255 00256 #if 0 00257 // These functions are here just to be used by Facet and Tag. I'm not 00258 // making them private because I don't want Facet and Tag to access other 00259 // Vocabulary member, and other classes can't use these anyway as Facet::Data and 00260 // Tag::Data are protected 00261 const Facet::Data& facetData(int idx) { return m_facets[idx]; } 00262 const Tag::Data& tagData(int idx) { return m_tags[idx]; } 00263 #endif 00264 00266 std::string facetName(int id) const { return findex.name(id); } 00267 00269 std::string tagName(int id) const { return tindex.name(id); } 00270 00272 std::string tagShortName(int id) const; 00273 00274 const std::map<std::string, std::string>& facetData(int id) const; 00275 const std::map<std::string, std::string>& tagData(int id) const; 00276 }; 00277 00278 } 00279 } 00280 00281 // vim:set ts=4 sw=4: 00282 #endif