4 #include <wibble/regexp.h>
5 #include <wibble/sys/pipe.h>
6 #include <wibble/sys/exec.h>
31 return term[0] ==
'X' && term[1] ==
'T';
38 mutable Xapian::MSet::const_iterator
m_iter;
51 t.
_id = m_iter.get_document().get_data();
60 if ( m_matches.size() == chunkSize && m_iter == m_matches.end() ) {
61 m_matches =
enq().get_mset( m_pos, chunkSize );
62 m_iter = m_matches.begin();
71 return m_matches.size() < 30 && m_iter == m_matches.end();
81 Xapian::Enquire
const &
enq()
const {
82 return *
reinterpret_cast< Xapian::Enquire
const *
>(
m_enqPlace );
85 List( Xapian::Enquire _enq )
87 Xapian::Enquire *e =
new (
m_enqPlace) Xapian::Enquire( _enq );
88 assert_eq( e, &
enq() );
89 m_matches =
enq().get_mset( 0, chunkSize );
90 m_iter = m_matches.begin();
101 typedef std::set< std::string >
Terms;
112 Query( Xapian::Database &e ) : m_db( &e ), m_enq( e ) {
118 return wibble::Tokenizer( q,
"[A-Za-z0-9._+:-]+", REG_EXTENDED );
121 template<
typename Out >
125 for (wibble::Tokenizer::const_iterator i = tok.begin(); i != tok.end(); ++i )
127 if ( (*i).find(
"::" ) != std::string::npos ) {
130 std::string t = wibble::str::tolower(*i);
131 std::string s =
m_stem(t);
139 template<
typename Out >
144 Xapian::MSet mset = m_enq.get_mset(0, 5);
145 for ( Xapian::MSet::iterator i = mset.begin(); i != mset.end(); ++i )
146 rset.add_document(i);
149 Xapian::ESet eset = m_enq.get_eset(5, rset, &tagf);
150 for ( Xapian::ESetIterator i = eset.begin(); i != eset.end(); ++i )
156 Xapian::Query inc( Xapian::Query::OP_OR,
159 exc( Xapian::Query::OP_OR,
162 secondary( Xapian::Query::OP_OR,
165 secondary1( Xapian::Query::OP_SCALE_WEIGHT, secondary, 0.02 ),
166 query1( Xapian::Query::OP_AND_NOT, inc, exc ),
167 query( Xapian::Query::OP_OR, query1, secondary1 );
169 m_enq.set_query( query );
173 expand( std::inserter( m_include, m_include.begin() ) );
179 Xapian::MSet first = m_enq.get_mset(0, 1, 0, 0, 0);
180 Xapian::MSetIterator ifirst = first.begin();
181 if ( ifirst != first.end() ) {
189 return List( m_enq );
194 std::map< std::string, int > relev;
196 Xapian::MSet mset = m_enq.get_mset(0, 100);
197 for ( Xapian::MSet::iterator i = mset.begin(); i != mset.end(); ++i )
198 rset.add_document(i);
201 Xapian::ESet eset = m_enq.get_eset(n, rset, &tagf);
202 for ( Xapian::ESetIterator i = eset.begin(); i != eset.end(); ++i )
203 relev.insert( relev.begin(),
205 std::string( *i, 2, std::string::npos ),
210 void addTerms( std::string t,
bool partial =
false,
bool exclude =
false ) {
213 Terms &to = exclude ? m_exclude :
m_include;
214 std::vector< std::string > tok;
217 if ( tok.back().size() == 1 ) {
221 m_db->allterms_begin( tok.back() ),
222 m_db->allterms_end( tok.back() ),
223 std::back_inserter( tok ) );
226 std::copy( tok.begin(), tok.end(), std::inserter( to, to.begin() ) );
229 void addTerms(
const Terms &t,
bool exclude =
false ) {
230 Terms &to = exclude ? m_exclude :
m_include;
231 std::copy( t.begin(), t.end(), std::inserter( to, to.begin() ) );
237 m_db->allterms_begin( term ),
238 m_db->allterms_end( term ),
239 std::inserter( m_secondary, m_secondary.begin() ) );
241 m_include.insert( m_secondary.begin(), term );
255 static std::string
toLower(
const std::string& str);
270 Xapian::Database&
db() {
276 const Xapian::Database&
db()
const {
283 m_db = Xapian::Database();
297 op =
new OpProgress();
299 wibble::exception::AddContext _ctx(
"Rebuilding Xapian database." );
303 wibble::sys::Exec ex(
"update-apt-xapian-index" );
304 ex.args.push_back(
"--batch-mode" );
305 ex.searchInPath =
true;
306 ex.forkAndRedirect( 0, &outfd, 0 );
308 wibble::sys::Pipe monit( outfd );
309 while ( !monit.eof() ) {
310 std::string line = monit.nextLine();
311 if ( line.empty() ) {
315 std::cerr <<
"got : " << line << std::endl;
316 if ( wibble::str::startsWith( line,
"begin: " ) ) {
317 op_str = std::string( line, 7, std::string::npos );
318 op->OverallProgress( 0, 100, 100, op_str );
320 }
else if ( wibble::str::startsWith( line,
"done: " ) ) {
322 }
else if ( wibble::str::startsWith( line,
"progress: " ) ) {
323 wibble::ERegexp re(
"progress: ([0-9]+)/([0-9]+)", 3 );
324 if ( re.match( line ) ) {
325 assert_eq( re[2],
"100" );
326 op->OverallProgress( atoi( re[1].c_str() ), 100, 100, op_str );
339 int qualityCutoff = 50 )
345 if ( s.length() > 2 )
359 Xapian::docid
docidByName(
const std::string& pkgname)
const;
364 Xapian::Query
makeORQuery(
const std::string& keywords)
const;
377 template<
typename ITER>
378 Xapian::Query
makeORQuery(
const ITER& begin,
const ITER& end)
const
380 return Xapian::Query(Xapian::Query::OP_OR, begin, end);
384 std::vector<std::string>
expand(Xapian::Enquire& enq)
const;
397 Xapian::valueno val_id)
const;
402 int getIntValue(
const std::string& pkgname, Xapian::valueno val_id)
const;
List results()
Definition: xapian.h:187
int getIntValue(const std::string &pkgname, Xapian::valueno val_id) const
Get the integer value stored in value slot val_id for the package pkgname.
Definition: xapian.cpp:123
void updateEnquire()
Definition: xapian.h:154
Terms m_secondary
Definition: xapian.h:102
const Xapian::valueno VAL_ITERATING_SECURITY
Definition: xapian.h:22
void setQualityCutoff(int c)
Definition: xapian.h:106
Query partialQuery(const std::string &s)
Definition: xapian.h:350
time_t timestamp() const
Timestamp of when the Xapian database was last updated.
Definition: xapian.cpp:54
Xapian::Database m_db
Definition: xapian.h:250
static const size_t chunkSize
Definition: xapian.h:42
char m_enqPlace[sizeof(Xapian::Enquire)]
Definition: xapian.h:36
Xapian::Stem m_stem
Definition: xapian.h:100
int m_cutoff
Definition: xapian.h:103
void setExpand(bool e)
Definition: xapian.h:110
Xapian::Query makeORQuery(const ITER &begin, const ITER &end) const
Build a query with the given keywords, specified as iterators of strings.
Definition: xapian.h:378
const Xapian::valueno VAL_ITERATING_FUNCTIONALITY
Definition: xapian.h:20
virtual bool operator()(const std::string &term) const
Definition: xapian.h:30
Xapian::Query makeORQuery(const std::string &keywords) const
Tokenize the string and build an OR query with the resulting keywords.
void normalize_and_add(Xapian::Document &doc, const std::string &term, int &pos) const
Add normalised tokens computed from the string to the document doc.
Definition: xapian.cpp:75
const Xapian::valueno VAL_ITERATING_QUALITY
Definition: xapian.h:24
time_t timestamp()
Definition: core/apt.h:135
const Xapian::valueno VAL_ITERATING_SUPPORT
Definition: xapian.h:25
double getDoubleValue(const std::string &pkgname, Xapian::valueno val_id) const
Get the double value stored in value slot val_id for the package pkgname.
Definition: xapian.cpp:109
void update(OpProgress *op=0)
Definition: xapian.h:295
static std::string toLower(const std::string &str)
Return a lowercased copy of the string.
Definition: xapian.cpp:66
void addTerms(const Terms &t, bool exclude=false)
Definition: xapian.h:229
Query query(const std::string &s, bool expand=true, int qualityCutoff=50)
Definition: xapian.h:337
const Xapian::valueno VAL_ITERATING_PERFORMANCE
Definition: xapian.h:23
Xapian::Query makePartialORQuery(const std::string &keywords) const
Tokenize the string and build an OR query with the resulting keywords.
-*- C++ -*- (c) 2006, 2007 Petr Rockai me@mornfall.net
Definition: apt.cc:43
void tokenizeQuery(std::string q, Out o) const
Definition: xapian.h:122
Terms m_include
Definition: xapian.h:102
const Xapian::valueno VAL_ITERATING_ADOPTION
Definition: xapian.h:26
std::set< std::string > Terms
Definition: xapian.h:101
List head() const
Definition: xapian.h:44
const Xapian::valueno VAL_POPCON
Definition: xapian.h:18
Source()
Definition: xapian.cpp:49
const Xapian::valueno VAL_ITERATING_USABILITY
Definition: xapian.h:21
Xapian::MSet m_matches
Definition: xapian.h:37
Definition: core/apt.h:112
List tail() const
Definition: xapian.h:74
bool m_expand
Definition: xapian.h:104
List(Xapian::Enquire _enq)
Definition: xapian.h:85
Xapian::Enquire const & enq() const
Definition: xapian.h:81
Token token() const
Definition: xapian.h:49
const Xapian::valueno VAL_APT_INSTALLED_SIZE
Definition: xapian.h:16
void invalidate()
Definition: xapian.h:282
Xapian::Database & db()
Access the Xapian database.
Definition: xapian.h:270
const Xapian::Database & db() const
Access the Xapian database.
Definition: xapian.h:276
void addTerms(std::string t, bool partial=false, bool exclude=false)
Definition: xapian.h:210
Xapian::Enquire m_enq
Definition: xapian.h:99
bool empty() const
Definition: xapian.h:67
Xapian::Database * m_db
Definition: xapian.h:98
const Xapian::valueno VAL_ITERATING_RATING
Definition: xapian.h:19
List()
Definition: xapian.h:94
std::vector< std::string > expand(Xapian::Enquire &enq) const
Return a list of tag-based terms that can be used to expand an OR query.
void updateLeniently(AptDatabase &apt, OpProgress *op=0)
Definition: xapian.h:290
std::string _id
Definition: token.h:11
Terms m_exclude
Definition: xapian.h:102
Xapian::MSet::const_iterator m_iter
Definition: xapian.h:38
void addSecondaryTerm(const std::string &term, bool partial=false)
Definition: xapian.h:234
bool operator<(const List &o) const
Definition: xapian.h:55
Xapian::docid docidByName(const std::string &pkgname) const
Return the Xapian document id of the package named pkgname.
Definition: xapian.cpp:85
void expand(Out o) const
Definition: xapian.h:140
Xapian::Query makeRelatedQuery(const std::string &pkgname) const
Create a query to look for packages similar to the given one.
Definition: xapian.cpp:94
Xapian::Stem m_stem
Definition: xapian.h:251
const Xapian::valueno VAL_APT_PACKAGE_SIZE
Definition: xapian.h:17
List Type
Definition: xapian.h:40
std::map< std::string, int > relevantTags(int n=30)
Definition: xapian.h:192
void open() const
Definition: xapian.cpp:58
wibble::Tokenizer queryTokenizer(std::string q) const
Definition: xapian.h:117
int m_pos
Definition: xapian.h:39
bool m_opened
Definition: xapian.h:252
void seek() const
Definition: xapian.h:59
bool hasData() const
Returns true if the index has data.
Definition: xapian.h:335
Query(Xapian::Database &e)
Definition: xapian.h:112