/* $Id: junk_filter.h,v 1.16 1998/06/14 05:17:25 dps Exp $ */ #ifndef __JUNK_FILTER_H__ #define __JUNK_FILTER_H__ #include #include #include #include "tune.h" class word_junk_filter: public streambuf { private: enum { INIT_NONE, INIT_SKIPPED, INIT_DONE } init; /* Init stage */ istream *is; /* Data source */ enum { __UNI_PROBE, /* Unknown, probe */ __UNI_NO, /* No */ __UNI_YES_LE, /* Yes, little endian */ __UNI_YES_BE /* Yes, big endian */ } unicode; /* Unicode flag */ char junk_end_buf[RESUME_CHARS+1]; /* Junk end characters */ int junk_end_usage; /* Usage of junk end buffer */ int text_size; /* Text characters retreived */ int junk_size; /* Amount of junk skipped */ char buf[JUNK_FILT_BUFSIZE+2]; enum { NORMAL, UNICODE_Z, /* Unicode, Unicode zero expected */ SKIP_JUNK, SKIP_JUNK_WASPRN, /* Skip junk, Skip junk was printable */ SINK_JUNK, /* Like skip junk with more restart */ } mode; char save; int ns; /* Tunable stuff */ struct { int st_min_zeros; // Minimum leading zeros int st_min_ff; // Minimum leadingg ff characters int st_rej_limit; // Non-printing characters tolerated int non_unicode_st; // Not unicode start treshold int unicode_st; // Unicode start treshold int max_junk; // Junk required to trigger SINK_JUNK mode int min_text; // Text required to trigger SINK_JUNK mode int max_top_run; // Max top bit set run struct { unsigned unicode_aggresive: 1; // Aggresive unicode mod unsigned ff_intro:1; // Allow ff leadin characters } options; int resume_chars; // Good characters need to stop junk skipping } tune; void set_dfl_tuning(void); // Set default tuning int __fill(const char *, int, int, int); int filter_junk(const char *, int); int skip_to_start(void); long pos; public: inline int overflow(int ch) { ch=ch; return EOF; /* ZXZ */ } inline int sync(void) { return 0; } int underflow(void); int do_reset(void); inline int __uflow(void) { return this->underflow(); } inline void open(istream *s) { if (is!=NULL) delete(is); is=s; pos=0; init=INIT_NONE; mode=NORMAL; junk_end_usage=0; ns=0; } inline word_junk_filter(void) { set_dfl_tuning(); // Set default tuning is=NULL; init=INIT_NONE; } inline word_junk_filter(istream *s) { set_dfl_tuning(); // Set default tuning pos=0; init=INIT_NONE; junk_end_usage=0; is=s; mode=NORMAL; ns=0; } inline word_junk_filter(const char *f) { ifstream *s; set_dfl_tuning(); // Set default tuning s=new(ifstream)(f); if (!(*s)) { cerr<<"Can not open "<