/*
 * File-level definitions for the bidi tagging code: project headers,
 * fallback helper macros, the command-list node type, the mode flag,
 * per-line work buffers, the mirroring and replacement tables, and the
 * tag strings.
 */
#include "fribidixetex-defines.h"
#include "fribidixetex-bidi.h"
#include "fribidixetex-util.h"
#include "fribidixetex-dict.h"
#include "fribidixetex-ignore.h"
/* NOTE(review): the next three include directives carry no header name in
 * this copy of the file. Restore the names from the upstream source; they
 * are not guessed here. */
#include
#include
#include
#include "fribidixetex-io.h"

/* Fallback boolean constants, defined only when nothing else provides them. */
#ifndef TRUE
#define TRUE 1
#endif

#ifndef FALSE
#define FALSE 0
#endif

/* Fallback min/max macros. Each argument appears twice in the expansion,
 * so arguments with side effects may be evaluated twice. */
#ifndef min
#define min(x,y) ((x) < (y) ? (x) : (y))
#endif

#ifndef max
#define max(x,y) ((x) > (y) ? (x) : (y))
#endif

/***********************/
/* Global Data */

/* Node of a singly linked list of command names. */
typedef struct COMMAND {
    char *name;             /* NUL-terminated command name */
    struct COMMAND *next;   /* following node; the list ends at a null pointer */
} bidi_cmd_t;

/* Head of the command list; new nodes are linked in at the front. */
bidi_cmd_t *bidi_command_list;

/* Values held by bidi_mode. bidi_init() starts in MODE_BIDIOFF.
 * NOTE(review): presumably switched by the %BIDION, %BIDIOFF and %BIDILTR
 * tags defined below; the switching code is not visible in this copy. */
enum { MODE_BIDIOFF, MODE_BIDION, MODE_BIDILTR };

static int bidi_mode;

/* Embedding level of every character of the line being tagged;
 * bidi_add_tags() hands it to bidi_tag_tolerant_fribidi_l2v() to be filled
 * and then reads it by character index. */
static FriBidiLevel bidi_embed[MAX_LINE_SIZE];

/* Scratch buffer of MAX_LINE_SIZE characters.
 * NOTE(review): its users are not visible in this copy of the file. */
static FriBidiChar translation_buffer[MAX_LINE_SIZE];

/* Only mirroring of ASCII characters is supported - the code points
 * below 128 according to
 * http://www.unicode.org/Public/4.1.0/ucd/BidiMirroring.txt */

/* TODO: implement full mirroring list according to unicode
 * standard */

/* Pairs of strings; each row holds a bracket-like character and its mirror
 * image, braces being written with a leading backslash. The table ends with
 * a {NULL,NULL} row.
 * NOTE(review): presumably row[0] is replaced by row[1]; the code that
 * walks the table is not visible in this copy. */
static const char *bidi_mirror_list[][2] = {
    {"(", ")"},
    {")", "("},
    {"<", ">"},
    {">", "<"},
    {"[", "]"},
    {"]", "["},
    {"\\{","\\}"},
    {"\\}","\\{"},
    {NULL,NULL}
};

/* Pairs of strings mapping runs of hyphens to brace-wrapped dash macros;
 * the longer run is listed first. The table ends with a {NULL,NULL} row.
 * NOTE(review): presumably applied when replace_minus is set in
 * bidi_add_tags(); the code that walks the table is not visible here. */
static const char *bidi_hack_list[][2] = {
    {"---","{\\fribidixetexemdash}"},
    {"--","{\\fribidixetexendash}"},
    {NULL,NULL}
};

/********/
/* TAGS */
/********/

/* Percent-prefixed control tags.
 * NOTE(review): presumably matched against input lines; the matching code
 * is not visible in this copy. */
#define TAG_BIDI_ON         "%BIDION"
#define TAG_BIDI_OFF        "%BIDIOFF"
#define TAG_BIDI_NEW_TAG    "%BIDITAG"
#define TAG_BIDI_LTR        "%BIDILTR"
#define TAG_BIDI_DIC_TAG    "%BIDIDICTAG"
#define TAG_BIDI_DIC_ENV    "%BIDIDICENV"

/* Opening strings that bidi_add_tags() selects for a run whose embedding
 * level rises; each one ends with an opening brace. */
#define TAG_RTL             "\\fribidixetexRLE{"
#define TAG_LTR             "\\fribidixetexLRE{"
#define TAG_CLOSE           "}"

#define TAG_LATIN_NUM       "\\fribidixetexlatinnumbers{"
#define TAG_NONLATIN_NUM    "\\fribidixetexnonlatinnumbers{"

/***********************/

/* Compares begining of string U and C * for example "Hello!" 
== "Hel" */
/*
 * Prefix test between a FriBidiChar string and a char string.
 * Walks both strings while neither has reached its terminator and returns 0
 * at the first position where they differ. Also returns 0 when u ends
 * before c does. Returns 1 otherwise, i.e. when all of c was matched.
 * Characters are compared as they are; no case folding is done.
 */
static int bidi_strieq_u_a(const FriBidiChar *u,const char *c)
{
    while(*u && *c) {
        if(*u!=*c)
            return 0;
        u++;
        c++;
    }
    /* u ran out first: c is longer than u, so it cannot be a prefix */
    if(*u ==0 && *c!=0) {
        return 0;
    }
    return 1;
}

/* Returns the number of characters in front of the zero terminator of in. */
static int bidi_strlen(FriBidiChar *in)
{
    int len;
    for(len=0;*in;len++)
        in++;
    return len;
}

/* Safe functions for adding characters to buffer
 * if the line is too long the program just exits */
/* NOTE(review): this copy of the file is damaged from here. The body of
 * bidi_add_char_u() breaks off inside its first condition, and the text
 * resumes in the closing statements of a function that links a new node
 * (new_cmd, with name buffer new_text) in at the front of
 * bidi_command_list - presumably bidi_add_command(), which bidi_init()
 * calls. Everything in between is missing. The remains are reproduced
 * unchanged; restore the region from the upstream source. */
static void bidi_add_char_u(FriBidiChar *out,int *new_len,
                            int limit,FriBidiChar ch)
{
    if(*new_len+2next=bidi_command_list;
    new_cmd->name=new_text;
    strcpy(new_text,name);
    bidi_command_list = new_cmd;
}

/* Returns 1 when ch is an ASCII letter (a-z or A-Z), 0 otherwise. */
int bidi_is_cmd_char(FriBidiChar ch)
{
    if( ('a'<= ch && ch <='z') || ('A' <=ch && ch <= 'Z') )
        return 1;
    return 0;
}

/* Verifies whether the text of length "len" is
 * in the command list.
 * Returns 1 when some list entry matched all len characters and its name
 * ends right after them, 0 when no entry does. */
/* NOTE(review): the header and comparison of the inner loop are damaged in
 * this copy of the file; the remains are reproduced unchanged. */
int bidi_in_cmd_list(FriBidiChar *text,int len)
{
    int i;
    bidi_cmd_t *p;
    for(p=bidi_command_list;p;p=p->next) {
        for(i=0;iname[i]) break;
        }
        /* full match only: every character compared and the name stops here */
        if(i==len && p->name[len]==0){
            return 1;
        }
    }
    return 0;
}

/* Find special characters.
 * Returns TRUE when text starts with a backslash followed by one of
 * the characters listed in the switch below, FALSE otherwise. */
int bidi_is_latex_special_char(FriBidiChar *text)
{
    if(text[0]!='\\')
        return FALSE;
    /* Special characters according to lshort.pdf
     * # $ % ^ & _ { } ~ \, "{}" should be mirrored
     * thus not included in the list */
    switch (text[1]) {
        case '#' :
        case '$' :
        case '%' :
        case '^' :
        case '&' :
        case '_' :
        case '\\':
            return TRUE;
        default :
            return FALSE;
    }
}

/* Verifies whether the next string is a command
 * ie: "\\[a-zA-Z]+" or "\\[a-zA-Z]+\*"
 *
 * On TRUE, *command_length receives the number of characters in the
 * command: 2 for an escaped special character, otherwise the backslash,
 * the letters and an optional trailing star. On FALSE, *command_length is
 * left untouched. */
int bidi_is_command(FriBidiChar *text,int *command_length)
{
    int len;

    if(bidi_is_latex_special_char(text)) {
        /* Characters like \\ or \$ that should be treated
         * as `commands' */
        *command_length=2;
        return TRUE;
    }

    if(*text != '\\' || !bidi_is_cmd_char(text[1])) {
        return FALSE;
    }

    /* index 0 is the backslash; count the letters that follow it */
    len=1;
    while(bidi_is_cmd_char(text[len])) {
        len++;
    }
    /* starred form of the command */
    if(text[len] == '*') {
        len++;
    }
    *command_length = len;
    return TRUE;
}

/* This is an implementation of a state machine with a stack
 * that distinguishes between text and commands */

/* STACK VALUES */
/* Every *_IGN value is exactly one greater than its plain counterpart;
 * bidi_state_on_left_br() relies on that when it adds 1 to its result. */
enum {
    EMPTY,
    SQ_BRACKET ,SQ_BRACKET_IGN,
    BRACKET,
    BRACKET_IGN,
    CMD_BRACKET,
    CMD_BRACKET_IGN
};

/* STATES */
/* ST_NO is 0, so a plain truth test on a state means "ST_NORM or ST_IGN". */
enum { ST_NO, ST_NORM, ST_IGN };

/* Used for ignore commands.
 * Returns non-zero when top is one of the *_IGN stack values. */
int bidi_is_ignore(int top)
{
    return top == SQ_BRACKET_IGN
        || top== BRACKET_IGN
        || top == CMD_BRACKET_IGN;
}

/* Picks the stack value for a left bracket (named push in the code) and
 * resets *after_command_state to ST_NO.
 * The base value is CMD_BRACKET when the incoming state is not ST_NO and
 * BRACKET otherwise. The *_IGN variant is returned when top is an ignore
 * value or the incoming state is ST_IGN. */
int bidi_state_on_left_br(int top,int *after_command_state)
{
    int ign_addon;
    int push,state = *after_command_state;
    if(bidi_is_ignore(top) || state == ST_IGN) {
        ign_addon = 1;
    }
    else {
        ign_addon = 0;
    }
    if(state) {
        push = CMD_BRACKET;
    }
    else{
        push = BRACKET;
    }
    *after_command_state = ST_NO;
    /* plain value, or the *_IGN value that directly follows it in the enum */
    return push + ign_addon;
}

/* Picks the stack value for a left square bracket and resets
 * *after_command_state to ST_NO. Returns SQ_BRACKET_IGN when top is an
 * ignore value or the incoming state is ST_IGN, SQ_BRACKET otherwise. */
int bidi_state_on_left_sq_br(int top,int *after_command_state)
{
    int push;
    if(bidi_is_ignore(top) || *after_command_state == ST_IGN) {
        push = SQ_BRACKET_IGN;
    }
    else {
        push = SQ_BRACKET;
    }
    *after_command_state = ST_NO;
    return push;
}

/* Sets *after_command_state for a right bracket from the stack value top:
 * ST_NORM for CMD_BRACKET, ST_NO for BRACKET and BRACKET_IGN, ST_IGN for
 * everything else.
 * NOTE(review): the last branch is annotated as CMD_BRACKET_IGN but is also
 * taken for EMPTY and the square-bracket values - confirm callers never
 * pass those. */
void bidi_state_on_right_br(int top,int *after_command_state)
{
    if(top == CMD_BRACKET) {
        *after_command_state = ST_NORM;
    }
    else if(top == BRACKET || top == BRACKET_IGN) {
        *after_command_state = ST_NO;
    }
    else {/*top == CMD_BRACKET_IGN*/
        *after_command_state = ST_IGN;
    }
}

/* Sets *after_command_state for a right square bracket: ST_IGN when top is
 * SQ_BRACKET_IGN, ST_NORM for any other value of top. */
void bidi_state_on_right_sq_br(int top,int *after_command_state)
{
    if(top == SQ_BRACKET_IGN) {
        *after_command_state = ST_IGN;
    }
    else { /* top == SQ_BRACKET */
        *after_command_state = ST_NORM;
    }
}

/* Length of an equation delimited by single dollar signs.
 * Scanning starts at index 1, so in[0] is taken as the opening mark without
 * being checked. A backslash followed by a dollar sign or by another
 * backslash is skipped as a pair. Returns the number of characters up to
 * and including the closing dollar sign, or up to the end of the string
 * when there is none. */
int bidi_calc_equation_inline(FriBidiChar *in)
{
    int len=1;
    while(in[len] && in[len]!='$') {
        if(in[len]=='\\' && (in[len+1]=='$' || in[len+1]=='\\')) {
            len+=2;
        }
        else {
            len++;
        }
    }
    if(in[len]=='$')
        len++;
    return len;
}

/* Length of an equation using \[ and \] marks.
 * Scanning starts at index 2, so the first two characters are taken as the
 * opening mark without being checked. A double backslash is skipped as a
 * pair. Returns the number of characters up to and including the closing
 * mark, or up to the end of the string when there is none. */
int bidi_calc_equation_display(FriBidiChar *in)
{
    int len=2;
    while(in[len]){
        if(in[len]=='\\' && in[len+1]=='\\')
            len+=2;
        else if(in[len]=='\\' && in[len+1]==']')
            return len+2;
        else
            len++;
    }
    return len;
}

/* Support of equations.
 * Chooses the inline scanner when the text starts with a dollar sign and
 * the display scanner for anything else; returns that scanner's length. */
int bidi_calc_equation(FriBidiChar *in)
{
    if(*in=='$')
        return bidi_calc_equation_inline(in);
    else
        return bidi_calc_equation_display(in);
}

/* This function parses the text "in" in marks places that * should be ignored by fribidi in "is_command" as 
true */
/* Parameters: in is the text and len its length in characters; is_command
 * is the caller's per-character flag array; is_rtl is the direction flag.
 * NOTE(review): only the declarations and the first statements of this
 * function survive in this copy of the file, so nothing more can be said
 * about how the flags are computed. */
void bidi_mark_commands(FriBidiChar *in,int len,char *is_command,int is_rtl)
{

    char *parthness_stack;
    int stack_size=0;
    int cmd_len,top;
    int after_command_state=ST_NO;
    int mark,pos,symbol,i,push;

    /* Assumption - depth of stack can not be bigger than text length */
    parthness_stack = utl_malloc(len);

    pos=0;

    /* NOTE(review): this copy of the file is damaged at this point. The loop
     * condition below breaks off after its first operand and the rest of
     * bidi_mark_commands() is missing; the stray 10 apparently belongs to a
     * lost preprocessor condition. The remainder of the same physical line
     * of the file sits behind a line-comment marker, so a compiler reads it
     * as comment text. It is therefore kept exactly as found, on one line,
     * together with the comment remnant that follows it.
     * That line holds two alternative definitions of get_bidi_levels() -
     * one calling fribidi_get_bidi_types and
     * fribidi_get_par_embedding_levels, one calling
     * fribidi_log2vis_get_embedding_levels - separated by the remains of a
     * preprocessor conditional, and then the signature and first statements
     * of bidi_tag_tolerant_fribidi_l2v(), which allocates in_tmp (len+1
     * characters) and embed_tmp (len levels).
     * Restore this region from the upstream source instead of editing it
     * here. */
    while(pos 10
// 0.10 static void get_bidi_levels(FriBidiChar *in,int length,int is_rtl,FriBidiLevel *embed) { FriBidiCharType *types = utl_malloc(sizeof(FriBidiCharType)*length); FriBidiParType direction = is_rtl ? FRIBIDI_PAR_RTL : FRIBIDI_PAR_LTR; fribidi_get_bidi_types(in,length,types); fribidi_get_par_embedding_levels(types,length,&direction,embed); utl_free(types); } #else // old fribidi static void get_bidi_levels(FriBidiChar *in,int length,int is_rtl,FriBidiLevel *embed) { FriBidiCharType direction; if(is_rtl) direction = FRIBIDI_TYPE_RTL; else direction = FRIBIDI_TYPE_LTR; fribidi_log2vis_get_embedding_levels(in,length,&direction,embed); } #endif /* This function marks embedding levels at for text "in", * it ignores different tags */ void bidi_tag_tolerant_fribidi_l2v( FriBidiChar *in,int len, int is_rtl, FriBidiLevel *embed, char *is_command) { int in_pos,out_pos,cmd_len,i; FriBidiChar *in_tmp; FriBidiLevel *embed_tmp,fill_level; in_tmp=(FriBidiChar*)utl_malloc(sizeof(FriBidiChar)*(len+1)); embed_tmp=(FriBidiLevel*)utl_malloc(sizeof(FriBidiLevel)*len); /********************************************** * This is main parser that marks commands * * across the text i.e. 
marks non text * **********************************************/
    /* The statements below continue the body of
     * bidi_tag_tolerant_fribidi_l2v(), whose signature is in the preserved
     * line above: first flag the command characters of the text. */
    bidi_mark_commands(in,len,is_command,is_rtl);

    /**********************************************/
    /* Copy all the data without tags for fribidi */
    /**********************************************/
    in_pos=0;
    out_pos=0;

    /* NOTE(review): this copy of the file is damaged here as well. The copy
     * loop and the rest of bidi_tag_tolerant_fribidi_l2v() are missing, and
     * the text resumes inside a scanning loop that has the same shape as
     * bidi_only_nonlatin_number() below - presumably the body of
     * bidi_only_latin_number(), which bidi_add_tags() calls. The surviving
     * part returns 0 for a character above 127 or an ASCII letter and 1
     * once the loop ends. The remains are reproduced unchanged. */
    while(in_pos127) {
            /* other foreign language */
            return 0;
        }
        if(('a'<=ch && ch<='z') || ('A'<=ch && ch<='Z')) {
            /* Find latin characters */
            return 0;
        }
        string++;
        levels++;
    }
    return 1;
}

/* Scans string and levels in step for as long as the string has not ended
 * and the current level is even. Returns 0 as soon as an ASCII digit or an
 * ASCII letter is met, and 1 when the scan ends without meeting either. */
int bidi_only_nonlatin_number(FriBidiLevel *levels,FriBidiChar *string)
{
    FriBidiChar ch;
    while(*string && (*levels & 1)==0 ) {
        ch=*string;
        if('0'<=ch && ch<='9') {
            /* Find latin numbers */
            return 0;
        }
        if(('a'<=ch && ch<='z') || ('A'<=ch && ch<='Z')) {
            /* Find latin characters */
            return 0;
        }
        string++;
        levels++;
    }
    return 1;
}

/* Mark Unicode LRM & RLM characters.
 * Returns 1 for code points 0x200E and 0x200F, 0 for anything else. */
int bidi_is_directional_mark(FriBidiChar c)
{
    if(c==0x200F || c==0x200E) {
        return 1;
    }
    return 0;
}

/* The function that parses a line and adds the required \R \L tags.
 *
 * in is the zero-terminated input text and out the output buffer, with
 * limit passed on to the bounded append helper. is_rtl is handed to the
 * level computation and to bidi_basic_level().
 * NOTE(review): replace_minus and no_mirroring are not used in the part of
 * the body that survives in this copy of the file; presumably they switch
 * the dash replacement and the mirroring tables on and off. */
void bidi_add_tags(FriBidiChar *in,FriBidiChar *out,int limit,
                   int is_rtl,int replace_minus,int no_mirroring)
{
    int len,new_len,level,new_level,brakets;
    int i,size;
    int is_number_env=0;

    const char *tag;
    char *is_command;

    len=bidi_strlen(in);

    /* one flag per input character */
    is_command=(char*)utl_malloc(len);

    /* fills bidi_embed with a level per character and is_command with the
     * command flags */
    bidi_tag_tolerant_fribidi_l2v(in,len,is_rtl,bidi_embed,is_command);

    level=bidi_basic_level(is_rtl);

    new_len=0;
    out[0]=0;
    brakets=0;
    /* NOTE(review): the loop header and the start of its body are damaged
     * in this copy of the file. What survives is the branch taken when the
     * level rises: it selects the opening tag, counts it in brakets and
     * appends it to out. The branch for a falling level breaks off
     * immediately and the text resumes in the closing statements of a
     * function that frees a list node and its name and then warns on
     * stderr when bidi_mode is not MODE_BIDIOFF. The remains are
     * reproduced unchanged. */
    for(i=0,new_len=0;ilevel) {
            /* LTR Direction according to odd/even value of level */

            is_number_env=FALSE;

            if((new_level & 1) == 0) {
                /* even level: pick a number tag when the run qualifies,
                 * the plain LTR tag otherwise */
                if(bidi_only_latin_number(bidi_embed+i,in+i)){
                    tag=TAG_LATIN_NUM;
                    is_number_env=TRUE;
                }
                else if(bidi_only_nonlatin_number(bidi_embed+i,in+i)){
                    tag=TAG_NONLATIN_NUM;
                    is_number_env=TRUE;
                }
                else {
                    tag=TAG_LTR;
                }
            }
            else {
                tag=TAG_RTL;
            }
            brakets++;
            bidi_add_str_c(out,&new_len,limit,tag);
        }
        else if(new_levelnext;
        utl_free(tmp->name);
        utl_free(tmp);
    }
    if(bidi_mode != MODE_BIDIOFF) {
        fprintf(stderr,"Warning: No %%BIDIOFF Tag at the end of the file\n");
    }
}

/* Resets bidi_mode to MODE_BIDIOFF and registers every entry of
 * ignore_tags_list through bidi_add_command(), stopping at the first entry
 * whose first character is zero. f_out is not used in this function.
 * NOTE(review): ignore_tags_list is not defined in this file; presumably it
 * comes from one of the project headers. */
void bidi_init(FILE *f_out)
{
    int i;
    bidi_mode = MODE_BIDIOFF;
    for(i=0;ignore_tags_list[i][0];i++) {
        bidi_add_command(ignore_tags_list[i]);
    }
}