diff --git a/src/file_txt.c b/src/file_txt.c index d6c27c1c..dbdce24a 100644 --- a/src/file_txt.c +++ b/src/file_txt.c @@ -522,7 +522,7 @@ static int header_check_txt(const unsigned char *buffer, const unsigned int buff { static char *buffer_lower=NULL; static unsigned int buffer_lower_size=0; - unsigned int i; + unsigned int l=0; const unsigned char header_asp[22] = "<%@ language=\"vbscript"; const unsigned char header_bat[9] = "@echo off"; const unsigned char header_vcf[11] = "begin:vcard"; @@ -539,6 +539,7 @@ static int header_check_txt(const unsigned char *buffer, const unsigned int buff const char sign_html[] = "data_check=&data_check_txt; @@ -610,7 +611,7 @@ static int header_check_txt(const unsigned char *buffer, const unsigned int buff file_recovery_new->extension="pl"; return 1; } - if(td_memmem(haystack, l, header_sig_python, sizeof(header_sig_python)) != NULL) + if(td_memmem(haystack, ll, header_sig_python, sizeof(header_sig_python)) != NULL) { reset_file_recovery(file_recovery_new); file_recovery_new->data_check=&data_check_txt; @@ -618,7 +619,7 @@ static int header_check_txt(const unsigned char *buffer, const unsigned int buff file_recovery_new->extension="py"; return 1; } - if(td_memmem(haystack, l, header_sig_ruby, sizeof(header_sig_ruby)) != NULL) + if(td_memmem(haystack, ll, header_sig_ruby, sizeof(header_sig_ruby)) != NULL) { reset_file_recovery(file_recovery_new); file_recovery_new->data_check=&data_check_txt; @@ -643,31 +644,59 @@ static int header_check_txt(const unsigned char *buffer, const unsigned int buff /* ind=~0: random * ind=~1: constant */ double ind=1; - unsigned int nbr=0; + unsigned int nbrf=0; + unsigned int is_csv=1; /* Detect Fortran */ { char *str=buffer_lower; while((str=strstr(str, "\n "))!=NULL) { - nbr++; + nbrf++; str++; } } - if(i>1) + /* Detect csv */ + { + unsigned int csv_per_line_current=0; + unsigned int csv_per_line=0; + unsigned int line_nbr=0; + unsigned int i; + for(i=0;i0;i++) + { + if(buffer_lower[i]==';') + { + csv_per_line_current++; + } + else if(buffer_lower[i]=='\n') + { + if(line_nbr==0) + csv_per_line=csv_per_line_current; + if(csv_per_line_current!=csv_per_line) + is_csv=0; + line_nbr++; + csv_per_line_current=0; + } + } + if(csv_per_line<1 || line_nbr<10) + is_csv=0; + } + if(l>1) { unsigned int stats[256]; - unsigned int j; + unsigned int i; memset(&stats, 0, sizeof(stats)); - for(j=0;j0) - ind+=stats[j]*(stats[j]-1); - ind=ind/i/(i-1); + for(i=0;i<256;i++) + if(stats[i]>0) + ind+=stats[i]*(stats[i]-1); + ind=ind/l/(l-1); } - if(nbr>10 && i<=0.90) + if(nbrf>10 && ind<0.9) ext="f"; + else if(is_csv>0) + ext="csv"; /* Detect LaTeX, C, PHP, JSP, ASP, HTML, C header */ else if(strstr(buffer_lower, sign_tex)!=NULL) ext="tex"; @@ -683,9 +712,9 @@ static int header_check_txt(const unsigned char *buffer, const unsigned int buff ext="asp"; else if(strstr(buffer_lower, sign_html)!=NULL) ext="html"; - else if(strstr(buffer_lower, sign_h)!=NULL && i>50) + else if(strstr(buffer_lower, sign_h)!=NULL && l>50) ext="h"; - else if(i<100 || ind<0.03 || ind>0.90) + else if(l<100 || ind<0.03 || ind>0.90) ext=NULL; else ext=file_hint_txt.extension; @@ -712,15 +741,15 @@ Doc: \r (0xD) if(file_recovery->file_stat->file_hint==&file_hint_doc && strstr(file_recovery->filename,".doc")!=NULL) { - unsigned int j; + unsigned int i; unsigned int txt_nl=0; - for(j=0;j