testdisk/src/file_txt.c

1275 lines
46 KiB
C
Raw Blame History

/*
File: file_txt.c
Copyright (C) 2005-2009 Christophe GRENIER <grenier@cgsecurity.org>
This software is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with this program; if not, write the Free Software Foundation, Inc., 51
Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
*/
#ifdef HAVE_CONFIG_H
#include <config.h>
#endif
#ifdef HAVE_STDLIB_H
#include <stdlib.h>
#endif
#ifdef HAVE_STRING_H
#include <string.h>
#endif
#ifdef HAVE_TIME_H
#include <time.h>
#endif
#include <ctype.h> /* tolower */
#include <stdio.h>
#include "types.h"
#include "common.h"
#include "filegen.h"
#include "log.h"
#include "memmem.h"
#include "file_txt.h"
extern const file_hint_t file_hint_doc;
extern const file_hint_t file_hint_jpg;
extern const file_hint_t file_hint_pdf;
extern const file_hint_t file_hint_tiff;
extern const file_hint_t file_hint_zip;
static inline int filtre(unsigned char car);
static void register_header_check_txt(file_stat_t *file_stat);
static int header_check_txt(const unsigned char *buffer, const unsigned int buffer_size, const unsigned int safe_header_only, const file_recovery_t *file_recovery, file_recovery_t *file_recovery_new);
static void register_header_check_fasttxt(file_stat_t *file_stat);
static int header_check_fasttxt(const unsigned char *buffer, const unsigned int buffer_size, const unsigned int safe_header_only, const file_recovery_t *file_recovery, file_recovery_t *file_recovery_new);
#ifdef UTF16
static int header_check_le16_txt(const unsigned char *buffer, const unsigned int buffer_size, const unsigned int safe_header_only, const file_recovery_t *file_recovery, file_recovery_t *file_recovery_new);
#endif
static int data_check_txt(const unsigned char *buffer, const unsigned int buffer_size, file_recovery_t *file_recovery);
static void file_check_emlx(file_recovery_t *file_recovery);
static void file_check_ers(file_recovery_t *file_recovery);
static void file_check_svg(file_recovery_t *file_recovery);
static void file_check_smil(file_recovery_t *file_recovery);
static void file_check_xml(file_recovery_t *file_recovery);
const file_hint_t file_hint_fasttxt= {
.extension="tx?",
.description="Text files with header: rtf,xml,xhtml,mbox/imm,pm,ram,reg,sh,slk,stp,jad,url",
.min_header_distance=0,
.max_filesize=PHOTOREC_MAX_FILE_SIZE,
.recover=1,
.enable_by_default=1,
.register_header_check=&register_header_check_fasttxt
};
const file_hint_t file_hint_txt= {
.extension="txt",
.description="Other text files: txt,html,asp,bat,C,jsp,perl,php,py/emlx... scripts",
.min_header_distance=0,
.max_filesize=PHOTOREC_MAX_FILE_SIZE,
.recover=1,
.enable_by_default=1,
.register_header_check=&register_header_check_txt
};
static const unsigned char header_adr[25] = "Opera Hotlist version 2.0";
static const unsigned char header_bash[11] = "#!/bin/bash";
static const unsigned char header_cls[24] = {'V','E','R','S','I','O','N',' ','1','.','0',' ','C','L','A','S','S','\r','\n','B','E','G','I','N'};
static const unsigned char header_cue1[10] = "REM GENRE ";
static const unsigned char header_cue2[6] = { 'F', 'I', 'L', 'E', ' ', '"'};
static const unsigned char header_dc[6] = "SC V10";
static const unsigned char header_dif[12] = { 'T', 'A', 'B', 'L', 'E', '\r', '\n', '0', ',', '1', '\r', '\n'};
static const unsigned char header_emka[16] = { '1', '\t', '\t', '\t', '\t', '\t', 't', 'h','i','s',' ','f','i','l','e','\t'};
static const unsigned char header_ers[19] = "DatasetHeader Begin";
static const unsigned char header_ics[15] = "BEGIN:VCALENDAR";
static const unsigned char header_imm[13] = {'M','I','M','E','-','V','e','r','s','i','o','n',':'};
static const unsigned char header_jad[9] = { 'M', 'I', 'D', 'l', 'e', 't', '-', '1', ':'};
static const unsigned char header_json[31] = {
'{', '"', 't', 'i', 't', 'l', 'e', '"',
':', '"', '"', ',', '"', 'i', 'd', '"',
':', '1', ',', '"', 'd', 'a', 't', 'e',
'A', 'd', 'd', 'e', 'd', '"', ':' };
static const unsigned char header_ksh[10] = "#!/bin/ksh";
static const unsigned char header_lyx[7] = {'#', 'L', 'y', 'X', ' ', '1', '.'};
static const unsigned char header_m3u[7] = {'#','E','X','T','M','3','U'};
static const unsigned char header_mail[19] = {'F','r','o','m',' ','M','A','I','L','E','R','-','D','A','E','M','O','N',' '};
static const unsigned char header_mail2[5] = {'F','r','o','m',' '};
static const unsigned char header_mdl[7] = {'M','o','d','e','l',' ','{'};
static const unsigned char header_mnemosyne[48] = {
'-', '-', '-', ' ', 'M', 'n', 'e', 'm',
'o', 's', 'y', 'n', 'e', ' ', 'D', 'a',
't', 'a', ' ', 'B', 'a', 's', 'e', ' ',
'-', '-', '-', ' ', 'F', 'o', 'r', 'm',
'a', 't', ' ', 'V', 'e', 'r', 's', 'i',
'o', 'n', ' ', '2', ' ', '-', '-', '-'
};
static const unsigned char header_msf[19] = "// <!-- <mdb:mork:z";
static const unsigned char header_mysql[14] = { '-', '-', ' ', 'M', 'y', 'S', 'Q', 'L', ' ', 'd', 'u', 'm', 'p', ' '};
static const unsigned char header_perlm[7] = "package";
static const unsigned char header_phpMyAdmin[22]= {
'-', '-', ' ', 'p', 'h', 'p', 'M', 'y',
'A', 'd', 'm', 'i', 'n', ' ', 'S', 'Q',
'L', ' ', 'D', 'u', 'm', 'p'};
static const unsigned char header_postgreSQL[38]= {
'-', '-', '\n', '-', '-', ' ', 'P', 'o',
's', 't', 'g', 'r', 'e', 'S', 'Q', 'L',
' ', 'd', 'a', 't', 'a', 'b', 'a', 's',
'e', ' ', 'c', 'l', 'u', 's', 't', 'e',
'r', ' ', 'd', 'u', 'm', 'p'};
static const unsigned char header_postgreSQL_win[39]= {
'-', '-', '\r', '\n', '-', '-', ' ', 'P',
'o', 's', 't', 'g', 'r', 'e', 'S', 'Q',
'L', ' ', 'd', 'a', 't', 'a', 'b', 'a',
's', 'e', ' ', 'c', 'l', 'u', 's', 't',
'e', 'r', ' ', 'd', 'u', 'm', 'p'};
static const unsigned char header_qgis[15] = "<!DOCTYPE qgis ";
static const unsigned char header_ram[7] = "rtsp://";
static const unsigned char header_ReceivedFrom[14]= {'R','e','c','e','i','v','e','d',':',' ','f','r','o','m'};
static const unsigned char header_reg[8] = "REGEDIT4";
static const unsigned char header_ReturnPath[13]= {'R','e','t','u','r','n','-','P','a','t','h',':',' '};
static const unsigned char header_rpp[16] = { '<', 'R', 'E', 'A', 'P', 'E', 'R', '_', 'P', 'R', 'O', 'J', 'E', 'C', 'T', ' '};
static const unsigned char header_rtf[5] = { '{','\\','r','t','f'};
/* firefox session store */
static const unsigned char header_sessionstore[42] = "({\"windows\":[{\"tabs\":[{\"entries\":[{\"url\":\"";
static const unsigned char header_sh[9] = "#!/bin/sh";
static const unsigned char header_slk[10] = "ID;PSCALC3";
static const unsigned char header_smil[6] = "<smil>";
static const unsigned char header_stl[6] = "solid ";
static const unsigned char header_stp[13] = "ISO-10303-21;";
static const unsigned char header_url[18] = {
'[', 'I', 'n', 't', 'e', 'r', 'n', 'e',
't', 'S', 'h', 'o', 'r', 't', 'c', 'u',
't', ']'
};
static const unsigned char header_wpl[21] = { '<', '?', 'w', 'p', 'l', ' ', 'v', 'e', 'r', 's', 'i', 'o', 'n', '=', '"', '1', '.', '0', '"', '?', '>' };
static const unsigned char header_xml[14] = "<?xml version=";
static const unsigned char header_xml_utf8[17] = {0xef, 0xbb, 0xbf, '<', '?', 'x', 'm', 'l', ' ', 'v', 'e', 'r', 's', 'i', 'o', 'n', '='};
static const unsigned char header_xmp[35] = {
'<', 'x', ':', 'x', 'm', 'p', 'm', 'e',
't', 'a', ' ', 'x', 'm', 'l', 'n', 's',
':', 'x', '=', '"', 'a', 'd', 'o', 'b',
'e', ':', 'n', 's', ':', 'm', 'e', 't',
'a', '/', '"'};
static const unsigned char header_vbookmark[10] = { 'B', 'E', 'G', 'I', 'N', ':', 'V', 'B', 'K', 'M'};
static const char sign_java1[6] = "class";
static const char sign_java3[15] = "private static";
static const char sign_java4[17] = "public interface";
static unsigned char ascii_char[256];
static void register_header_check_txt(file_stat_t *file_stat)
{
unsigned int i;
for(i=0; i<256; i++)
ascii_char[i]=i;
for(i=0; i<256; i++)
{
if(filtre(i) || i==0xE2 || i==0xC2 || i==0xC3 || i==0xC5 || i==0xC6 || i==0xCB)
register_header_check(0, &ascii_char[i], 1, &header_check_txt, file_stat);
}
#ifdef UTF16
register_header_check(1, &ascii_char[0], 1, &header_check_le16_txt, file_stat);
#endif
}
static void register_header_check_fasttxt(file_stat_t *file_stat)
{
register_header_check(0, header_adr, sizeof(header_adr), &header_check_fasttxt, file_stat);
register_header_check(0, header_bash,sizeof(header_bash), &header_check_fasttxt, file_stat);
register_header_check(0, header_cls,sizeof(header_cls), &header_check_fasttxt, file_stat);
register_header_check(0, header_cue1,sizeof(header_cue1), &header_check_fasttxt, file_stat);
register_header_check(0, header_cue2,sizeof(header_cue2), &header_check_fasttxt, file_stat);
register_header_check(4, header_dc, sizeof(header_dc), &header_check_fasttxt, file_stat);
register_header_check(0, header_dif, sizeof(header_dif), &header_check_fasttxt, file_stat);
register_header_check(0, header_emka, sizeof(header_emka), &header_check_fasttxt, file_stat);
register_header_check(0, header_ers,sizeof(header_ers), &header_check_fasttxt, file_stat);
register_header_check(0, header_ics, sizeof(header_ics), &header_check_fasttxt, file_stat);
register_header_check(0, header_imm,sizeof(header_imm), &header_check_fasttxt, file_stat);
register_header_check(0, header_jad, sizeof(header_jad), &header_check_fasttxt, file_stat);
register_header_check(0, header_json, sizeof(header_json), &header_check_fasttxt, file_stat);
register_header_check(0, header_ksh,sizeof(header_ksh), &header_check_fasttxt, file_stat);
register_header_check(0, header_lyx,sizeof(header_lyx), &header_check_fasttxt, file_stat);
register_header_check(0, header_m3u, sizeof(header_m3u), &header_check_fasttxt, file_stat);
register_header_check(0, header_mail,sizeof(header_mail), &header_check_fasttxt, file_stat);
register_header_check(0, header_mail2,sizeof(header_mail2), &header_check_fasttxt, file_stat);
register_header_check(0, header_mdl, sizeof(header_mdl), &header_check_fasttxt, file_stat);
register_header_check(0, header_mnemosyne, sizeof(header_mnemosyne), &header_check_fasttxt, file_stat);
register_header_check(0, header_msf, sizeof(header_msf), &header_check_fasttxt, file_stat);
register_header_check(0, header_mysql, sizeof(header_mysql), &header_check_fasttxt, file_stat);
register_header_check(0, header_perlm,sizeof(header_perlm), &header_check_fasttxt, file_stat);
register_header_check(0, header_phpMyAdmin, sizeof(header_phpMyAdmin), &header_check_fasttxt, file_stat);
register_header_check(0, header_postgreSQL, sizeof(header_postgreSQL), &header_check_fasttxt, file_stat);
register_header_check(0, header_postgreSQL_win, sizeof(header_postgreSQL_win), &header_check_fasttxt, file_stat);
register_header_check(0, header_qgis, sizeof(header_qgis), &header_check_fasttxt, file_stat);
register_header_check(0, header_ram,sizeof(header_ram), &header_check_fasttxt, file_stat);
register_header_check(0, header_reg,sizeof(header_reg), &header_check_fasttxt, file_stat);
register_header_check(0, header_ReturnPath,sizeof(header_ReturnPath), &header_check_fasttxt, file_stat);
register_header_check(0, header_rpp,sizeof(header_rpp), &header_check_fasttxt, file_stat);
register_header_check(0, header_rtf,sizeof(header_rtf), &header_check_fasttxt, file_stat);
register_header_check(0, header_sessionstore, sizeof(header_sessionstore), &header_check_fasttxt, file_stat);
register_header_check(0, header_sh,sizeof(header_sh), &header_check_fasttxt, file_stat);
register_header_check(0, header_slk,sizeof(header_slk), &header_check_fasttxt, file_stat);
register_header_check(0, header_smil,sizeof(header_smil), &header_check_fasttxt, file_stat);
register_header_check(0, header_stl,sizeof(header_stl), &header_check_fasttxt, file_stat);
register_header_check(0, header_stp,sizeof(header_stp), &header_check_fasttxt, file_stat);
register_header_check(0, header_url,sizeof(header_url), &header_check_fasttxt, file_stat);
register_header_check(0, header_wpl,sizeof(header_wpl), &header_check_fasttxt, file_stat);
register_header_check(0, header_xml,sizeof(header_xml), &header_check_fasttxt, file_stat);
register_header_check(0, header_xml_utf8,sizeof(header_xml_utf8), &header_check_fasttxt, file_stat);
register_header_check(0, header_xmp,sizeof(header_xmp), &header_check_fasttxt, file_stat);
register_header_check(0, header_vbookmark, sizeof(header_vbookmark), &header_check_fasttxt, file_stat);
}
// #define DEBUG_FILETXT
/* return 1 if char can be found in text file */
static int filtre(unsigned char car)
{
switch(car)
{
case 0x7c: /* similar to | */
case 0x80:
case 0x92:
case 0x99:
case 0x9c: /* '<27>' */
case 0xa0: /* nonbreaking space */
case 0xa1: /* '<27>' */
case 0xa2:
case 0xa3: /* '<27>' */
case 0xa7: /* '<27>' */
case 0xa8:
case 0xa9: /* '<27>' */
case 0xab: /* '<27>' */
case 0xae: /* '<27>' */
case 0xb0: /* '<27>' */
case 0xb4: /* '<27>' */
case 0xb7:
case 0xbb: /* '<27>' */
case 0xc0: /* '<27>' */
case 0xc7: /* '<27>' */
case 0xc9: /* '<27>' */
case 0xd6: /* '<27>' */
case 0xd7:
case 0xd9: /* '<27>' */
case 0xdf:
case 0xe0: /* '<27>' */
case 0xe1: /* '<27>' */
case 0xe2: /* '<27>' */
case 0xe3: /* '<27>' */
case 0xe4: /* '<27>' */
case 0xe6: /* '<27>' */
case 0xe7: /* '<27>' */
case 0xe8: /* '<27>' */
case 0xe9: /* '<27>' */
case 0xea: /* '<27>' */
case 0xeb: /* '<27>' */
case 0xed: /* '<27>' */
case 0xee: /* '<27>' */
case 0xef: /* '<27>' */
case 0xf4: /* '<27>' */
case 0xf6: /* '<27>' */
case 0xf8: /* '<27>' */
case 0xf9: /* '<27>' */
case 0xfa: /* '<27>' */
case 0xfb: /* '<27>' */
case 0xfc: /* '<27>' */
return 1;
}
if((car=='\b')||(car=='\t')||(car=='\r')||(car=='\n')
||((car>=' ')&&(car<='~'))
||((car>=0x82)&&(car<=0x8d))
||((car>=0x93)&&(car<=0x98))
)
return 1;
return 0;
}
/* destination should have an extra byte available for null terminator
return read size */
int UTF2Lat(unsigned char *buffer_lower, const unsigned char *buffer, const int buf_len)
{
const unsigned char *p; /* pointers to actual position in source buffer */
unsigned char *q; /* pointers to actual position in destination buffer */
int i; /* counter of remaining bytes available in destination buffer */
for (i = buf_len, p = buffer, q = buffer_lower; p-buffer<buf_len && i > 0 && *p!='\0';)
{
const unsigned char *p_org=p;
if((*p & 0xf0)==0xe0 && (*(p+1) & 0xc0)==0x80 && (*(p+2) & 0xc0)==0x80)
{ /* UTF8 l=3 */
#ifdef DEBUG_TXT
log_info("UTF8 l=3 0x%02x 0x%02x 0x02x\n", *p, *(p+1),*(p+2));
#endif
*q = '\0';
switch (*p)
{
case 0xE2 :
switch (*(p+1))
{
case 0x80 :
switch (*(p+2))
{
case 0x93 : (*q) = 150; break;
case 0x94 : (*q) = 151; break;
case 0x98 : (*q) = 145; break;
/* case 0x99 : (*q) = 146; break; */
case 0x99 : (*q) = '\''; break;
case 0x9A : (*q) = 130; break;
case 0x9C : (*q) = 147; break;
case 0x9D : (*q) = 148; break;
case 0x9E : (*q) = 132; break;
case 0xA0 : (*q) = 134; break;
case 0xA1 : (*q) = 135; break;
case 0xA2 : (*q) = 149; break;
case 0xA6 : (*q) = 133; break;
case 0xB0 : (*q) = 137; break;
case 0xB9 : (*q) = 139; break;
case 0xBA : (*q) = 155; break;
}
break;
case 0x82 :
switch (*(p+2))
{
case 0xAC : (*q) = 128; break;
}
break;
case 0x84 :
switch (*(p+2))
{
case 0xA2 : (*q) = 153; break;
}
break;
}
break;
}
p+=3;
}
else if((*p & 0xe0)==0xc0 && (*(p+1) & 0xc0)==0x80)
{ /* UTF8 l=2 */
*q = '\0';
switch (*p)
{
case 0xC2 :
(*q) = ((*(p+1)) | 0x80) & 0xBF; /* A0-BF and a few 80-9F */
if((*q)==0xA0)
(*q)=' ';
break;
case 0xC3 :
switch (*(p+1))
{
case 0xB3 : (*q) = 162; break;
default:
(*q) = (*(p+1)) | 0xC0; /* C0-FF */
break;
}
break;
case 0xC5 :
switch (*(p+1)) {
case 0x92 : (*q) = 140; break;
case 0x93 : (*q) = 156; break;
case 0xA0 : (*q) = 138; break;
case 0xA1 : (*q) = 154; break;
case 0xB8 : (*q) = 143; break;
case 0xBD : (*q) = 142; break;
case 0xBE : (*q) = 158; break;
}
break;
case 0xC6:
switch (*(p+1)) {
case 0x92 : (*q) = 131; break;
}
break;
case 0xCB :
switch (*(p+1)) {
case 0x86 : (*q) = 136; break;
case 0x9C : (*q) = 152; break;
}
break;
}
p+=2;
}
else
{ /* Ascii UCS */
#ifdef DEBUG_TXT
log_info("UTF8 Ascii UCS 0x%02x\n", *p);
#endif
*q = tolower(*p++);
}
if (*q=='\0' || filtre(*q)==0)
{
#ifdef DEBUG_TXT
log_warning("UTF2Lat reject 0x%x\n",*q);
#endif
*q = '\0';
return(p_org-buffer);
}
q++;
i--;
}
*q = '\0';
return(p-buffer);
}
static int header_check_fasttxt(const unsigned char *buffer, const unsigned int buffer_size, const unsigned int safe_header_only, const file_recovery_t *file_recovery, file_recovery_t *file_recovery_new)
{
const char sign_grisbi[14] = "Version_grisbi";
const char sign_fst[5] = "QBFSD";
const char sign_html[5] = "<html";
const char sign_plist[16] = "<!DOCTYPE plist ";
const char sign_svg[4] = "<svg";
static const unsigned char spaces[16]={
' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ',
' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ' };
if(memcmp(buffer,header_cls,sizeof(header_cls))==0)
{
reset_file_recovery(file_recovery_new);
file_recovery_new->data_check=&data_check_txt;
file_recovery_new->file_check=&file_check_size;
file_recovery_new->extension="cls";
return 1;
}
if(memcmp(buffer,header_json,sizeof(header_json))==0)
{
reset_file_recovery(file_recovery_new);
file_recovery_new->data_check=&data_check_txt;
file_recovery_new->file_check=&file_check_size;
file_recovery_new->extension="json";
return 1;
}
/* Incredimail has .imm extension but this extension isn't frequent */
if(memcmp(buffer,header_imm,sizeof(header_imm))==0 ||
memcmp(buffer,header_mail,sizeof(header_mail))==0 ||
memcmp(buffer,header_ReturnPath,sizeof(header_ReturnPath))==0)
{
if(file_recovery!=NULL && file_recovery->file_stat!=NULL &&
file_recovery->file_stat->file_hint==&file_hint_fasttxt &&
strcmp(file_recovery->extension,"mbox")==0)
return 0;
reset_file_recovery(file_recovery_new);
file_recovery_new->data_check=NULL;
file_recovery_new->extension="mbox";
return 1;
}
if(memcmp(buffer,header_mail2,sizeof(header_mail2))==0)
{
unsigned int i;
/* From someone@somewhere */
for(i=sizeof(header_mail2); buffer[i]!=' ' && buffer[i]!='@' && i<200; i++);
if(buffer[i]!='@')
return 0;
if(file_recovery!=NULL && file_recovery->file_stat!=NULL &&
file_recovery->file_stat->file_hint==&file_hint_fasttxt &&
strcmp(file_recovery->extension,"mbox")==0)
return 0;
reset_file_recovery(file_recovery_new);
file_recovery_new->data_check=NULL;
file_recovery_new->extension="mbox";
return 1;
}
if(memcmp(buffer,header_mdl,sizeof(header_mdl))==0)
{ /* Mathlab Model .mdl */
reset_file_recovery(file_recovery_new);
file_recovery_new->data_check=&data_check_txt;
file_recovery_new->extension="mdl";
return 1;
}
if(memcmp(buffer,header_perlm,sizeof(header_perlm))==0 &&
(buffer[sizeof(header_perlm)]==' ' || buffer[sizeof(header_perlm)]=='\t'))
{
char *buffer_lower=(char *)MALLOC(2048);
const unsigned int buffer_size_test=(buffer_size < 2048-16 ? buffer_size : 2048-16);
UTF2Lat((unsigned char*)buffer_lower, buffer, buffer_size_test);
reset_file_recovery(file_recovery_new);
file_recovery_new->data_check=&data_check_txt;
file_recovery_new->file_check=&file_check_size;
if(strstr(buffer_lower, sign_java1)!=NULL ||
strstr(buffer_lower, sign_java3)!=NULL ||
strstr(buffer_lower, sign_java4)!=NULL)
{
#ifdef DJGPP
file_recovery_new->extension="jav";
#else
file_recovery_new->extension="java";
#endif
}
else
file_recovery_new->extension="pm";
free(buffer_lower);
return 1;
}
if(memcmp(buffer,header_rpp,sizeof(header_rpp))==0)
{
/* Reaper Project */
reset_file_recovery(file_recovery_new);
file_recovery_new->data_check=&data_check_txt;
file_recovery_new->file_check=&file_check_size;
file_recovery_new->extension="rpp";
return 1;
}
if(memcmp(buffer,header_rtf,sizeof(header_rtf))==0)
{
reset_file_recovery(file_recovery_new);
file_recovery_new->data_check=&data_check_txt;
file_recovery_new->file_check=&file_check_size;
file_recovery_new->extension="rtf";
return 1;
}
if(memcmp(buffer,header_reg,sizeof(header_reg))==0)
{
reset_file_recovery(file_recovery_new);
file_recovery_new->data_check=&data_check_txt;
file_recovery_new->file_check=&file_check_size;
file_recovery_new->extension="reg";
return 1;
}
if(memcmp(buffer,header_sessionstore, sizeof(header_sessionstore))==0)
{
reset_file_recovery(file_recovery_new);
file_recovery_new->data_check=&data_check_txt;
file_recovery_new->file_check=&file_check_size;
#ifdef DJGPP
file_recovery_new->extension="js";
#else
file_recovery_new->extension="sessionstore.js";
#endif
return 1;
}
if(memcmp(buffer,header_sh,sizeof(header_sh))==0 ||
memcmp(buffer,header_bash,sizeof(header_bash))==0 ||
memcmp(buffer,header_ksh,sizeof(header_ksh))==0)
{
reset_file_recovery(file_recovery_new);
file_recovery_new->data_check=&data_check_txt;
file_recovery_new->file_check=&file_check_size;
file_recovery_new->extension="sh";
return 1;
}
if(memcmp(buffer,header_slk,sizeof(header_slk))==0)
{
reset_file_recovery(file_recovery_new);
file_recovery_new->data_check=&data_check_txt;
file_recovery_new->file_check=&file_check_size;
file_recovery_new->extension="slk";
return 1;
}
if(memcmp(buffer, header_mysql, sizeof(header_mysql))==0 ||
memcmp(buffer, header_phpMyAdmin, sizeof(header_phpMyAdmin))==0 ||
memcmp(buffer, header_postgreSQL, sizeof(header_postgreSQL))==0 ||
memcmp(buffer, header_postgreSQL_win, sizeof(header_postgreSQL_win))==0)
{
reset_file_recovery(file_recovery_new);
file_recovery_new->data_check=&data_check_txt;
file_recovery_new->file_check=&file_check_size;
file_recovery_new->extension="sql";
return 1;
}
if(memcmp(buffer, header_stl, sizeof(header_stl))==0 &&
memcmp(buffer+0x40, spaces, sizeof(spaces))!=0)
{
/* StereoLithography - STL Ascii format
* http://www.ennex.com/~fabbers/StL.asp */
reset_file_recovery(file_recovery_new);
file_recovery_new->data_check=&data_check_txt;
file_recovery_new->file_check=&file_check_size;
file_recovery_new->extension="stl";
return 1;
}
if(memcmp(buffer, header_ers, sizeof(header_ers))==0)
{
/* ER Mapper Rasters (ERS) */
reset_file_recovery(file_recovery_new);
file_recovery_new->data_check=&data_check_txt;
file_recovery_new->file_check=&file_check_ers;
file_recovery_new->extension="ers";
return 1;
}
if(memcmp(buffer, header_emka, sizeof(header_emka))==0)
{
/* EMKA IOX file */
reset_file_recovery(file_recovery_new);
file_recovery_new->data_check=&data_check_txt;
file_recovery_new->file_check=&file_check_size;
#ifdef DJGPP
file_recovery_new->extension="emk";
#else
file_recovery_new->extension="emka";
#endif
return 1;
}
if(td_memmem(buffer, buffer_size, header_qgis, sizeof(header_qgis))!=NULL)
{
/* Quantum GIS (QGIS) is a user friendly Open Source Geographic Information System */
reset_file_recovery(file_recovery_new);
file_recovery_new->data_check=&data_check_txt;
file_recovery_new->file_check=&file_check_size;
file_recovery_new->extension="qgs";
return 1;
}
if(memcmp(buffer,header_stp,sizeof(header_stp))==0)
{
/* ISO 10303 is an ISO standard for the computer-interpretable
* representation and exchange of industrial product data.
* - Industrial automation systems and integration - Product data representation and exchange
* - Standard for the Exchange of Product model data.
* */
reset_file_recovery(file_recovery_new);
file_recovery_new->data_check=&data_check_txt;
file_recovery_new->file_check=&file_check_size;
file_recovery_new->extension="stp";
return 1;
}
if(memcmp(buffer,header_wpl,sizeof(header_wpl))==0)
{
/* Windows Play List*/
reset_file_recovery(file_recovery_new);
file_recovery_new->data_check=&data_check_txt;
file_recovery_new->file_check=&file_check_size;
file_recovery_new->extension="wpl";
return 1;
}
if(memcmp(buffer,header_ram,sizeof(header_ram))==0)
{
reset_file_recovery(file_recovery_new);
file_recovery_new->data_check=&data_check_txt;
file_recovery_new->file_check=&file_check_size;
file_recovery_new->extension="ram";
return 1;
}
if(memcmp(buffer, header_xml, sizeof(header_xml))==0 ||
memcmp(buffer, header_xml_utf8, sizeof(header_xml_utf8))==0)
{
reset_file_recovery(file_recovery_new);
file_recovery_new->data_check=&data_check_txt;
if(td_memmem(buffer, buffer_size, sign_grisbi, sizeof(sign_grisbi))!=NULL)
{
/* Grisbi - Personal Finance Manager XML data */
file_recovery_new->extension="gsb";
}
else if(td_memmem(buffer, buffer_size, sign_fst, sizeof(sign_fst))!=NULL)
file_recovery_new->extension="fst";
else if(td_memmem(buffer, buffer_size, sign_html, sizeof(sign_html))!=NULL)
{
#ifdef DJGPP
file_recovery_new->extension="html";
#else
file_recovery_new->extension="htm";
#endif
}
else if(td_memmem(buffer, buffer_size, sign_svg, sizeof(sign_svg))!=NULL)
{
/* Scalable Vector Graphics */
file_recovery_new->extension="svg";
file_recovery_new->file_check=&file_check_svg;
return 1;
}
else if(td_memmem(buffer, buffer_size, sign_plist, sizeof(sign_plist))!=NULL)
{
/* Mac OS X property list */
#ifdef DJGPP
file_recovery_new->extension="pli";
#else
file_recovery_new->extension="plist";
#endif
}
else
file_recovery_new->extension="xml";
file_recovery_new->file_check=&file_check_xml;
return 1;
}
if(buffer[0]=='0' && buffer[1]=='0' && memcmp(&buffer[4],header_dc,sizeof(header_dc))==0)
{ /*
TSCe Survey Controller DC v10.0
*/
reset_file_recovery(file_recovery_new);
file_recovery_new->data_check=&data_check_txt;
file_recovery_new->file_check=&file_check_size;
file_recovery_new->extension="dc";
return 1;
}
if(memcmp(buffer,header_dif,sizeof(header_dif))==0)
{ /*
Lotus Data Interchange Format
*/
reset_file_recovery(file_recovery_new);
file_recovery_new->data_check=&data_check_txt;
file_recovery_new->file_check=&file_check_size;
file_recovery_new->extension="dif";
return 1;
}
if(memcmp(buffer, header_ics, sizeof(header_ics))==0)
{
const char *date_asc;
char *buffer2;
reset_file_recovery(file_recovery_new);
file_recovery_new->data_check=&data_check_txt;
file_recovery_new->file_check=&file_check_size;
file_recovery_new->extension="ics";
/* DTSTART:19970714T133000 ;Local time
* DTSTART:19970714T173000Z ;UTC time
* DTSTART;TZID=US-Eastern:19970714T133000 ;Local time and time
*/
buffer2=(char *)MALLOC(buffer_size+1);
buffer2[buffer_size]='\0';
memcpy(buffer2, buffer, buffer_size);
date_asc=strstr(buffer2, "DTSTART");
if(date_asc!=NULL)
date_asc=strchr(date_asc, ':');
if(date_asc!=NULL && date_asc-buffer2<=buffer_size-14)
{
struct tm tm_time;
memset(&tm_time, 0, sizeof(tm_time));
date_asc++;
tm_time.tm_sec=(date_asc[13]-'0')*10+(date_asc[14]-'0'); /* seconds 0-59 */
tm_time.tm_min=(date_asc[11]-'0')*10+(date_asc[12]-'0'); /* minutes 0-59 */
tm_time.tm_hour=(date_asc[9]-'0')*10+(date_asc[10]-'0'); /* hours 0-23*/
tm_time.tm_mday=(date_asc[6]-'0')*10+(date_asc[7]-'0'); /* day of the month 1-31 */
tm_time.tm_mon=(date_asc[4]-'0')*10+(date_asc[5]-'0')-1; /* month 0-11 */
tm_time.tm_year=(date_asc[0]-'0')*1000+(date_asc[1]-'0')*100+
(date_asc[2]-'0')*10+(date_asc[3]-'0')-1900; /* year */
tm_time.tm_isdst = -1; /* unknown daylight saving time */
file_recovery_new->time=mktime(&tm_time);
}
free(buffer2);
return 1;
}
/* Java Application Descriptor
* http://en.wikipedia.org/wiki/JAD_%28file_format%29 */
if(memcmp(buffer, header_jad, sizeof(header_jad))==0)
{
reset_file_recovery(file_recovery_new);
file_recovery_new->data_check=&data_check_txt;
file_recovery_new->file_check=&file_check_size;
file_recovery_new->extension="jad";
return 1;
}
/* Lyx http://www.lyx.org */
if(memcmp(buffer, header_lyx, sizeof(header_lyx))==0)
{
reset_file_recovery(file_recovery_new);
file_recovery_new->data_check=&data_check_txt;
file_recovery_new->file_check=&file_check_size;
file_recovery_new->extension="lyx";
return 1;
}
/* Moving Picture Experts Group Audio Layer 3 Uniform Resource Locator */
if(memcmp(buffer, header_m3u, sizeof(header_m3u))==0)
{
reset_file_recovery(file_recovery_new);
file_recovery_new->data_check=&data_check_txt;
file_recovery_new->file_check=&file_check_size;
file_recovery_new->extension="m3u";
return 1;
}
/* http://www.mnemosyne-proj.org/
* flash-card program to help you memorise question/answer pairs */
if(memcmp(buffer, header_mnemosyne, sizeof(header_mnemosyne))==0)
{
reset_file_recovery(file_recovery_new);
file_recovery_new->data_check=&data_check_txt;
file_recovery_new->file_check=&file_check_size;
file_recovery_new->extension="mem";
return 1;
}
/* Mozilla, firefox, thunderbird msf (Mail Summary File) */
if(memcmp(buffer, header_msf, sizeof(header_msf))==0)
{
reset_file_recovery(file_recovery_new);
file_recovery_new->data_check=&data_check_txt;
file_recovery_new->file_check=&file_check_size;
file_recovery_new->extension="msf";
return 1;
}
/* Opera Hotlist bookmark/contact list/notes */
if(memcmp(buffer, header_adr, sizeof(header_adr))==0)
{
reset_file_recovery(file_recovery_new);
file_recovery_new->data_check=&data_check_txt;
file_recovery_new->file_check=&file_check_size;
file_recovery_new->extension="adr";
return 1;
}
/* Cue sheet often begins by the music genre
* or by the filename
* http://wiki.hydrogenaudio.org/index.php?title=Cue_sheet */
if(memcmp(buffer, header_cue1, sizeof(header_cue1))==0 ||
memcmp(buffer, header_cue2, sizeof(header_cue2))==0)
{
reset_file_recovery(file_recovery_new);
file_recovery_new->data_check=&data_check_txt;
file_recovery_new->file_check=&file_check_size;
file_recovery_new->extension="cue";
return 1;
}
/* Synchronized Multimedia Integration Language
* http://en.wikipedia.org/wiki/Synchronized_Multimedia_Integration_Language */
if(memcmp(buffer, header_smil, sizeof(header_smil))==0)
{
reset_file_recovery(file_recovery_new);
file_recovery_new->data_check=&data_check_txt;
file_recovery_new->file_check=&file_check_smil;
file_recovery_new->extension="smil";
return 1;
}
if(memcmp(buffer,header_xmp,sizeof(header_xmp))==0 &&
!(file_recovery!=NULL && file_recovery->file_stat!=NULL &&
(file_recovery->file_stat->file_hint==&file_hint_pdf ||
file_recovery->file_stat->file_hint==&file_hint_tiff)))
{
/* Adobe's Extensible Metadata Platform */
reset_file_recovery(file_recovery_new);
file_recovery_new->data_check=&data_check_txt;
file_recovery_new->file_check=&file_check_size;
file_recovery_new->extension="xmp";
return 1;
}
if(memcmp(buffer, header_vbookmark, sizeof(header_vbookmark))==0)
{
reset_file_recovery(file_recovery_new);
file_recovery_new->data_check=&data_check_txt;
file_recovery_new->file_check=&file_check_size;
file_recovery_new->extension="url";
return 1;
}
return 0;
}
static int is_ini(const char *buffer)
{
const char *src=buffer;
if(*src!='[')
return 0;
src++;
while(1)
{
if(*src==']')
{
if(src > buffer + 3)
return 1;
return 0;
}
if(!isalnum(*src) && *src!=' ')
return 0;
src++;
}
}
#ifdef UTF16
static int header_check_le16_txt(const unsigned char *buffer, const unsigned int buffer_size, const unsigned int safe_header_only, const file_recovery_t *file_recovery, file_recovery_t *file_recovery_new)
{
unsigned int i;
for(i=0; i+1 < buffer_size; i+=2)
{
if(!( buffer[i+1]=='\0' && (isprint(buffer[i]) || buffer[i]=='\n' || buffer[i]=='\r' || buffer[i]==0xbb)))
{
if(i<40)
return 0;
reset_file_recovery(file_recovery_new);
file_recovery_new->calculated_file_size=i;
file_recovery_new->data_check=&data_check_size;
file_recovery_new->file_check=&file_check_size;
file_recovery_new->extension="utf16";
return 1;
}
}
reset_file_recovery(file_recovery_new);
file_recovery_new->calculated_file_size=i;
file_recovery_new->data_check=&data_check_size;
file_recovery_new->file_check=&file_check_size;
file_recovery_new->extension="utf16";
return 1;
}
#endif
static int header_check_txt(const unsigned char *buffer, const unsigned int buffer_size, const unsigned int safe_header_only, const file_recovery_t *file_recovery, file_recovery_t *file_recovery_new)
{
static char *buffer_lower=NULL;
static unsigned int buffer_lower_size=0;
unsigned int l=0;
const unsigned char header_asp[22] = "<%@ language=\"vbscript";
const unsigned char header_bat[9] = "@echo off";
const unsigned char header_bat2[4] = "rem ";
const unsigned char header_json_small[2] = { '{', '"'};
const unsigned char header_vb[20] = {
'v', 'e', 'r', 's', 'i', 'o', 'n', ' ',
'4', '.', '0', '0', '\r', '\n', 'b', 'e',
'g', 'i', 'n', ' '
};
const unsigned char header_vcf[11] = "begin:vcard";
const unsigned char header_sig_perl[4] = "perl";
const unsigned char header_sig_python[6] = "python";
const unsigned char header_sig_ruby[4] = "ruby";
const char sign_asp[] = "<% ";
const char sign_c[] = "#include";
const char sign_h[] = "/*";
const char sign_inf[] = "[autorun]";
const char sign_jsp[] = "<%@";
const char sign_jsp2[] = "<%=";
const char sign_php[] = "<?php";
const char sign_tex[] = "\\begin{";
const char sign_html[] = "<html";
const unsigned int buffer_size_test=(buffer_size < 2048 ? buffer_size : 2048);
{
unsigned int i;
unsigned int tmp=0;
for(i=0;i<10 && isdigit(buffer[i]);i++)
tmp=tmp*10+buffer[i]-'0';
if(buffer[i]==0x0a &&
(memcmp(buffer+i+1, header_ReturnPath, sizeof(header_ReturnPath))==0 ||
memcmp(buffer+i+1, header_ReceivedFrom, sizeof(header_ReceivedFrom))==0) &&
!(file_recovery!=NULL && file_recovery->file_stat!=NULL &&
file_recovery->file_stat->file_hint==&file_hint_fasttxt &&
strcmp(file_recovery->extension,"mbox")==0))
{
reset_file_recovery(file_recovery_new);
file_recovery_new->calculated_file_size=tmp+i+1;
file_recovery_new->data_check=NULL;
file_recovery_new->file_check=&file_check_emlx;
file_recovery_new->extension="emlx";
return 1;
}
}
if(buffer_lower_size<buffer_size_test+16)
{
free(buffer_lower);
buffer_lower=NULL;
}
/* Don't malloc/free memory every time, small memory leak */
if(buffer_lower==NULL)
{
buffer_lower_size=buffer_size_test+16;
buffer_lower=(char *)MALLOC(buffer_lower_size);
}
l=UTF2Lat((unsigned char*)buffer_lower, buffer, buffer_size_test);
if(l<10)
return 0;
/* strncasecmp */
if(memcmp(buffer_lower, header_bat, sizeof(header_bat))==0 ||
memcmp(buffer_lower, header_bat2, sizeof(header_bat2))==0)
{
reset_file_recovery(file_recovery_new);
file_recovery_new->data_check=&data_check_txt;
file_recovery_new->file_check=&file_check_size;
file_recovery_new->extension="bat";
return 1;
}
if(memcmp(buffer_lower,header_asp,sizeof(header_asp))==0)
{
reset_file_recovery(file_recovery_new);
file_recovery_new->data_check=&data_check_txt;
file_recovery_new->file_check=&file_check_size;
file_recovery_new->extension="asp";
return 1;
}
if(memcmp(buffer_lower, header_vb ,sizeof(header_vb))==0)
{
reset_file_recovery(file_recovery_new);
file_recovery_new->data_check=&data_check_txt;
file_recovery_new->file_check=&file_check_size;
file_recovery_new->extension="vb";
return 1;
}
if(memcmp(buffer_lower,header_vcf,sizeof(header_vcf))==0)
{
reset_file_recovery(file_recovery_new);
file_recovery_new->data_check=&data_check_txt;
file_recovery_new->file_check=&file_check_size;
file_recovery_new->extension="vcf";
return 1;
}
if(buffer[0]=='#' && buffer[1]=='!')
{
unsigned int ll=l-2;
const unsigned char *haystack=(const unsigned char *)buffer_lower+2;
const unsigned char *res;
res=(const unsigned char *)memchr(haystack,'\n',ll);
if(res!=NULL)
ll=res-haystack;
if(td_memmem(haystack, ll, header_sig_perl, sizeof(header_sig_perl)) != NULL)
{
reset_file_recovery(file_recovery_new);
file_recovery_new->data_check=&data_check_txt;
file_recovery_new->file_check=&file_check_size;
file_recovery_new->extension="pl";
return 1;
}
if(td_memmem(haystack, ll, header_sig_python, sizeof(header_sig_python)) != NULL)
{
reset_file_recovery(file_recovery_new);
file_recovery_new->data_check=&data_check_txt;
file_recovery_new->file_check=&file_check_size;
file_recovery_new->extension="py";
return 1;
}
if(td_memmem(haystack, ll, header_sig_ruby, sizeof(header_sig_ruby)) != NULL)
{
reset_file_recovery(file_recovery_new);
file_recovery_new->data_check=&data_check_txt;
file_recovery_new->file_check=&file_check_size;
file_recovery_new->extension="rb";
return 1;
}
}
if(safe_header_only!=0)
{
return 0;
}
/* Don't search text in the beginning of JPG or inside pdf */
if(file_recovery!=NULL && file_recovery->file_stat!=NULL &&
((file_recovery->file_stat->file_hint==&file_hint_jpg && file_recovery->file_size < file_recovery->min_filesize) ||
file_recovery->file_stat->file_hint==&file_hint_pdf))
{
return 0;
}
{
const char *ext=NULL;
/* ind=~0: random
* ind=~1: constant */
double ind=1;
unsigned int nbrf=0;
unsigned int is_csv=1;
/* Detect Fortran */
{
char *str=buffer_lower;
while((str=strstr(str, "\n "))!=NULL)
{
nbrf++;
str++;
}
}
/* Detect csv */
{
unsigned int csv_per_line_current=0;
unsigned int csv_per_line=0;
unsigned int line_nbr=0;
unsigned int i;
for(i=0;i<l && is_csv>0;i++)
{
if(buffer_lower[i]==';')
{
csv_per_line_current++;
}
else if(buffer_lower[i]=='\n')
{
if(line_nbr==0)
csv_per_line=csv_per_line_current;
if(csv_per_line_current!=csv_per_line)
is_csv=0;
line_nbr++;
csv_per_line_current=0;
}
}
if(csv_per_line<1 || line_nbr<10)
is_csv=0;
}
/* if(l>1) */
{
unsigned int stats[256];
unsigned int i;
memset(&stats, 0, sizeof(stats));
for(i=0;i<l;i++)
stats[(unsigned char)buffer_lower[i]]++;
ind=0;
for(i=0;i<256;i++)
if(stats[i]>0)
ind+=stats[i]*(stats[i]-1);
ind=ind/l/(l-1);
}
/* Detect .ini */
if(buffer[0]=='[' && is_ini(buffer_lower) && l>50)
ext="ini";
else if(strstr(buffer_lower, sign_php)!=NULL)
ext="php";
else if(strstr(buffer_lower, sign_java1)!=NULL ||
strstr(buffer_lower, sign_java3)!=NULL ||
strstr(buffer_lower, sign_java4)!=NULL)
{
#ifdef DJGPP
ext="jav";
#else
ext="java";
#endif
}
else if(nbrf>10 && ind<0.9)
ext="f";
else if(is_csv>0)
ext="csv";
/* Detect LaTeX, C, PHP, JSP, ASP, HTML, C header */
else if(strstr(buffer_lower, sign_tex)!=NULL)
ext="tex";
else if(strstr(buffer_lower, sign_c)!=NULL)
ext="c";
/* Windows Autorun */
else if(strstr(buffer_lower, sign_inf)!=NULL)
ext="inf";
else if(strstr(buffer_lower, sign_jsp)!=NULL)
ext="jsp";
else if(strstr(buffer_lower, sign_jsp2)!=NULL)
ext="jsp";
else if(strstr(buffer_lower, sign_asp)!=NULL)
ext="asp";
else if(strstr(buffer_lower, sign_html)!=NULL)
ext="html";
else if(strstr(buffer_lower, sign_h)!=NULL && l>50)
ext="h";
else if(l<100 || ind<0.03 || ind>0.90)
ext=NULL;
else if(memcmp(buffer_lower, header_json_small, sizeof(header_json_small))==0)
ext="json";
else
ext=file_hint_txt.extension;
if(ext!=NULL && strcmp(ext,"txt")==0 &&
(strstr(buffer_lower,"<br>")!=NULL || strstr(buffer_lower,"<p>")!=NULL))
{
ext="html";
}
if(ext!=NULL && file_recovery!=NULL && file_recovery->file_stat!=NULL)
{
const unsigned char zip_header[4] = { 'P', 'K', 0x03, 0x04};
if(strcmp(ext,"html")==0 &&
file_recovery->file_stat->file_hint==&file_hint_txt &&
strstr(file_recovery->filename,"")!=NULL)
{
return 0;
}
/* Special case: doc, texte files
Unix: \n (0xA)
Dos: \r\n (0xD 0xA)
Doc: \r (0xD)
*/
if(file_recovery->file_stat->file_hint==&file_hint_doc &&
strstr(file_recovery->filename,".doc")!=NULL)
{
unsigned int i;
unsigned int txt_nl=0;
for(i=0;i<l-1;i++)
if(buffer_lower[i]=='\r' && buffer_lower[i+1]!='\n')
{
return 0;
}
for(i=0;i<l && i<512;i++)
if(buffer_lower[i]=='\n')
txt_nl=1;
if(txt_nl==1)
{
/* log_trace(">%s<\ndoc => %s\n",buffer_lower,ext); */
reset_file_recovery(file_recovery_new);
file_recovery_new->data_check=&data_check_txt;
file_recovery_new->file_check=&file_check_size;
file_recovery_new->extension=ext;
return 1;
}
}
buffer_lower[511]='\0';
/* Special case: two consecutive HTML files */
if((strcmp(ext,"html")==0 &&
strstr(buffer_lower,sign_html)!=NULL &&
strstr(file_recovery->filename,".html")!=NULL) ||
/* Text should not be found in JPEG */
(file_recovery->file_stat->file_hint==&file_hint_jpg &&
td_memmem(buffer, buffer_size_test, "8BIM", 4)==NULL &&
td_memmem(buffer, buffer_size_test, "adobe", 5)==NULL &&
td_memmem(buffer, buffer_size_test, "exif:", 5)==NULL &&
td_memmem(buffer, buffer_size_test, "<rdf:", 5)==NULL &&
td_memmem(buffer, buffer_size_test, "<?xpacket", 9)==NULL &&
td_memmem(buffer, buffer_size_test, "<dict>", 6)==NULL) ||
/* Text should not be found in zip because of compression */
(file_recovery->file_stat->file_hint==&file_hint_zip &&
td_memmem(buffer, buffer_size_test, zip_header, 4)==NULL))
{
reset_file_recovery(file_recovery_new);
file_recovery_new->data_check=&data_check_txt;
file_recovery_new->file_check=&file_check_size;
file_recovery_new->extension=ext;
return 1;
}
return 0;
}
/* log_trace("ext=%s\n",ext); */
if(ext!=NULL)
{
reset_file_recovery(file_recovery_new);
file_recovery_new->extension=ext;
file_recovery_new->data_check=&data_check_txt;
file_recovery_new->file_check=&file_check_size;
return 1;
}
}
return 0;
}
static int data_check_txt(const unsigned char *buffer, const unsigned int buffer_size, file_recovery_t *file_recovery)
{
unsigned int i;
char *buffer_lower=(char *)MALLOC(buffer_size+16);
i=UTF2Lat((unsigned char*)buffer_lower, &buffer[buffer_size/2], buffer_size/2);
if(i<buffer_size/2)
{
const char sign_html_end[] = "</html>";
const char *pos;
pos=strstr(buffer_lower,sign_html_end);
if(strstr(file_recovery->filename,".html")!=NULL && pos!=NULL && i<((pos-buffer_lower)+sizeof(sign_html_end))-1+10)
{
file_recovery->calculated_file_size+=(pos-buffer_lower)+sizeof(sign_html_end)-1;
}
else if(i>=10)
file_recovery->calculated_file_size=file_recovery->file_size+i;
free(buffer_lower);
return 2;
}
free(buffer_lower);
file_recovery->calculated_file_size=file_recovery->file_size+(buffer_size/2);
return 1;
}
static void file_check_emlx(file_recovery_t *file_recovery)
{
const unsigned char emlx_footer[9]= {'<', '/', 'p', 'l', 'i', 's', 't', '>', 0x0a};
if(file_recovery->file_size < file_recovery->calculated_file_size)
file_recovery->file_size=0;
else
{
if(file_recovery->file_size > file_recovery->calculated_file_size+2048)
file_recovery->file_size=file_recovery->calculated_file_size+2048;
file_search_footer(file_recovery, emlx_footer, sizeof(emlx_footer), 0);
}
}
static void file_check_smil(file_recovery_t *file_recovery)
{
const unsigned char smil_footer[7]= { '<', '/', 's', 'm', 'i', 'l', '>'};
file_search_footer(file_recovery, smil_footer, sizeof(smil_footer), 0);
file_allow_nl(file_recovery, NL_BARENL|NL_CRLF|NL_BARECR);
}
static void file_check_xml(file_recovery_t *file_recovery)
{
const unsigned char xml_footer[1]= { '>'};
file_search_footer(file_recovery, xml_footer, sizeof(xml_footer), 0);
file_allow_nl(file_recovery, NL_BARENL|NL_CRLF|NL_BARECR);
}
static void file_check_ers(file_recovery_t *file_recovery)
{
const unsigned char ers_footer[17]= "DatasetHeader End";
file_search_footer(file_recovery, ers_footer, sizeof(ers_footer), 0);
file_allow_nl(file_recovery, NL_BARENL|NL_CRLF|NL_BARECR);
}
static void file_check_svg(file_recovery_t *file_recovery)
{
const unsigned char svg_footer[6]= { '<', '/', 's', 'v', 'g', '>'};
file_search_footer(file_recovery, svg_footer, sizeof(svg_footer), 0);
file_allow_nl(file_recovery, NL_BARENL|NL_CRLF|NL_BARECR);
}