/*

    File: file_pdf.c

    Copyright (C) 1998-2011 Christophe GRENIER

    This software is free software; you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
    the Free Software Foundation; either version 2 of the License, or
    (at your option) any later version.

    This program is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    GNU General Public License for more details.

    You should have received a copy of the GNU General Public License along
    with this program; if not, write the Free Software Foundation, Inc., 51
    Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.

 */

/*
 * PDF support: header detection, end-of-file detection, creation date
 * extraction and renaming of recovered files after their /Title entry.
 *
 * NOTE(review): this copy of the file looks damaged by a markup-stripping
 * step. Text that sat between a less-than sign and a later greater-than sign
 * seems to have been dropped, and one ampersand sequence seems to have been
 * turned into a registered-trademark character. Each affected spot is flagged
 * below with NOTE(review). The code tokens are left exactly as found and have
 * to be restored from a pristine copy before this file can compile.
 */
#if !defined(SINGLE_FORMAT) || defined(SINGLE_FORMAT_pdf)
/* NOTE(review): the header names of the bare #include lines below are missing. */
#ifdef HAVE_CONFIG_H
#include
#endif
#ifdef HAVE_STRING_H
#include
#endif
#include
#ifdef HAVE_TIME_H
#include
#endif
#ifdef HAVE_STDLIB_H
#include /* free */
#endif
#include
#include "types.h"
#include "filegen.h"
#include "memmem.h"
#include "common.h"

/* Forward declaration: the function is referenced by file_hint_pdf below. */
/*@
  @ requires \valid(file_stat);
  @*/
static void register_header_check_pdf(file_stat_t *file_stat);

/*
 * Format descriptor for PDF. It carries the default extension, a description
 * that also mentions Adobe Illustrator, PHOTOREC_MAX_FILE_SIZE as the size
 * limit, and sets both the recover and enable_by_default fields to 1.
 */
const file_hint_t file_hint_pdf=
{
  .extension="pdf",
  .description="Portable Document Format, Adobe Illustrator",
  .max_filesize=PHOTOREC_MAX_FILE_SIZE,
  .recover=1,
  .enable_by_default=1,
  /* NOTE(review): the initializer below holds a stray registered-trademark
   * character; presumably the address of register_header_check_pdf was
   * written here - confirm against a pristine copy. */
  .register_header_check=®ister_header_check_pdf
};

/*
 * Tell whether c is an ASCII hexadecimal digit (0-9, A-F or a-f).
 * Returns non-zero for a hexadecimal digit and 0 otherwise.
 */
/*@
  @ assigns \nothing;
  @*/
static int is_hexa(const int c)
{
  return ((c>='0' && c<='9') ||
      (c>='A' && c<='F') ||
      (c>='a' && c<='f'));
}

/*
 * Convert an ASCII hexadecimal digit to its value in the range 0..15.
 * Any other character yields 0, so callers that need to tell a real zero
 * from bad input have to test with is_hexa() first.
 */
/*@
  @ assigns \nothing;
  @ ensures 0 <= \result <= 15;
  @*/
static unsigned int hex(const int c)
{
  if(c>='0' && c<='9')
    return c-'0';
  if(c>='A' && c<='F')
    return c-'A'+10;
  if(c>='a' && c<='f')
    return c-'a'+10;
  return 0;
}

/*
 * Try to rename a recovered PDF after the value of its /Title entry.
 *
 * Visible steps:
 *  - reopen the file named by file_recovery->filename and take its length
 *    from the position reported after seeking to the end;
 *  - ask file_rsearch() for the position of the byte sequence /Title
 *    (presumably a search from the end of the file backwards - confirm in
 *    filegen) and give up when it returns 0 or a value above
 *    PHOTOREC_MAX_FILE_SIZE;
 *  - read up to 512 bytes located right after that sequence and give up when
 *    2 bytes or fewer could be read;
 *  - extract the title text (this section is damaged, see the notes below);
 *  - drop a trailing .doc, .xls, .docx or .xlsx from the title, then hand the
 *    title and its length to file_rename().
 *
 * Every early exit visible here closes the stream and frees the buffer
 * acquired so far.
 */
/*@
  @ requires \valid(file_recovery);
  @ requires valid_read_string((char*)file_recovery->filename);
  @ requires file_recovery->file_rename==&file_rename_pdf;
  @*/
static void file_rename_pdf(file_recovery_t *file_recovery)
{
  char title[512];
  const unsigned char pattern[6]={ '/', 'T', 'i', 't', 'l', 'e' };
  off_t offset;
  uint64_t tmp;
  FILE *handle;
  unsigned char*buffer;
  unsigned int i;
  unsigned int l;
  size_t bsize;
  /* Bytes 0xfe 0xff 0x00: NOTE(review) presumably a UTF-16 byte order mark
   * plus the high byte of the first character; its use is in the damaged
   * part of this function. */
  const unsigned char utf16[3]= { 0xfe, 0xff, 0x00};
  if((handle=fopen(file_recovery->filename, "rb"))==NULL)
    return;
  /* Seek to the end so that the current position equals the file length. */
  if(my_fseek(handle, 0, SEEK_END)<0)
  {
    fclose(handle);
    return;
  }
#ifdef HAVE_FTELLO
  offset=ftello(handle);
#else
  offset=ftell(handle);
#endif
  /* A negative value is an error and 0 is an empty file: nothing to search. */
  if(offset <= 0)
  {
    fclose(handle);
    return;
  }
  tmp=file_rsearch(handle, offset, pattern, sizeof(pattern));
  /* 0 is treated as not found; the upper bound rejects implausible values. */
  if(tmp==0 || tmp > PHOTOREC_MAX_FILE_SIZE)
  {
    fclose(handle);
    return;
  }
  /* Position just past the /Title keyword. */
  offset=tmp+sizeof(pattern);
  if(my_fseek(handle, offset, SEEK_SET)<0)
  {
    fclose(handle);
    return ;
  }
  /* NOTE(review): the result of MALLOC is used unchecked; presumably the
   * project wrapper never returns NULL - confirm in common.h. */
  buffer=(unsigned char*)MALLOC(512);
  if((bsize=fread(buffer, 1, 512, handle)) <= 2)
  {
    free(buffer);
    fclose(handle);
    return ;
  }
  /*@ assert 2 < bsize; */
#if defined(__FRAMAC__)
  Frama_C_make_unknown(buffer, 512);
#endif
  /*@ assert \initialized(buffer + (0 .. 512-1)); */
  /* All the data needed is now in buffer; the stream is not used again. */
  fclose(handle);
  /* Skip spaces after /Title */
  /*@
    @ loop invariant 0 <= i <= bsize;
    @ loop assigns i;
    @ */
  /* NOTE(review): damaged line. The end of the loop header and the start of
   * the following if condition are missing. Judging by the loop invariant
   * above and the assert below, the loop presumably advanced i over spaces
   * and the test rejected the case where i + 2 reaches bsize - restore from
   * a pristine copy. */
  for(i=0; i= bsize)
  {
    /* Too many spaces */
    free(buffer);
    return ;
  }
  /*@ assert i + 2 < bsize; */
  /* A title that starts with a less-than sign is hex encoded. */
  if(buffer[i]=='<')
  {
    unsigned int j;
    unsigned int s;
    /* hexa to ascii */
    /* The opening delimiter is replaced by an opening parenthesis, so that the
     * decoded text looks like a literal string to the code that follows. */
    buffer[i]='(';
    /*@ assert \valid(buffer + (0 .. bsize -1)); */
    /*@
      @ loop invariant s <= bsize;
      @ loop invariant j <= s;
      @ loop invariant j < bsize;
      @ loop assigns s, j, buffer[0 .. 512-1];
      @ */
    /* NOTE(review): damaged line. Everything from the middle of this loop
     * condition up to the middle of the first extension test is missing.
     * That gap presumably held the rest of the hex decoding loop (s as read
     * index, j as write index, per the invariants above), the copy of the
     * title into title[] with its length in l, and the use of utf16 -
     * restore from a pristine copy. */
    for(s=i+1, j=i+1; s+14 &&
      (memcmp(&title[l-4], ".doc", 4)==0 ||
       memcmp(&title[l-4], ".xls", 4)==0))
    l-=4;
  else if(l>5 &&
      (memcmp(&title[l-5], ".docx", 5)==0 ||
       memcmp(&title[l-5], ".xlsx", 5)==0))
    l-=5;
  /* Only the first l bytes of title are passed on, so shortening l above is
   * what removes the office extension from the new name. */
  file_rename(file_recovery, title, l, 0, NULL, 1);
  free(buffer);
}

/*
 * Look for the byte sequence xap:CreateDate in the recovered file and, when a
 * date follows it in one of the two accepted forms, store the parsed value in
 * file_recovery->time.
 *
 * Nothing is done when file_size exceeds PHOTOREC_MAX_FILE_SIZE or when
 * seeking to the start of the file fails. The file is read in chunks of up to
 * 4096 bytes until offset reaches file_size or a read returns no data.
 * The function returns right after the first complete match, whether or not
 * a date could be read there.
 */
/*@
  @ requires \valid(file_recovery);
  @ requires valid_file_recovery(file_recovery);
  @ requires \separated(file_recovery, file_recovery->handle, file_recovery->extension, &errno, &Frama_C_entropy_source);
  @*/
static void file_date_pdf(file_recovery_t *file_recovery)
{
  const unsigned char pattern[14]={'x', 'a', 'p', ':', 'C', 'r', 'e', 'a', 't', 'e', 'D', 'a', 't', 'e'};
  /* File position of the first byte of the chunk currently held in buffer. */
  uint64_t offset=0;
  /* Declared outside the read loop, so its value survives from one chunk to
   * the next. NOTE(review): presumably the number of pattern bytes matched so
   * far; the code that advances it is in the damaged span below. */
  unsigned int j=0;
  unsigned char*buffer;
  if(file_recovery->file_size > PHOTOREC_MAX_FILE_SIZE)
    return ;
  /*@ assert file_recovery->file_size <= PHOTOREC_MAX_FILE_SIZE; */
  buffer=(unsigned char*)MALLOC(4096);
  if(my_fseek(file_recovery->handle, 0, SEEK_SET)<0)
  {
    free(buffer);
    return ;
  }
  while(offset < file_recovery->file_size)
  {
    int i;
    const int bsize=fread(buffer, 1, 4096, file_recovery->handle);
    /* Stop on a read error or when no more data is available. */
    if(bsize<=0)
    {
      free(buffer);
      return ;
    }
    /*@
      @ loop invariant \separated(file_recovery, file_recovery->handle, file_recovery->extension, &errno, buffer + (..));
      @ loop assigns i, j, *file_recovery->handle, file_recovery->time, buffer[0..21];
      @ loop assigns errno;
      @*/
    /* NOTE(review): damaged line. The text between the start of the loop
     * header and the seek call is missing. Presumably the lost code compared
     * buffer[i] with pattern[j] and entered the branch below once the whole
     * pattern had matched - restore from a pristine copy.
     * What is visible: the stream is repositioned to offset+i+1, the byte
     * right after index i of the current chunk, and exactly 22 bytes are
     * read into the start of buffer. Overwriting the chunk is harmless
     * because the function returns right after this branch. */
    for(i=0; ihandle, offset+i+1, SEEK_SET)>=0 &&
	      fread(buffer, 1, 22, file_recovery->handle) == 22)
	  {
	    /*@ assert \initialized( buffer+ (0 .. 22-1)); */
	    /* First accepted form: an equals sign, then a single or double
	     * quotation mark, then the date text. */
	    if(buffer[0]=='=' && (buffer[1]=='\'' || buffer[1]=='"'))
	    {
	      file_recovery->time=get_time_from_YYYY_MM_DD_HH_MM_SS(&buffer[2]);
	    }
	    /* Second accepted form: a greater-than sign, then the date text. */
	    else if(buffer[0]=='>')
	    {
	      file_recovery->time=get_time_from_YYYY_MM_DD_HH_MM_SS(&buffer[1]);
	    }
	  }
	  free(buffer);
	  return ;
	}
      }
      else
	/* Reset the match index. */
	j=0;
    }
    offset+=bsize;
  }
  free(buffer);
}

/* Number of bytes read from the end of the file when looking for %EOF. */
#define PDF_READ_SIZE 20

/*
 * File check used when the header announced a length.
 *
 * The file is rejected (file_size set to 0) when fewer bytes were recovered
 * than announced, when the announced length is below PDF_READ_SIZE, when
 * seeking fails, or when the last PDF_READ_SIZE bytes of the announced length
 * do not contain the marker %EOF. Otherwise file_size is set to the announced
 * length and file_date_pdf() is called.
 */
/*@
  @ requires \valid(file_recovery);
  @ requires \valid(file_recovery->handle);
  @ requires valid_file_recovery(file_recovery);
  @ requires \separated(file_recovery, file_recovery->handle, file_recovery->extension, &errno, &Frama_C_entropy_source);
  @*/
static void file_check_pdf_and_size(file_recovery_t *file_recovery)
{
  /* Only PDF_READ_SIZE bytes are read into this buffer; the scan below never
   * reads past index taille-1. */
  unsigned char buffer[PDF_READ_SIZE + 3];
  int i;
  /* Number of bytes returned by fread (taille is French for size). */
  int taille;
  if( file_recovery->file_size < file_recovery->calculated_file_size ||
      file_recovery->calculated_file_size < PDF_READ_SIZE)
  {
    file_recovery->file_size=0;
    return;
  }
  /*@ assert file_recovery->calculated_file_size >= PDF_READ_SIZE; */
  /* Truncate to the announced length. */
  file_recovery->file_size=file_recovery->calculated_file_size;
  /*@ assert file_recovery->file_size >= PDF_READ_SIZE; */
  if(my_fseek(file_recovery->handle,file_recovery->file_size-PDF_READ_SIZE,SEEK_SET)<0)
  {
    file_recovery->file_size=0;
    return ;
  }
  taille=fread(buffer, 1, PDF_READ_SIZE, file_recovery->handle);
#if defined(__FRAMAC__)
  Frama_C_make_unknown((char *)&buffer, sizeof(buffer));
#endif
  /* Scan backwards; starting at taille-4 keeps index i+3 inside the bytes
   * that were read, and a short read simply skips the loop. */
  for(i=taille-4;i>=0;i--)
  {
    if(buffer[i]=='%' && buffer[i+1]=='E' && buffer[i+2]=='O' && buffer[i+3]=='F')
    {
      file_date_pdf(file_recovery);
      return ;
    }
  }
  /* No %EOF marker near the announced end: reject the file. */
  file_recovery->file_size=0;
}

/*
 * File check used when the header announced no length: file_search_footer()
 * is called with the marker %EOF, file_allow_nl() with the three newline
 * flags, and then file_date_pdf().
 * NOTE(review): presumably the first call cuts the file after the last %EOF
 * and the second keeps a trailing end of line - confirm in filegen.
 */
/*@
  @ requires \valid(file_recovery);
  @ requires \valid(file_recovery->handle);
  @ requires valid_file_recovery(file_recovery);
  @ requires \separated(file_recovery, file_recovery->handle, file_recovery->extension, &errno, &Frama_C_entropy_source);
  @*/
static void file_check_pdf(file_recovery_t *file_recovery)
{
  const unsigned char pdf_footer[4]= { '%', 'E', 'O', 'F'};
  file_search_footer(file_recovery, pdf_footer, sizeof(pdf_footer), 0);
  file_allow_nl(file_recovery, NL_BARENL|NL_CRLF|NL_BARECR);
  file_date_pdf(file_recovery);
}

/*
 * Parse an unsigned decimal number from a 512-byte buffer.
 *
 * Starting at index i, spaces, tabs, line feeds and carriage returns are
 * skipped, then consecutive digits are accumulated. Parsing never reads at or
 * beyond index 512.
 *
 * Returns the parsed value, 0 when no digit is found, or
 * PHOTOREC_MAX_FILE_SIZE + 1 as soon as the value exceeds
 * PHOTOREC_MAX_FILE_SIZE.
 */
/*@
  @ requires \valid_read(buffer+(0..512-1));
  @ assigns \nothing;
  @*/
static uint64_t read_pdf_file_aux(const unsigned char *buffer, unsigned int i)
{
  uint64_t file_size=0;
  /*@ loop assigns i; */
  while(i < 512 &&
      (buffer[i] ==' ' || buffer[i]=='\t' || buffer[i]=='\n' || buffer[i]=='\r'))
    i++;
  /*@
    @ loop invariant file_size <= PHOTOREC_MAX_FILE_SIZE;
    @ loop assigns i, file_size;
    @ */
  for(;i<512 && buffer[i]>='0' && buffer[i]<='9'; i++)
  {
    file_size*=10;
    file_size+=buffer[i]-'0';
    /* Bail out early; this also keeps the accumulator bounded by the loop
     * invariant above. */
    if(file_size > PHOTOREC_MAX_FILE_SIZE)
    {
      return PHOTOREC_MAX_FILE_SIZE + 1;
    }
    /*@ assert file_size <= PHOTOREC_MAX_FILE_SIZE; */
  }
  return file_size;
}

/*
 * Extract the length announced by a linearized PDF from the first 512 bytes
 * of the file.
 *
 * The keyword Linearized is searched first. From the byte after it, the scan
 * stops at the first greater-than sign or at index 511; the number following
 * the first /L found on the way is returned through read_pdf_file_aux().
 *
 * Returns 0 when the keyword or the /L key is not found.
 */
/*@
  @ requires \valid_read(buffer+(0..512-1));
  @ assigns \nothing;
  @*/
static uint64_t read_pdf_file(const unsigned char *buffer)
{
  const unsigned char sig_linearized[10]={'L','i','n','e','a','r','i','z','e','d'};
  const char *src;
  unsigned int i;
  const char *sbuffer=(const char *)buffer;
  src=(const char *)td_memmem(sbuffer, 512, sig_linearized, sizeof(sig_linearized));
  if(src == NULL)
    return 0;
  /* Index of the first byte after the keyword. */
  i = src - sbuffer;
  i+=sizeof(sig_linearized);
  if( i >= 512 -1)
    return 0;
  /*@ assert i < 512-1; */
  /*@
    @ loop assigns i;
    @ loop variant 512 - 1 - i;
    @ */
  /* The bound 512-1 keeps the access to buffer[i+1] inside the buffer. */
  for(; i < 512-1 && buffer[i]!='>'; i++)
  {
    if(buffer[i]=='/' && buffer[i+1]=='L')
      return read_pdf_file_aux(buffer, i+2);
  }
  return 0;
}

/*
 * Header check callback registered for the signature %PDF-1.
 *
 * Returns 0 when the byte at index 6 is not printable or when the length
 * announced by the header exceeds PHOTOREC_MAX_FILE_SIZE. Otherwise it resets
 * *file_recovery_new, selects the extension and the callbacks, and returns 1.
 *
 * With no announced length (file_size == 0) only file_check_pdf is installed.
 * With one, calculated_file_size is set and data_check_size together with
 * file_check_pdf_and_size are installed.
 *
 * safe_header_only and file_recovery are not referenced in the visible part
 * of the body.
 */
/*@
  @ requires buffer_size >= 512;
  @ requires \valid_read(buffer+(0..buffer_size-1));
  @ requires \valid_read(file_recovery);
  @ requires file_recovery->file_stat==\null || valid_read_string((char*)file_recovery->filename);
  @ requires \valid(file_recovery_new);
  @ requires file_recovery_new->blocksize > 0;
  @ requires separation: \separated(&file_hint_pdf, buffer, file_recovery, file_recovery_new);
  @ ensures \result!=0 ==> valid_file_recovery(file_recovery_new);
  @ assigns *file_recovery_new;
  @*/
static int header_check_pdf(const unsigned char *buffer, const unsigned int buffer_size, const unsigned int safe_header_only, const file_recovery_t *file_recovery, file_recovery_t *file_recovery_new)
{
  uint64_t file_size;
  if(!isprint(buffer[6]))
    return 0;
  file_size=read_pdf_file(buffer);
  if(file_size > PHOTOREC_MAX_FILE_SIZE)
    return 0;
  reset_file_recovery(file_recovery_new);
  /* NOTE(review): damaged line. The searched text, the end of the td_memmem
   * call and the start of the assignment are missing. Presumably a marker
   * specific to Adobe Illustrator was searched and, when found, the extension
   * was set to ai without installing the rename callback - restore from a
   * pristine copy. */
  if(td_memmem(buffer, buffer_size, "<extension="ai";
  else
  {
    file_recovery_new->extension=file_hint_pdf.extension;
    file_recovery_new->file_rename=&file_rename_pdf;
  }
  if(file_size == 0)
  {
    file_recovery_new->file_check=&file_check_pdf;
    return 1;
  }
  file_recovery_new->calculated_file_size=file_size;
  file_recovery_new->data_check=&data_check_size;
  file_recovery_new->file_check=&file_check_pdf_and_size;
  return 1;
}

/*
 * Register header_check_pdf for the 6-byte signature %PDF-1.
 * NOTE(review): the first argument 0 is presumably the offset of the
 * signature within the file - confirm against register_header_check.
 */
static void register_header_check_pdf(file_stat_t *file_stat)
{
  static const unsigned char pdf_header[] = { '%','P','D','F','-','1'};
  register_header_check(0, pdf_header,sizeof(pdf_header), &header_check_pdf, file_stat);
}
#endif