Move UTFsize() from src/file_txt.c to src/utfsize.c

src/file_win.c: add Frama-C annotations
This commit is contained in:
Christophe Grenier 2021-03-14 11:01:02 +01:00
parent 5909fc97d0
commit cef9304cf0
5 changed files with 157 additions and 115 deletions

View file

@ -387,9 +387,10 @@ file_C = filegen.c \
file_z2d.c \ file_z2d.c \
file_zcode.c \ file_zcode.c \
file_zip.c \ file_zip.c \
file_zpr.c file_zpr.c \
utfsize.c
file_H = ext2.h hfsp_struct.h filegen.h file_doc.h file_jpg.h file_gz.h file_riff.h file_sp3.h file_tar.h file_tiff.h file_txt.h luks_struct.h ntfs_struct.h ole.h pe.h suspend.h file_H = ext2.h hfsp_struct.h filegen.h file_doc.h file_jpg.h file_gz.h file_riff.h file_sp3.h file_tar.h file_tiff.h luks_struct.h ntfs_struct.h ole.h pe.h suspend.h utfsize.h
photorec_C = photorec.c phcfg.c addpart.c chgarch.c chgtype.c dir.c exfatp.c ext2grp.c ext2_dir.c ext2p.c fat_dir.c fatp.c file_found.c geometry.c ntfs_dir.c ntfsp.c pdisksel.c poptions.c sessionp.c dfxml.c partgptro.c photorec_C = photorec.c phcfg.c addpart.c chgarch.c chgtype.c dir.c exfatp.c ext2grp.c ext2_dir.c ext2p.c fat_dir.c fatp.c file_found.c geometry.c ntfs_dir.c ntfsp.c pdisksel.c poptions.c sessionp.c dfxml.c partgptro.c
@ -467,6 +468,10 @@ session_tiff_le.framac: file_tiff.c file_tiff_be.c file_tiff_le.c common.c fileg
$(CC) $(CFLAGS) -DMAIN_tiff_le -DHAVE_CONFIG_H -DSINGLE_FORMAT -DSINGLE_FORMAT_tiff -I.. $^ -o demo_tiff_le $(CC) $(CFLAGS) -DMAIN_tiff_le -DHAVE_CONFIG_H -DSINGLE_FORMAT -DSINGLE_FORMAT_tiff -I.. $^ -o demo_tiff_le
frama-c $^ -cpp-extra-args="-DMAIN_tiff_le -DHAVE_CONFIG_H -DSINGLE_FORMAT -DSINGLE_FORMAT_tiff -D__x86_64__ -I.." $(FRAMA_C_FLAGS) -save $@ frama-c $^ -cpp-extra-args="-DMAIN_tiff_le -DHAVE_CONFIG_H -DSINGLE_FORMAT -DSINGLE_FORMAT_tiff -D__x86_64__ -I.." $(FRAMA_C_FLAGS) -save $@
session_txt.framac: file_txt.c common.c filegen.c log.c utfsize.c
$(CC) $(CFLAGS) -DMAIN_txt -DHAVE_CONFIG_H -DSINGLE_FORMAT -DSINGLE_FORMAT_txt -I.. $^ -o demo_txt
frama-c $^ -cpp-extra-args="-DMAIN_txt -DHAVE_CONFIG_H -DSINGLE_FORMAT -DSINGLE_FORMAT_txt -D__x86_64__ -I.." $(FRAMA_C_FLAGS) -save $@
session_%.framac: file_%.c common.c filegen.c log.c session_%.framac: file_%.c common.c filegen.c log.c
$(CC) $(CFLAGS) -DMAIN_$* -DHAVE_CONFIG_H -DSINGLE_FORMAT -DSINGLE_FORMAT_$* -I.. $^ -o demo_$* $(CC) $(CFLAGS) -DMAIN_$* -DHAVE_CONFIG_H -DSINGLE_FORMAT -DSINGLE_FORMAT_$* -I.. $^ -o demo_$*
frama-c $^ -cpp-extra-args="-DMAIN_$* -DHAVE_CONFIG_H -DSINGLE_FORMAT -DSINGLE_FORMAT_$* -D__x86_64__ -I.." $(FRAMA_C_FLAGS) -save $@ frama-c $^ -cpp-extra-args="-DMAIN_$* -DHAVE_CONFIG_H -DSINGLE_FORMAT -DSINGLE_FORMAT_$* -D__x86_64__ -I.." $(FRAMA_C_FLAGS) -save $@

View file

@ -41,7 +41,7 @@
#include "filegen.h" #include "filegen.h"
#include "log.h" #include "log.h"
#include "memmem.h" #include "memmem.h"
#include "file_txt.h" #include "utfsize.h"
#if defined(__FRAMAC__) #if defined(__FRAMAC__)
#include "__fc_builtin.h" #include "__fc_builtin.h"
#endif #endif
@ -682,90 +682,6 @@ static int UTF2Lat(unsigned char *buffer_lower, const unsigned char *buffer, con
return offset_dst; return offset_dst;
} }
int UTFsize(const unsigned char *buffer, const unsigned int buf_len)
{
const unsigned char *p=buffer; /* pointers to actual position in source buffer */
unsigned int i=0;
/*@
@ loop invariant 0 <= i < buf_len + 3;
@ loop invariant p == buffer + i;
@ loop assigns i, p;
@ loop variant buf_len - 1 - i;
@*/
while(i<buf_len)
{
/*@ assert i < buf_len; */
/*@ assert p == buffer + i; */
const unsigned char c=*p;
if(c=='\0')
return i;
/* Reject some invalid UTF-8 sequences */
if(c==0xc0 || c==0xc1 || c==0xf7 || c>=0xfd)
return i;
/*@ assert i + 1 >= buf_len || \valid_read(p+1); */
/*@ assert i + 2 >= buf_len || \valid_read(p+2); */
if((c & 0xf0)==0xe0 &&
(i+1 >= buf_len || (*(p+1) & 0xc0)==0x80) &&
(i+2 >= buf_len || (*(p+2) & 0xc0)==0x80))
{ /* UTF8 l=3 */
#ifdef DEBUG_TXT
log_info("UTFsize i=%u l=3\n", i);
#endif
p+=3;
i+=3;
}
else if((c & 0xe0)==0xc0 &&
(i+1 >= buf_len || (*(p+1) & 0xc0)==0x80))
{ /* UTF8 l=2 */
#ifdef DEBUG_TXT
log_info("UTFsize i=%u l=2\n", i);
#endif
p+=2;
i+=2;
}
else
{ /* Ascii UCS */
#ifdef DEBUG_TXT
log_info("UTFsize i=%u l=1 ? *p=%c\n", i, c);
#endif
switch(c)
{
case 0x00:
case 0x01:
case 0x02:
case 0x03:
case 0x04:
case 0x05:
case 0x06:
case 0x07:
case 0x0b:
case 0x0c:
case 0x10:
case 0x11:
case 0x12:
case 0x13:
case 0x14:
case 0x15:
case 0x16:
case 0x17:
case 0x18:
case 0x19:
case 0x1a:
case 0x1b:
case 0x1c:
case 0x1d:
case 0x1e:
case 0x1f:
case 0x7f:
return i;
}
p++;
i++;
}
}
return (i<buf_len?i:buf_len);
}
/*@ /*@
@ requires buffer_size >= 2 && (buffer_size&1)==0; @ requires buffer_size >= 2 && (buffer_size&1)==0;
@ requires \valid_read((char *)buffer+(0..buffer_size-1)); @ requires \valid_read((char *)buffer+(0..buffer_size-1));

View file

@ -33,53 +33,54 @@
#include <stdio.h> #include <stdio.h>
#include "types.h" #include "types.h"
#include "filegen.h" #include "filegen.h"
#include "file_txt.h" #include "utfsize.h"
#include "common.h" #include "common.h"
/*@ requires \valid(file_stat); */
static void register_header_check_win(file_stat_t *file_stat); static void register_header_check_win(file_stat_t *file_stat);
const file_hint_t file_hint_win= { const file_hint_t file_hint_win = {
.extension="win", .extension = "win",
.description="Opera preferences", .description = "Opera preferences",
.max_filesize=PHOTOREC_MAX_FILE_SIZE, .max_filesize = PHOTOREC_MAX_FILE_SIZE,
.recover=1, .recover = 1,
.enable_by_default=1, .enable_by_default = 1,
.register_header_check=&register_header_check_win .register_header_check = &register_header_check_win
}; };
static data_check_t data_check_win(const unsigned char *buffer, const unsigned int buffer_size, file_recovery_t *file_recovery) static data_check_t data_check_win(const unsigned char *buffer, const unsigned int buffer_size, file_recovery_t *file_recovery)
{ {
unsigned int i; unsigned int i;
unsigned int offset=0; unsigned int offset = 0;
if(file_recovery->calculated_file_size==0) if(file_recovery->calculated_file_size == 0)
offset=3; offset = 3;
i=UTFsize(&buffer[buffer_size/2+offset], buffer_size/2-offset); i = UTFsize(&buffer[buffer_size / 2 + offset], buffer_size / 2 - offset);
if(i<buffer_size/2-offset) if(i < buffer_size / 2 - offset)
{ {
if(i>=10) if(i >= 10)
file_recovery->calculated_file_size=file_recovery->file_size+offset+i; file_recovery->calculated_file_size = file_recovery->file_size + offset + i;
return DC_STOP; return DC_STOP;
} }
file_recovery->calculated_file_size=file_recovery->file_size+(buffer_size/2); file_recovery->calculated_file_size = file_recovery->file_size + (buffer_size / 2);
return DC_CONTINUE; return DC_CONTINUE;
} }
static int header_check_win(const unsigned char *buffer, const unsigned int buffer_size, const unsigned int safe_header_only, const file_recovery_t *file_recovery, file_recovery_t *file_recovery_new) static int header_check_win(const unsigned char *buffer, const unsigned int buffer_size, const unsigned int safe_header_only, const file_recovery_t *file_recovery, file_recovery_t *file_recovery_new)
{ {
reset_file_recovery(file_recovery_new); reset_file_recovery(file_recovery_new);
file_recovery_new->extension=file_hint_win.extension; file_recovery_new->extension = file_hint_win.extension;
file_recovery_new->data_check=&data_check_win; file_recovery_new->data_check = &data_check_win;
file_recovery_new->file_check=&file_check_size; file_recovery_new->file_check = &file_check_size;
return 1; return 1;
} }
static void register_header_check_win(file_stat_t *file_stat) static void register_header_check_win(file_stat_t *file_stat)
{ {
static const unsigned char win_header[31]= { static const unsigned char win_header[31] = {
0xef, 0xbb, 0xbf, 'O' , 'p' , 'e' , 'r' , 'a' , 0xef, 0xbb, 0xbf, 'O', 'p', 'e', 'r', 'a',
' ' , 'P' , 'r' , 'e' , 'f' , 'e' , 'r' , 'e' , ' ', 'P', 'r', 'e', 'f', 'e', 'r', 'e',
'n' , 'c' , 'e' , 's' , ' ' , 'v' , 'e' , 'r' , 'n', 'c', 'e', 's', ' ', 'v', 'e', 'r',
's' , 'i' , 'o' , 'n' , ' ' , '2' , '.' 's', 'i', 'o', 'n', ' ', '2', '.'
}; };
register_header_check(0, win_header, sizeof(win_header), &header_check_win, file_stat); register_header_check(0, win_header, sizeof(win_header), &header_check_win, file_stat);
} }

120
src/utfsize.c Normal file
View file

@ -0,0 +1,120 @@
/*
File: utfsize.c
Copyright (C) 2021 Christophe GRENIER <grenier@cgsecurity.org>
This software is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with this program; if not, write the Free Software Foundation, Inc., 51
Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
*/
#if !defined(SINGLE_FORMAT) || defined(SINGLE_FORMAT_txt) || defined(SINGLE_FORMAT_win)
#ifdef HAVE_CONFIG_H
#include <config.h>
#endif
#ifdef HAVE_STDLIB_H
#include <stdlib.h>
#endif
#ifdef HAVE_STRING_H
#include <string.h>
#endif
#include <stdio.h>
#include "log.h"
#include "utfsize.h"
int UTFsize(const unsigned char *buffer, const unsigned int buf_len)
{
const unsigned char *p=buffer; /* pointers to actual position in source buffer */
unsigned int i=0;
/*@
@ loop invariant 0 <= i < buf_len + 3;
@ loop invariant p == buffer + i;
@ loop assigns i, p;
@ loop variant buf_len - 1 - i;
@*/
while(i<buf_len)
{
/*@ assert i < buf_len; */
/*@ assert p == buffer + i; */
const unsigned char c=*p;
if(c=='\0')
return i;
/* Reject some invalid UTF-8 sequences */
if(c==0xc0 || c==0xc1 || c==0xf7 || c>=0xfd)
return i;
/*@ assert i + 1 >= buf_len || \valid_read(p+1); */
/*@ assert i + 2 >= buf_len || \valid_read(p+2); */
if((c & 0xf0)==0xe0 &&
(i+1 >= buf_len || (*(p+1) & 0xc0)==0x80) &&
(i+2 >= buf_len || (*(p+2) & 0xc0)==0x80))
{ /* UTF8 l=3 */
#ifdef DEBUG_TXT
log_info("UTFsize i=%u l=3\n", i);
#endif
p+=3;
i+=3;
}
else if((c & 0xe0)==0xc0 &&
(i+1 >= buf_len || (*(p+1) & 0xc0)==0x80))
{ /* UTF8 l=2 */
#ifdef DEBUG_TXT
log_info("UTFsize i=%u l=2\n", i);
#endif
p+=2;
i+=2;
}
else
{ /* Ascii UCS */
#ifdef DEBUG_TXT
log_info("UTFsize i=%u l=1 ? *p=%c\n", i, c);
#endif
switch(c)
{
case 0x00:
case 0x01:
case 0x02:
case 0x03:
case 0x04:
case 0x05:
case 0x06:
case 0x07:
case 0x0b:
case 0x0c:
case 0x10:
case 0x11:
case 0x12:
case 0x13:
case 0x14:
case 0x15:
case 0x16:
case 0x17:
case 0x18:
case 0x19:
case 0x1a:
case 0x1b:
case 0x1c:
case 0x1d:
case 0x1e:
case 0x1f:
case 0x7f:
return i;
}
p++;
i++;
}
}
return (i<buf_len?i:buf_len);
}
#endif

View file

@ -1,8 +1,8 @@
/* /*
File: file_txt.h File: utfsize.h
Copyright (C) 2009 Christophe GRENIER <grenier@cgsecurity.org> Copyright (C) 2009-2021 Christophe GRENIER <grenier@cgsecurity.org>
This software is free software; you can redistribute it and/or modify This software is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by it under the terms of the GNU General Public License as published by
@ -19,8 +19,8 @@
Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
*/ */
#ifndef _FILE_TXT_H #ifndef _UTFSIZE_H
#define _FILE_TXT_H #define _UTFSIZE_H
#ifdef __cplusplus #ifdef __cplusplus
extern "C" { extern "C" {
#endif #endif