Move UTFsize() from src/file_txt.c to src/utfsize.c

src/file_win.c: add Frama-C annotations
This commit is contained in:
Christophe Grenier 2021-03-14 11:01:02 +01:00
parent 5909fc97d0
commit cef9304cf0
5 changed files with 157 additions and 115 deletions

View file

@ -387,9 +387,10 @@ file_C = filegen.c \
file_z2d.c \
file_zcode.c \
file_zip.c \
file_zpr.c
file_zpr.c \
utfsize.c
file_H = ext2.h hfsp_struct.h filegen.h file_doc.h file_jpg.h file_gz.h file_riff.h file_sp3.h file_tar.h file_tiff.h file_txt.h luks_struct.h ntfs_struct.h ole.h pe.h suspend.h
file_H = ext2.h hfsp_struct.h filegen.h file_doc.h file_jpg.h file_gz.h file_riff.h file_sp3.h file_tar.h file_tiff.h luks_struct.h ntfs_struct.h ole.h pe.h suspend.h utfsize.h
photorec_C = photorec.c phcfg.c addpart.c chgarch.c chgtype.c dir.c exfatp.c ext2grp.c ext2_dir.c ext2p.c fat_dir.c fatp.c file_found.c geometry.c ntfs_dir.c ntfsp.c pdisksel.c poptions.c sessionp.c dfxml.c partgptro.c
@ -467,6 +468,10 @@ session_tiff_le.framac: file_tiff.c file_tiff_be.c file_tiff_le.c common.c fileg
$(CC) $(CFLAGS) -DMAIN_tiff_le -DHAVE_CONFIG_H -DSINGLE_FORMAT -DSINGLE_FORMAT_tiff -I.. $^ -o demo_tiff_le
frama-c $^ -cpp-extra-args="-DMAIN_tiff_le -DHAVE_CONFIG_H -DSINGLE_FORMAT -DSINGLE_FORMAT_tiff -D__x86_64__ -I.." $(FRAMA_C_FLAGS) -save $@
session_txt.framac: file_txt.c common.c filegen.c log.c utfsize.c
$(CC) $(CFLAGS) -DMAIN_txt -DHAVE_CONFIG_H -DSINGLE_FORMAT -DSINGLE_FORMAT_txt -I.. $^ -o demo_txt
frama-c $^ -cpp-extra-args="-DMAIN_txt -DHAVE_CONFIG_H -DSINGLE_FORMAT -DSINGLE_FORMAT_txt -D__x86_64__ -I.." $(FRAMA_C_FLAGS) -save $@
session_%.framac: file_%.c common.c filegen.c log.c
$(CC) $(CFLAGS) -DMAIN_$* -DHAVE_CONFIG_H -DSINGLE_FORMAT -DSINGLE_FORMAT_$* -I.. $^ -o demo_$*
frama-c $^ -cpp-extra-args="-DMAIN_$* -DHAVE_CONFIG_H -DSINGLE_FORMAT -DSINGLE_FORMAT_$* -D__x86_64__ -I.." $(FRAMA_C_FLAGS) -save $@

View file

@ -41,7 +41,7 @@
#include "filegen.h"
#include "log.h"
#include "memmem.h"
#include "file_txt.h"
#include "utfsize.h"
#if defined(__FRAMAC__)
#include "__fc_builtin.h"
#endif
@ -682,90 +682,6 @@ static int UTF2Lat(unsigned char *buffer_lower, const unsigned char *buffer, con
return offset_dst;
}
int UTFsize(const unsigned char *buffer, const unsigned int buf_len)
{
const unsigned char *p=buffer; /* pointers to actual position in source buffer */
unsigned int i=0;
/*@
@ loop invariant 0 <= i < buf_len + 3;
@ loop invariant p == buffer + i;
@ loop assigns i, p;
@ loop variant buf_len - 1 - i;
@*/
while(i<buf_len)
{
/*@ assert i < buf_len; */
/*@ assert p == buffer + i; */
const unsigned char c=*p;
if(c=='\0')
return i;
/* Reject some invalid UTF-8 sequences */
if(c==0xc0 || c==0xc1 || c==0xf7 || c>=0xfd)
return i;
/*@ assert i + 1 >= buf_len || \valid_read(p+1); */
/*@ assert i + 2 >= buf_len || \valid_read(p+2); */
if((c & 0xf0)==0xe0 &&
(i+1 >= buf_len || (*(p+1) & 0xc0)==0x80) &&
(i+2 >= buf_len || (*(p+2) & 0xc0)==0x80))
{ /* UTF8 l=3 */
#ifdef DEBUG_TXT
log_info("UTFsize i=%u l=3\n", i);
#endif
p+=3;
i+=3;
}
else if((c & 0xe0)==0xc0 &&
(i+1 >= buf_len || (*(p+1) & 0xc0)==0x80))
{ /* UTF8 l=2 */
#ifdef DEBUG_TXT
log_info("UTFsize i=%u l=2\n", i);
#endif
p+=2;
i+=2;
}
else
{ /* Ascii UCS */
#ifdef DEBUG_TXT
log_info("UTFsize i=%u l=1 ? *p=%c\n", i, c);
#endif
switch(c)
{
case 0x00:
case 0x01:
case 0x02:
case 0x03:
case 0x04:
case 0x05:
case 0x06:
case 0x07:
case 0x0b:
case 0x0c:
case 0x10:
case 0x11:
case 0x12:
case 0x13:
case 0x14:
case 0x15:
case 0x16:
case 0x17:
case 0x18:
case 0x19:
case 0x1a:
case 0x1b:
case 0x1c:
case 0x1d:
case 0x1e:
case 0x1f:
case 0x7f:
return i;
}
p++;
i++;
}
}
return (i<buf_len?i:buf_len);
}
/*@
@ requires buffer_size >= 2 && (buffer_size&1)==0;
@ requires \valid_read((char *)buffer+(0..buffer_size-1));

View file

@ -33,9 +33,10 @@
#include <stdio.h>
#include "types.h"
#include "filegen.h"
#include "file_txt.h"
#include "utfsize.h"
#include "common.h"
/*@ requires \valid(file_stat); */
static void register_header_check_win(file_stat_t *file_stat);
const file_hint_t file_hint_win = {

120
src/utfsize.c Normal file
View file

@ -0,0 +1,120 @@
/*
File: utfsize.c
Copyright (C) 2021 Christophe GRENIER <grenier@cgsecurity.org>
This software is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with this program; if not, write the Free Software Foundation, Inc., 51
Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
*/
#if !defined(SINGLE_FORMAT) || defined(SINGLE_FORMAT_txt) || defined(SINGLE_FORMAT_win)
#ifdef HAVE_CONFIG_H
#include <config.h>
#endif
#ifdef HAVE_STDLIB_H
#include <stdlib.h>
#endif
#ifdef HAVE_STRING_H
#include <string.h>
#endif
#include <stdio.h>
#include "log.h"
#include "utfsize.h"
int UTFsize(const unsigned char *buffer, const unsigned int buf_len)
{
const unsigned char *p=buffer; /* pointers to actual position in source buffer */
unsigned int i=0;
/*@
@ loop invariant 0 <= i < buf_len + 3;
@ loop invariant p == buffer + i;
@ loop assigns i, p;
@ loop variant buf_len - 1 - i;
@*/
while(i<buf_len)
{
/*@ assert i < buf_len; */
/*@ assert p == buffer + i; */
const unsigned char c=*p;
if(c=='\0')
return i;
/* Reject some invalid UTF-8 sequences */
if(c==0xc0 || c==0xc1 || c==0xf7 || c>=0xfd)
return i;
/*@ assert i + 1 >= buf_len || \valid_read(p+1); */
/*@ assert i + 2 >= buf_len || \valid_read(p+2); */
if((c & 0xf0)==0xe0 &&
(i+1 >= buf_len || (*(p+1) & 0xc0)==0x80) &&
(i+2 >= buf_len || (*(p+2) & 0xc0)==0x80))
{ /* UTF8 l=3 */
#ifdef DEBUG_TXT
log_info("UTFsize i=%u l=3\n", i);
#endif
p+=3;
i+=3;
}
else if((c & 0xe0)==0xc0 &&
(i+1 >= buf_len || (*(p+1) & 0xc0)==0x80))
{ /* UTF8 l=2 */
#ifdef DEBUG_TXT
log_info("UTFsize i=%u l=2\n", i);
#endif
p+=2;
i+=2;
}
else
{ /* Ascii UCS */
#ifdef DEBUG_TXT
log_info("UTFsize i=%u l=1 ? *p=%c\n", i, c);
#endif
switch(c)
{
case 0x00:
case 0x01:
case 0x02:
case 0x03:
case 0x04:
case 0x05:
case 0x06:
case 0x07:
case 0x0b:
case 0x0c:
case 0x10:
case 0x11:
case 0x12:
case 0x13:
case 0x14:
case 0x15:
case 0x16:
case 0x17:
case 0x18:
case 0x19:
case 0x1a:
case 0x1b:
case 0x1c:
case 0x1d:
case 0x1e:
case 0x1f:
case 0x7f:
return i;
}
p++;
i++;
}
}
return (i<buf_len?i:buf_len);
}
#endif

View file

@ -1,8 +1,8 @@
/*
File: file_txt.h
File: utfsize.h
Copyright (C) 2009 Christophe GRENIER <grenier@cgsecurity.org>
Copyright (C) 2009-2021 Christophe GRENIER <grenier@cgsecurity.org>
This software is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
@ -19,8 +19,8 @@
Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
*/
#ifndef _FILE_TXT_H
#define _FILE_TXT_H
#ifndef _UTFSIZE_H
#define _UTFSIZE_H
#ifdef __cplusplus
extern "C" {
#endif