src/file_mkv.c: rewrite mkv parser
This commit is contained in:
parent
c8d6f3ad89
commit
96ecc02a96
1 changed files with 125 additions and 48 deletions
169
src/file_mkv.c
169
src/file_mkv.c
|
@ -35,13 +35,14 @@
|
||||||
#include "types.h"
|
#include "types.h"
|
||||||
#include "filegen.h"
|
#include "filegen.h"
|
||||||
#include "common.h"
|
#include "common.h"
|
||||||
#include "memmem.h"
|
|
||||||
#ifdef DEBUG_MKV
|
#ifdef DEBUG_MKV
|
||||||
#include "log.h"
|
#include "log.h"
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
/*@
|
||||||
|
@ requires \valid(file_stat);
|
||||||
|
@*/
|
||||||
static void register_header_check_mkv(file_stat_t *file_stat);
|
static void register_header_check_mkv(file_stat_t *file_stat);
|
||||||
static int header_check_mkv(const unsigned char *buffer, const unsigned int buffer_size, const unsigned int safe_header_only, const file_recovery_t *file_recovery, file_recovery_t *file_recovery_new);
|
|
||||||
|
|
||||||
const file_hint_t file_hint_mkv= {
|
const file_hint_t file_hint_mkv= {
|
||||||
.extension="mkv",
|
.extension="mkv",
|
||||||
|
@ -52,71 +53,127 @@ const file_hint_t file_hint_mkv= {
|
||||||
.register_header_check=&register_header_check_mkv
|
.register_header_check=&register_header_check_mkv
|
||||||
};
|
};
|
||||||
|
|
||||||
static const unsigned char *EBML_find(const unsigned char *buffer, const unsigned int buffer_size, const unsigned char *EBML_Header, const unsigned int EBML_size)
|
/*@
|
||||||
{
|
@ requires \valid_read(p + (0 .. p_size-1));
|
||||||
const unsigned char *tmp=(const unsigned char *)td_memmem(buffer, buffer_size, EBML_Header, EBML_size);
|
@ requires \valid(uint64);
|
||||||
if(tmp==NULL)
|
@ requires \separated(p + (..), uint64);
|
||||||
return NULL;
|
@ ensures -1 == \result || (1 <= \result <= 8);
|
||||||
return tmp+EBML_size;
|
@ ensures -1 != \result ==> \initialized(uint64);
|
||||||
}
|
@ ensures -1 != \result ==> *uint64 <= 0xfeffffffffffffff;
|
||||||
|
@ assigns *uint64;
|
||||||
|
@*/
|
||||||
/*
 * Decode one EBML variable-length unsigned integer stored at p.
 *
 * The position of the highest set bit in the first byte gives the encoded
 * length (0x80 => 1 byte, 0x40 => 2 bytes, ... 0x01 => 8 bytes).  That marker
 * bit is cleared; the remaining bits of the first byte followed by the next
 * bytes, most significant first, form the value.
 *
 * p       first byte of the encoded integer
 * p_size  number of readable bytes at p
 * uint64  receives the decoded value; written only on success
 *
 * Returns the encoded length in bytes (1 to 8), or -1 when p_size is 0, when
 * the first byte is 0x00, or when fewer than the encoded length bytes are
 * available.
 */
static int EBML_read_unsigned(const unsigned char *p, const unsigned int p_size, uint64_t *uint64)
{
  unsigned char test_bit = 0x80;
  unsigned int i, bytes = 1;
  unsigned char c;
  uint64_t val;
  /* The size must be tested before *p is read: with p_size==0 nothing at p
   * is guaranteed to be readable. */
  if(p_size==0)
    return -1;
  c=*p;
  if(c== 0x00)
    return -1;
  /*@ assert c != 0; */
  /*@
    @ loop invariant test_bit > 0;
    @ loop invariant test_bit == (0x100 >> bytes);
    @ loop assigns test_bit, bytes;
    @ loop unroll 8;
    @*/
  while((c & test_bit) != test_bit)
  {
    /*@ assert c < test_bit; */
    test_bit >>= 1;
    bytes++;
  }
  /*@ assert (c & test_bit) == test_bit; */
  /*@ assert 1 <= bytes <= 8; */
  /*@ assert c >= test_bit; */
  if(p_size < bytes)
    return -1;
  /*@ assert bytes <= p_size; */
  val = c - test_bit;	//eliminate first bit, val < 0x80
  /*@ assert val <= 0xfe; */
  /*@
    @ loop assigns i, val;
    @ loop unroll 8;
    @ loop variant bytes-i;
    @*/
  for(i=1; i<bytes; i++)
  {
    val <<= 8;
    val += p[i];
  }
  /*@ assert val <= 0xfeffffffffffffff; */
  *uint64 = val;
  return (int)bytes;
}
|
||||||
|
|
||||||
/*
 * Walk a sequence of EBML elements (ID, size, payload) and look for the
 * element whose ID bytes equal EBML_Header.
 *
 * buffer       start of the element sequence
 * buffer_size  number of readable bytes at buffer
 * EBML_Header  ID bytes to look for
 * EBML_size    length of EBML_Header in bytes
 *
 * The length of each ID is measured with EBML_read_unsigned(); when an ID
 * does not match, its size field is decoded and the payload is skipped.
 *
 * Returns the offset, relative to buffer, of the byte that follows the
 * matching ID (i.e. where that element's size field starts), or -1 when the
 * ID is not found or the data cannot be parsed.
 */
/*@
  @ requires EBML_size > 0;
  @ requires \valid_read(buffer + (0 .. buffer_size-1));
  @ requires \valid_read(EBML_Header + (0 .. EBML_size-1));
  @ assigns \result;
  @*/
static int EBML_find(const unsigned char *buffer, const unsigned int buffer_size, const unsigned char *EBML_Header, const unsigned int EBML_size)
{
  unsigned int offset=0;
  /*@
    @ loop assigns offset;
    @*/
  while(offset < buffer_size)
  {
    uint64_t uint64=0;
    int bytes;
    bytes = EBML_read_unsigned(&buffer[offset], buffer_size-offset, &uint64);
#ifdef DEBUG_MKV
    /* Only peek at the second byte when it is still inside the buffer. */
    log_info("EBML_find %02x%02x bytes=%d\n", buffer[offset],
	(offset+1 < buffer_size ? buffer[offset+1] : 0), bytes);
#endif
    if(bytes <= 0)
      return -1;
    /* bytes <= buffer_size-offset here, so the comparison stays in bounds. */
    if((unsigned int)bytes == EBML_size && memcmp(&buffer[offset], EBML_Header, EBML_size)==0)
    {
      return (int)(offset+bytes);
    }
    offset += bytes;
    if(offset >= buffer_size)
      return -1;
    bytes = EBML_read_unsigned(&buffer[offset], buffer_size-offset, &uint64);
    if(bytes <= 0)
      return -1;
    offset += bytes;
    /* Compare the payload size with what is left rather than with the whole
     * buffer: a payload that reaches or passes the end cannot be followed by
     * another element, and this keeps offset from wrapping around. */
    if(uint64 >= buffer_size - offset)
      return -1;
    offset += (unsigned int)uint64;
  }
  return -1;
}

/*
 * Decode the size field of a string element and check that the string
 * payload fits in the available data.  No string is copied: the caller
 * reads the *strlength payload bytes that start at p + (return value).
 *
 * p          first byte of the size field
 * p_size     number of readable bytes at p
 * strlength  receives the payload length; not meaningful on failure
 *
 * Returns the length of the size field (1 to 8), or -1 when the size field
 * cannot be decoded or size field plus payload exceed p_size.
 */
/*@
  @ requires \valid_read(p + (0 .. p_size-1));
  @ requires \valid(strlength);
  @ requires \separated(p + (..), strlength);
  @ ensures -1 == \result || (1 <= \result <= 8);
  @ ensures -1 != \result ==> \initialized(strlength);
  @ ensures -1 != \result ==> *strlength <= 0xfeffffffffffffff;
  @ ensures -1 != \result ==> \result + *strlength <= p_size;
  @ assigns *strlength, \result;
  @*/
static int EBML_read_string(const unsigned char *p, const unsigned int p_size, uint64_t *strlength)
{
  int bytes;
  *strlength = 0;
  bytes = EBML_read_unsigned(p, p_size, strlength);
#ifdef DEBUG_MKV
  log_info("EBML_read_string bytes=%d strlength=%llu\n", bytes, (long long unsigned)*strlength);
#endif
  if(bytes <= 0)
    return -1;
  /*@ assert 1 <= bytes <= 8; */
  /*@ assert *strlength <= 0xfeffffffffffffff; */
  if(bytes + *strlength > p_size)
    return -1;
  /*@ assert bytes + *strlength <= p_size; */
  return bytes;
}
|
||||||
|
|
||||||
static const unsigned char EBML_header[4]= { 0x1a,0x45,0xdf,0xa3};
|
static const unsigned char EBML_header[4]= { 0x1a,0x45,0xdf,0xa3};
|
||||||
|
|
||||||
static void register_header_check_mkv(file_stat_t *file_stat)
|
|
||||||
{
|
|
||||||
register_header_check(0, EBML_header,sizeof(EBML_header), &header_check_mkv, file_stat);
|
|
||||||
}
|
|
||||||
|
|
||||||
static int header_check_mkv(const unsigned char *buffer, const unsigned int buffer_size, const unsigned int safe_header_only, const file_recovery_t *file_recovery, file_recovery_t *file_recovery_new)
|
static int header_check_mkv(const unsigned char *buffer, const unsigned int buffer_size, const unsigned int safe_header_only, const file_recovery_t *file_recovery, file_recovery_t *file_recovery_new)
|
||||||
{
|
{
|
||||||
if(memcmp(buffer,EBML_header,sizeof(EBML_header))!=0)
|
if(memcmp(buffer,EBML_header,sizeof(EBML_header))!=0)
|
||||||
|
@ -126,12 +183,14 @@ static int header_check_mkv(const unsigned char *buffer, const unsigned int buff
|
||||||
const unsigned char EBML_Segment[4]= { 0x18,0x53,0x80,0x67};
|
const unsigned char EBML_Segment[4]= { 0x18,0x53,0x80,0x67};
|
||||||
uint64_t segment_size=0;
|
uint64_t segment_size=0;
|
||||||
uint64_t header_data_size=0;
|
uint64_t header_data_size=0;
|
||||||
char *doctype=NULL;
|
|
||||||
const unsigned char *p;
|
const unsigned char *p;
|
||||||
unsigned int header_data_offset;
|
unsigned int header_data_offset;
|
||||||
unsigned int segment_offset;
|
unsigned int segment_offset;
|
||||||
unsigned int segment_data_offset;
|
unsigned int segment_data_offset;
|
||||||
|
uint64_t strlength = 0;
|
||||||
|
int bytes;
|
||||||
int len;
|
int len;
|
||||||
|
int offset_doctype;
|
||||||
|
|
||||||
if((len=EBML_read_unsigned(buffer+sizeof(EBML_header),
|
if((len=EBML_read_unsigned(buffer+sizeof(EBML_header),
|
||||||
buffer_size-sizeof(EBML_header), &header_data_size)) < 0)
|
buffer_size-sizeof(EBML_header), &header_data_size)) < 0)
|
||||||
|
@ -139,10 +198,14 @@ static int header_check_mkv(const unsigned char *buffer, const unsigned int buff
|
||||||
header_data_offset = sizeof(EBML_header) + len;
|
header_data_offset = sizeof(EBML_header) + len;
|
||||||
if(header_data_offset >= buffer_size)
|
if(header_data_offset >= buffer_size)
|
||||||
return 0;
|
return 0;
|
||||||
segment_offset = header_data_offset + header_data_size;
|
|
||||||
#ifdef DEBUG_MKV
|
#ifdef DEBUG_MKV
|
||||||
log_info("header_data_offset %llu\n", (long long unsigned) header_data_offset);
|
log_info("header_data_offset %llu\n", (long long unsigned) header_data_offset);
|
||||||
log_info("header_data_size %llu\n", (long long unsigned) header_data_size);
|
log_info("header_data_size %llu\n", (long long unsigned) header_data_size);
|
||||||
|
#endif
|
||||||
|
if(header_data_size >= buffer_size)
|
||||||
|
return 0;
|
||||||
|
segment_offset = header_data_offset + header_data_size;
|
||||||
|
#ifdef DEBUG_MKV
|
||||||
log_info("segment_offset %llu\n", (long long unsigned) segment_offset);
|
log_info("segment_offset %llu\n", (long long unsigned) segment_offset);
|
||||||
#endif
|
#endif
|
||||||
if(segment_offset +sizeof(EBML_Segment) >= buffer_size)
|
if(segment_offset +sizeof(EBML_Segment) >= buffer_size)
|
||||||
|
@ -161,17 +224,26 @@ static int header_check_mkv(const unsigned char *buffer, const unsigned int buff
|
||||||
log_info("segment size %llu\n", (long long unsigned) segment_size);
|
log_info("segment size %llu\n", (long long unsigned) segment_size);
|
||||||
#endif
|
#endif
|
||||||
/* get EBML_DocType, it will be used to set the file extension */
|
/* get EBML_DocType, it will be used to set the file extension */
|
||||||
p=EBML_find(&buffer[header_data_offset], header_data_size, EBML_DocType, sizeof(EBML_DocType));
|
offset_doctype=EBML_find(&buffer[header_data_offset], header_data_size, EBML_DocType, sizeof(EBML_DocType));
|
||||||
if (p == NULL || EBML_read_string(p, header_data_size-(p-&buffer[header_data_offset]), &doctype) < 0)
|
#ifdef DEBUG_MKV
|
||||||
|
log_info("offset_doctype = %u\n", offset_doctype);
|
||||||
|
#endif
|
||||||
|
if(offset_doctype < 0 || header_data_size <= (uint64_t)offset_doctype)
|
||||||
|
return 0;
|
||||||
|
/*@ assert header_data_size > offset_doctype; */
|
||||||
|
p = &buffer[header_data_offset+offset_doctype];
|
||||||
|
bytes = EBML_read_string(&buffer[header_data_offset+offset_doctype], header_data_size-offset_doctype, &strlength);
|
||||||
|
if (bytes < 0)
|
||||||
return 0;
|
return 0;
|
||||||
reset_file_recovery(file_recovery_new);
|
reset_file_recovery(file_recovery_new);
|
||||||
if(strcmp(doctype,"matroska")==0)
|
if( (strlength == 8 && memcmp(p+bytes,"matroska", 8)==0) ||
|
||||||
|
(strlength == 9 && memcmp(p+bytes,"matroska", 9)==0))
|
||||||
file_recovery_new->extension=file_hint_mkv.extension;
|
file_recovery_new->extension=file_hint_mkv.extension;
|
||||||
else if(strcmp(doctype,"webm")==0)
|
else if((strlength == 4 && memcmp(p+bytes,"webm", 4)==0) ||
|
||||||
|
( strlength == 5 && memcmp(p+bytes,"webm", 5)==0))
|
||||||
file_recovery_new->extension="webm";
|
file_recovery_new->extension="webm";
|
||||||
else
|
else
|
||||||
file_recovery_new->extension="ebml";
|
file_recovery_new->extension="ebml";
|
||||||
free(doctype);
|
|
||||||
if(segment_size > 0)
|
if(segment_size > 0)
|
||||||
{
|
{
|
||||||
file_recovery_new->calculated_file_size = segment_data_offset + segment_size;
|
file_recovery_new->calculated_file_size = segment_data_offset + segment_size;
|
||||||
|
@ -184,4 +256,9 @@ static int header_check_mkv(const unsigned char *buffer, const unsigned int buff
|
||||||
}
|
}
|
||||||
return 1;
|
return 1;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*
 * Hook header_check_mkv into the header-check table held by file_stat,
 * keyed on the 4-byte EBML_header signature.
 * NOTE(review): the leading 0 is presumably the offset at which the
 * signature must appear -- confirm against register_header_check().
 */
static void register_header_check_mkv(file_stat_t *file_stat)
{
  register_header_check(0,
      EBML_header, sizeof(EBML_header),
      &header_check_mkv,
      file_stat);
}
|
||||||
#endif
|
#endif
|
||||||
|
|
Loading…
Reference in a new issue