887 lines
23 KiB
C
887 lines
23 KiB
C
/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
|
|
/*
|
|
* Copyright (C) 2011 Norbert Thiebaud
|
|
* License: GPLv3
|
|
*/
|
|
|
|
/* define to activate stats reporting on hash usage*/
|
|
/* #define HASH_STAT */
|
|
|
|
/* ===============================================
|
|
* Set-up: defines to identify the system and system related properties
|
|
* ===============================================
|
|
*/
|
|
|
|
#ifdef __APPLE__
|
|
#ifdef __x86_64__
|
|
#define CORE_BIG_ENDIAN 0
|
|
#define CORE_LITTLE_ENDIAN 1
|
|
#define USE_MEMORY_ALIGNMENT 64 /* big value -> no alignment */
|
|
#else
|
|
#define CORE_BIG_ENDIAN 1
|
|
#define CORE_LITTLE_ENDIAN 0
|
|
#define USE_MEMORY_ALIGNMENT 4
|
|
#endif
|
|
|
|
#endif
|
|
#ifdef _AIX
|
|
#define CORE_BIG_ENDIAN 1
|
|
#define CORE_LITTLE_ENDIAN 0
|
|
#define USE_MEMORY_ALIGNMENT 4
|
|
#endif /* Def _AIX */
|
|
|
|
#ifdef __CYGWIN__
|
|
#define __windows
|
|
#define CORE_BIG_ENDIAN 0
|
|
#define CORE_LITTLE_ENDIAN 1
|
|
#define USE_MEMORY_ALIGNMENT 64 /* big value -> no alignment */
|
|
#endif /* Def __CYGWIN__ */
|
|
|
|
#if defined(__linux) || defined(__OpenBSD__) || \
|
|
defined(__FreeBSD__) || defined(__NetBSD__) || \
|
|
defined(__DragonFly__)
|
|
#if __BYTE_ORDER == __LITTLE_ENDIAN
|
|
#define CORE_BIG_ENDIAN 0
|
|
#define CORE_LITTLE_ENDIAN 1
|
|
#if defined(__x86_64) || defined(__i386)
|
|
#define USE_MEMORY_ALIGNMENT 64
|
|
#else
|
|
#define USE_MEMORY_ALIGNMENT 4
|
|
#endif
|
|
#else /* !(__BYTE_ORDER == __LITTLE_ENDIAN) */
|
|
#if __BYTE_ORDER == __BIG_ENDIAN
|
|
#define CORE_BIG_ENDIAN 1
|
|
#define CORE_LITTLE_ENDIAN 0
|
|
#define USE_MEMORY_ALIGNMENT 4
|
|
#endif /* __BYTE_ORDER == __BIG_ENDIAN */
|
|
#endif /* !(__BYTE_ORDER == __LITTLE_ENDIAN) */
|
|
#endif /* Def __linux || Def *BSD */
|
|
|
|
#ifdef __sun
|
|
#ifdef __sparc
|
|
#define CORE_BIG_ENDIAN 1
|
|
#define CORE_LITTLE_ENDIAN 0
|
|
#define USE_MEMORY_ALIGNMENT 4
|
|
#else /* Ndef __sparc */
|
|
#define CORE_BIG_ENDIAN 0
|
|
#define CORE_LITTLE_ENDIAN 1
|
|
#define USE_MEMORY_ALIGNMENT 4
|
|
#endif /* Ndef __sparc */
|
|
#endif /* Def __sun */
|
|
|
|
/* Note USE_MEMORY_ALIGNMENT is 4 for platform that allow short non-aligned but required int access to be aligned (e.g sparc, ppc, zos..)
|
|
* USE_MEMORY_ALIGNMENT is 2 for platform that require short and int access to be aligned (e.g hppa )
|
|
* if the platform does not have alignment requirement (x86/amd64) use a big value (i.e > 16)
|
|
*/
|
|
#ifndef USE_MEMORY_ALIGNMENT
|
|
#error "USE_MEMORY_ALIGNMENT must be defined to the proper alignment value for the platform"
|
|
#endif
|
|
|
|
#include <assert.h>
|
|
#include <stdio.h>
|
|
#include <stdlib.h>
|
|
#include <sys/types.h>
|
|
#include <sys/stat.h>
|
|
#include <errno.h>
|
|
#include <fcntl.h>
|
|
#include <string.h>
|
|
#include <ctype.h>
|
|
|
|
#ifdef __windows
|
|
#include <io.h>
|
|
#else
|
|
#include <unistd.h>
|
|
#endif
|
|
|
|
/* modes */
|
|
#ifdef __windows
|
|
#define FILE_O_RDONLY _O_RDONLY
|
|
#define FILE_O_BINARY _O_BINARY
|
|
#else /* not windaube */
|
|
#define FILE_O_RDONLY O_RDONLY
|
|
#define FILE_O_BINARY 0
|
|
#endif /* not windaube */
|
|
|
|
#ifndef TRUE
|
|
#define TRUE 1
|
|
#endif
|
|
#ifndef FALSE
|
|
#define FALSE 0
|
|
#endif
|
|
|
|
|
|
#ifdef __GNUC__
|
|
#define clz __builtin_clz
|
|
#else
|
|
static inline int clz(unsigned int value)
|
|
{
|
|
int result = 32;
|
|
|
|
while(value)
|
|
{
|
|
value >>= 1;
|
|
result -= 1;
|
|
}
|
|
return result;
|
|
}
|
|
#endif
|
|
|
|
#if (USE_MEMORY_ALIGNMENT > 4)
|
|
#define get_unaligned_uint(str) (*(unsigned int*)(str))
|
|
#else
|
|
static inline unsigned int get_unaligned_uint(const unsigned char* cursor)
|
|
{
|
|
unsigned int result;
|
|
|
|
memcpy(&result, cursor, sizeof(unsigned int));
|
|
return result;
|
|
}
|
|
#endif
|
|
|
|
/* ===============================================
|
|
* memory pool for fast fix-size allocation (non-tread-safe)
|
|
* ===============================================
|
|
*/
|
|
struct pool
|
|
{
|
|
void* head_free; /**< head of a linked list of freed element */
|
|
char* fresh; /**< top of a memory block to dig new element */
|
|
char* tail; /**< to detect end of extent... when fresh pass tail */
|
|
void* extent; /**< pointer to the primary extent block */
|
|
int size_elem; /**< size of an element. */
|
|
int primary; /**< primary allocation in bytes */
|
|
int secondary; /**< secondary allocation in bytes */
|
|
};
|
|
#define POOL_ALIGN_INCREMENT 8 /**< Alignement, must be a power of 2 and of size > to sizeof(void*) */
|
|
|
|
|
|
static void* pool_take_extent(struct pool* pool, int allocate)
|
|
{
|
|
unsigned int size = 0;
|
|
void* extent;
|
|
void* data = NULL;
|
|
|
|
if(pool->extent)
|
|
{
|
|
fputs("taking a pool extent\n", stderr);
|
|
/* we already have an extent, so this is a secondary */
|
|
if(pool->secondary)
|
|
{
|
|
size = pool->secondary;
|
|
}
|
|
}
|
|
else
|
|
{
|
|
assert(pool->primary);
|
|
size = pool->primary;
|
|
}
|
|
if(size)
|
|
{
|
|
extent = malloc(size);
|
|
if(extent)
|
|
{
|
|
*(void**)extent = pool->extent;
|
|
pool->extent = extent;
|
|
if(allocate)
|
|
{
|
|
data = ((char*)extent) + POOL_ALIGN_INCREMENT;
|
|
pool->fresh = ((char*)data) + pool->size_elem;
|
|
pool->tail = pool->fresh + (size - pool->size_elem);
|
|
}
|
|
else
|
|
{
|
|
pool->fresh = ((char*)extent) + POOL_ALIGN_INCREMENT;
|
|
pool->tail = pool->fresh + (size - pool->size_elem);
|
|
}
|
|
}
|
|
}
|
|
return data;
|
|
}
|
|
|
|
/* Create a memory pool for fix size objects
|
|
* this is a simplified implementation that
|
|
* is _not_ thread safe.
|
|
*/
|
|
struct pool* pool_create(int size_elem, int flags, int primary, int secondary)
|
|
{
|
|
struct pool* pool;
|
|
|
|
assert(primary > 0);
|
|
assert(secondary >= 0);
|
|
assert(size_elem > 0);
|
|
|
|
pool = (struct pool*)calloc(1, sizeof(struct pool));
|
|
if(!pool) return NULL;
|
|
/* Adjust the element size so that it be aligned, and so that an element could
|
|
* at least contain a void*
|
|
*/
|
|
pool->size_elem = size_elem = (size_elem + POOL_ALIGN_INCREMENT - 1) & ~(POOL_ALIGN_INCREMENT - 1);
|
|
|
|
pool->primary = (size_elem * primary) + POOL_ALIGN_INCREMENT;
|
|
pool->secondary = secondary > 0 ? (size_elem * secondary) + POOL_ALIGN_INCREMENT : 0;
|
|
pool_take_extent(pool, FALSE);
|
|
|
|
return pool;
|
|
|
|
}
|
|
|
|
void pool_destroy(struct pool* pool)
|
|
{
|
|
void* extent;
|
|
void* next;
|
|
|
|
if(pool != NULL)
|
|
{
|
|
extent = pool->extent;
|
|
while(extent)
|
|
{
|
|
next = *(void**)extent;
|
|
free(extent);
|
|
extent = next;
|
|
}
|
|
free(pool);
|
|
}
|
|
}
|
|
|
|
static inline void* pool_alloc(struct pool* pool)
|
|
{
|
|
void* data;
|
|
|
|
data = pool->head_free;
|
|
if(data == NULL)
|
|
{
|
|
/* we have no old-freed elem */
|
|
if(pool->fresh <= pool->tail)
|
|
{
|
|
/* pick a slice of the current extent */
|
|
data = (void*)pool->fresh;
|
|
pool->fresh += pool->size_elem;
|
|
}
|
|
else
|
|
{
|
|
/* allocate a new extent */
|
|
data = pool_take_extent(pool, TRUE);
|
|
}
|
|
}
|
|
else
|
|
{
|
|
/* re-used old freed element by chopipng the head of the free list */
|
|
pool->head_free = *(void**)data;
|
|
}
|
|
|
|
return data;
|
|
}
|
|
|
|
|
|
static inline void pool_free(struct pool* pool, void* data)
|
|
{
|
|
assert(pool && data);
|
|
|
|
/* stack on top of the free list */
|
|
*(void**)data = pool->head_free;
|
|
pool->head_free = data;
|
|
}
|
|
|
|
|
|
/* ===============================================
|
|
* Hash implementation custumized to be just tracking
|
|
* a unique list of string (i.e no data associated
|
|
* with the key, no need for retrieval, etc..
|
|
*
|
|
* This is tuned for the particular use-case we have here
|
|
* measures in tail_build showed that
|
|
* we can get north of 4000 distinct values stored in a hash
|
|
* the collision rate is at worse around 2%
|
|
* the collision needing an expensive memcmp to resolve
|
|
* have a rate typically at 1 per 1000
|
|
* for tail_build we register 37229 unique key
|
|
* with a total of 377 extra memcmp needed
|
|
* which is completely negligible compared to the
|
|
* number of memcmp required to eliminate duplicate
|
|
* entry (north of 2.5 millions for tail_build)
|
|
* ===============================================
|
|
*/
|
|
|
|
struct hash_elem
|
|
{
|
|
struct hash_elem* next;
|
|
const char* key;
|
|
int key_len;
|
|
};
|
|
|
|
struct hash
|
|
{
|
|
struct hash_elem** array;
|
|
struct pool* elems_pool;
|
|
int flags;
|
|
unsigned int used;
|
|
unsigned int size;
|
|
unsigned int load_limit;
|
|
#ifdef HASH_STAT
|
|
int stored;
|
|
int collisions;
|
|
int cost;
|
|
int memcmp;
|
|
#endif
|
|
};
|
|
#define HASH_F_NO_RESIZE (1<<0)
|
|
|
|
/* The following hash_compute function was adapted from :
|
|
* lookup3.c, by Bob Jenkins, May 2006, Public Domain.
|
|
*
|
|
* The changes from the original are mostly cosmetic
|
|
*/
|
|
#define hashsize(n) (1<<(n))
|
|
#define hashmask(n) (hashsize(n)-1)
|
|
#define rot(x,k) (((x)<<(k)) | ((x)>>(32-(k))))
|
|
|
|
|
|
#if CORE_BIG_ENDIAN
|
|
#define MASK_C1 0xFFFFFF00
|
|
#define MASK_C2 0xFFFF0000
|
|
#define MASK_C3 0xFF000000
|
|
#else
|
|
#if CORE_LITTLE_ENDIAN
|
|
#define MASK_C1 0xFFFFFF
|
|
#define MASK_C2 0xFFFF
|
|
#define MASK_C3 0xFF
|
|
#else
|
|
#error "Missing Endianness definition"
|
|
#endif
|
|
#endif
|
|
|
|
|
|
#define mix(a,b,c) \
|
|
{ \
|
|
a -= c; a ^= rot(c, 4); c += b; \
|
|
b -= a; b ^= rot(a, 6); a += c; \
|
|
c -= b; c ^= rot(b, 8); b += a; \
|
|
a -= c; a ^= rot(c,16); c += b; \
|
|
b -= a; b ^= rot(a,19); a += c; \
|
|
c -= b; c ^= rot(b, 4); b += a; \
|
|
}
|
|
#define final(a,b,c) \
|
|
{ \
|
|
c ^= b; c -= rot(b,14); \
|
|
a ^= c; a -= rot(c,11); \
|
|
b ^= a; b -= rot(a,25); \
|
|
c ^= b; c -= rot(b,16); \
|
|
a ^= c; a -= rot(c,4); \
|
|
b ^= a; b -= rot(a,14); \
|
|
c ^= b; c -= rot(b,24); \
|
|
}
|
|
|
|
static unsigned int hash_compute( struct hash* hash, const char* key, int length)
|
|
{
|
|
unsigned int a;
|
|
unsigned int b;
|
|
unsigned int c; /* internal state */
|
|
const unsigned char* uk = (const unsigned char*)key;
|
|
|
|
/* Set up the internal state */
|
|
a = b = c = 0xdeadbeef + (length << 2);
|
|
|
|
/* we use this to 'hash' full path with mostly a common root
|
|
* let's now waste too much cycles hashing mostly constant stuff
|
|
*/
|
|
if(length > 36)
|
|
{
|
|
uk += length - 36;
|
|
length = 36;
|
|
}
|
|
/*------ all but last block: aligned reads and affect 32 bits of (a,b,c) */
|
|
while (length > 12)
|
|
{
|
|
a += get_unaligned_uint(uk);
|
|
b += get_unaligned_uint(uk+4);
|
|
c += get_unaligned_uint(uk+8);
|
|
mix(a,b,c);
|
|
length -= 12;
|
|
uk += 12;
|
|
}
|
|
|
|
/*----------------------------- handle the last (probably partial) block */
|
|
/* Note: we possibly over-read, which would trigger complaint from VALGRIND
|
|
* but we mask the undefined stuff if any, so we are still good, thanks
|
|
* to alignment of memory allocation and tail-memory managment overhead
|
|
* we always can read 3 bytes past the official end without triggering
|
|
* a segfault -- if you find a platform/compiler couple for which that postulat
|
|
* is false, then you just need to over-allocate by 2 more bytes in file_load()
|
|
* file_load already over-allocate by 1 to sitck a \0 at the end of the buffer.
|
|
*/
|
|
switch(length)
|
|
{
|
|
case 12: c+=get_unaligned_uint(uk+8); b+=get_unaligned_uint(uk+4); a+=get_unaligned_uint(uk); break;
|
|
case 11: c+=get_unaligned_uint(uk+8) & MASK_C1; b+=get_unaligned_uint(uk+4); a+=get_unaligned_uint(uk); break;
|
|
case 10: c+=get_unaligned_uint(uk+8) & MASK_C2; b+=get_unaligned_uint(uk+4); a+=get_unaligned_uint(uk); break;
|
|
case 9 : c+=get_unaligned_uint(uk+8) & MASK_C3; b+=get_unaligned_uint(uk+4); a+=get_unaligned_uint(uk); break;
|
|
case 8 : b+=get_unaligned_uint(uk+4); a+=get_unaligned_uint(uk); break;
|
|
case 7 : b+=get_unaligned_uint(uk+4) & MASK_C1; a+=get_unaligned_uint(uk); break;
|
|
case 6 : b+=get_unaligned_uint(uk+4) & MASK_C2; a+=get_unaligned_uint(uk); break;
|
|
case 5 : b+=get_unaligned_uint(uk+4) & MASK_C3; a+=get_unaligned_uint(uk); break;
|
|
case 4 : a+=get_unaligned_uint(uk); break;
|
|
case 3 : a+=get_unaligned_uint(uk) & MASK_C1; break;
|
|
case 2 : a+=get_unaligned_uint(uk) & MASK_C2; break;
|
|
case 1 : a+=get_unaligned_uint(uk) & MASK_C3; break;
|
|
case 0 : return c & hash->size; /* zero length strings require no mixing */
|
|
}
|
|
|
|
final(a,b,c);
|
|
return c & hash->size;
|
|
}
|
|
|
|
static void hash_destroy(struct hash* hash)
|
|
{
|
|
if(hash)
|
|
{
|
|
if(hash->array)
|
|
{
|
|
free(hash->array);
|
|
}
|
|
if(hash->elems_pool)
|
|
{
|
|
pool_destroy(hash->elems_pool);
|
|
}
|
|
free(hash);
|
|
}
|
|
}
|
|
|
|
static struct hash* hash_create(unsigned int size)
|
|
{
|
|
struct hash* hash;
|
|
|
|
assert(size > 0);
|
|
hash = calloc(1, sizeof(struct hash));
|
|
if(hash)
|
|
{
|
|
size += (size >> 2) + 1; /* ~ 75% load factor */
|
|
if(size >= 15)
|
|
{
|
|
hash->size = (((unsigned int)0xFFFFFFFF) >> clz((unsigned int)size));
|
|
}
|
|
else
|
|
{
|
|
hash->size = size = 15;
|
|
}
|
|
hash->load_limit = hash->size - (hash->size >> 2);
|
|
hash->used = 0;
|
|
hash->array = (struct hash_elem**)calloc(hash->size + 1, sizeof(struct hash_elem*));
|
|
if(hash->array == NULL)
|
|
{
|
|
hash_destroy(hash);
|
|
hash = NULL;
|
|
}
|
|
}
|
|
if(hash)
|
|
{
|
|
hash->elems_pool = pool_create(sizeof(struct hash_elem),
|
|
0, size, size << 1);
|
|
if(!hash->elems_pool)
|
|
{
|
|
hash_destroy(hash);
|
|
hash = NULL;
|
|
}
|
|
}
|
|
return hash;
|
|
}
|
|
|
|
static void hash_resize(struct hash* hash)
|
|
{
|
|
unsigned int old_size = hash->size;
|
|
unsigned int hashed;
|
|
struct hash_elem* hash_elem;
|
|
struct hash_elem* next;
|
|
struct hash_elem** array;
|
|
int i;
|
|
|
|
hash->size = (old_size << 1) + 1;
|
|
/* we really should avoid to get there... so print a message to alert of the condition */
|
|
fprintf(stderr, "resize hash %d -> %d\n", old_size, hash->size);
|
|
if(hash->size == old_size)
|
|
{
|
|
hash->flags |= HASH_F_NO_RESIZE;
|
|
return;
|
|
}
|
|
array = calloc(hash->size + 1, sizeof(struct hash_elem*));
|
|
if(array)
|
|
{
|
|
hash->load_limit = hash->size - (hash->size >> 2);
|
|
for(i=0; i <= old_size; i++)
|
|
{
|
|
hash_elem = (struct hash_elem*)hash->array[i];
|
|
while(hash_elem)
|
|
{
|
|
next = hash_elem->next;
|
|
|
|
hashed = hash_compute(hash, hash_elem->key, hash_elem->key_len);
|
|
hash_elem->next = array[hashed];
|
|
array[hashed] = hash_elem;
|
|
hash_elem = next;
|
|
}
|
|
}
|
|
free(hash->array);
|
|
hash->array = (struct hash_elem**)array;
|
|
}
|
|
else
|
|
{
|
|
hash->size = old_size;
|
|
hash->flags |= HASH_F_NO_RESIZE;
|
|
}
|
|
}
|
|
|
|
#ifdef HASH_STAT
|
|
static inline int compare_key(struct hash* hash, const char* a, const char* b, int len, int* cost)
|
|
{
|
|
*cost += 1;
|
|
hash->memcmp += 1;
|
|
return memcmp(a,b, len);
|
|
}
|
|
#else
|
|
#define compare_key(h,a,b,l,c) memcmp(a,b,l)
|
|
#endif
|
|
|
|
/* a customized hash_store function that just store the key and return
|
|
* TRUE if the key was effectively stored, or FALSE if the key was already there
|
|
*/
|
|
static int hash_store(struct hash* hash, const char* key, int key_len)
|
|
{
|
|
unsigned int hashed;
|
|
struct hash_elem* hash_elem;
|
|
int cost = 0;
|
|
|
|
hashed = hash_compute(hash, key, key_len);
|
|
#ifdef HASH_STAT
|
|
hash->stored += 1;
|
|
#endif
|
|
hash_elem = (struct hash_elem*)hash->array[hashed];
|
|
while(hash_elem && (hash_elem->key_len != key_len || compare_key(hash, hash_elem->key, key, key_len, &cost)))
|
|
{
|
|
hash_elem = hash_elem->next;
|
|
}
|
|
|
|
if(!hash_elem)
|
|
{
|
|
hash_elem = pool_alloc(hash->elems_pool);
|
|
if(hash_elem)
|
|
{
|
|
hash_elem->key = key;
|
|
hash_elem->key_len = key_len;
|
|
hash_elem->next = hash->array[hashed];
|
|
|
|
#ifdef HASH_STAT
|
|
if(hash_elem->next)
|
|
{
|
|
hash->collisions += 1;
|
|
hash->cost += cost;
|
|
}
|
|
#endif
|
|
hash->array[hashed] = hash_elem;
|
|
hash->used += 1;
|
|
if(hash->used > hash->load_limit)
|
|
{
|
|
hash_resize(hash);
|
|
}
|
|
}
|
|
return TRUE;
|
|
}
|
|
return FALSE;
|
|
}
|
|
|
|
static int file_stat(const char* name, struct stat* buffer_stat, int* rc)
|
|
{
|
|
int rc_local = 0;
|
|
|
|
rc_local = stat(name, buffer_stat);
|
|
if (rc_local < 0)
|
|
{
|
|
*rc = errno;
|
|
}
|
|
return rc_local;
|
|
}
|
|
|
|
static off_t file_get_size(const char* name, int* rc)
|
|
{
|
|
struct stat buffer_stat;
|
|
off_t size = -1;
|
|
|
|
if (!file_stat(name, &buffer_stat, rc))
|
|
{
|
|
if(S_ISREG(buffer_stat.st_mode))
|
|
{
|
|
size = buffer_stat.st_size;
|
|
}
|
|
else
|
|
{
|
|
*rc = EINVAL;
|
|
}
|
|
}
|
|
return size;
|
|
}
|
|
|
|
static char* file_load(const char* name, off_t* size, int* return_rc)
|
|
{
|
|
off_t local_size = 0;
|
|
int rc = 0;
|
|
char* buffer = NULL;
|
|
int fd;
|
|
|
|
assert(name != NULL);
|
|
|
|
if(!size)
|
|
{
|
|
size = &local_size;
|
|
}
|
|
*size = file_get_size(name, &rc);
|
|
if (!rc)
|
|
{
|
|
fd = open(name, FILE_O_RDONLY | FILE_O_BINARY);
|
|
if (!(fd == -1))
|
|
{
|
|
buffer = malloc((size_t)(*size + 1));
|
|
if (buffer == NULL)
|
|
{
|
|
rc = ENOMEM;
|
|
}
|
|
else
|
|
{
|
|
ssize_t i;
|
|
|
|
REDO:
|
|
i = read(fd, buffer, (size_t)(*size));
|
|
if(i == -1)
|
|
{
|
|
if(errno == EINTR)
|
|
{
|
|
goto REDO;
|
|
}
|
|
else
|
|
{
|
|
rc = errno;
|
|
}
|
|
}
|
|
else
|
|
{
|
|
if (i != *size)
|
|
{
|
|
rc = EIO;
|
|
}
|
|
}
|
|
close(fd);
|
|
buffer[*size] = 0;
|
|
}
|
|
}
|
|
}
|
|
|
|
if(rc && buffer)
|
|
{
|
|
free(buffer);
|
|
buffer = NULL;
|
|
}
|
|
if(return_rc)
|
|
{
|
|
*return_rc = rc;
|
|
}
|
|
return buffer;
|
|
}
|
|
|
|
static void _cancel_relative(char* base, char** ref_cursor, char** ref_cursor_out, char* end)
|
|
{
|
|
char* cursor = *ref_cursor;
|
|
char* cursor_out = *ref_cursor_out;
|
|
|
|
do
|
|
{
|
|
cursor += 3;
|
|
while(cursor_out > base && cursor_out[-1] == '/')
|
|
cursor_out--;
|
|
while(cursor_out > base && *--cursor_out != '/');
|
|
}
|
|
while(cursor + 3 < end && !memcmp(cursor, "/../", 4));
|
|
*ref_cursor = cursor;
|
|
*ref_cursor_out = cursor_out;
|
|
}
|
|
|
|
static int _process(struct hash* dep_hash, char* fn)
|
|
{
|
|
int rc;
|
|
char* buffer;
|
|
char* end;
|
|
char* cursor;
|
|
char* cursor_out;
|
|
char* base;
|
|
int continuation = 0;
|
|
char last_ns = 0;
|
|
off_t size;
|
|
|
|
buffer = file_load(fn, &size, &rc);
|
|
/* Note: yes we are going to leak 'buffer'
|
|
* this is on purpose, to avoid cloning the 'key' out of it
|
|
* and our special 'hash' just store the pointer to the key
|
|
* inside of buffer, hence it need to remain allocated
|
|
*/
|
|
if(!rc)
|
|
{
|
|
base = cursor_out = cursor = end = buffer;
|
|
end += size;
|
|
while(cursor < end)
|
|
{
|
|
if(*cursor == '\\')
|
|
{
|
|
continuation = 1;
|
|
*cursor_out++ = *cursor++;
|
|
}
|
|
else if(*cursor == '/')
|
|
{
|
|
if(cursor + 3 < end)
|
|
{
|
|
if(!memcmp(cursor, "/../", 4))
|
|
{
|
|
_cancel_relative(base, &cursor, &cursor_out, end);
|
|
}
|
|
}
|
|
*cursor_out++ = *cursor++;
|
|
}
|
|
else if(*cursor == '\n')
|
|
{
|
|
if(!continuation)
|
|
{
|
|
*cursor_out = 0;
|
|
if(base < cursor)
|
|
{
|
|
/* here we have a complete rule */
|
|
if(last_ns == ':')
|
|
{
|
|
/* if the rule ended in ':' that is a no-dep rule
|
|
* these are the one for which we want to filter
|
|
* duplicate out
|
|
*/
|
|
if(hash_store(dep_hash, base, (int)(cursor_out - base)))
|
|
{
|
|
puts(base);
|
|
putc('\n', stdout);
|
|
}
|
|
}
|
|
else
|
|
{
|
|
/* rule with dep, just write it */
|
|
puts(base);
|
|
putc('\n', stdout);
|
|
}
|
|
}
|
|
cursor += 1;
|
|
base = cursor_out = cursor;
|
|
}
|
|
else
|
|
{
|
|
/* here we have a '\' followed by \n this is a continuation
|
|
* i.e not a complete rule yet
|
|
*/
|
|
*cursor_out++ = *cursor++;
|
|
}
|
|
}
|
|
else
|
|
{
|
|
continuation = 0;
|
|
/* not using isspace() here save 25% of I refs and 75% of D refs based on cachegrind */
|
|
if(*cursor != ' ' && *cursor != '\n' && *cursor != '\t' )
|
|
{
|
|
last_ns = *cursor;
|
|
}
|
|
*cursor_out++ = *cursor++;
|
|
}
|
|
}
|
|
/* just in case the file did not end with a \n, there may be a pending rule */
|
|
if(base < cursor_out)
|
|
{
|
|
if(last_ns == ':')
|
|
{
|
|
if(hash_store(dep_hash, base, (int)(cursor_out - base)))
|
|
{
|
|
puts(base);
|
|
putc('\n', stdout);
|
|
}
|
|
}
|
|
else
|
|
{
|
|
puts(base);
|
|
putc('\n', stdout);
|
|
}
|
|
}
|
|
}
|
|
return rc;
|
|
}
|
|
|
|
static void _usage(void)
|
|
{
|
|
fputs("Usage: concat-deps <file that contains dep_files>\n", stderr);
|
|
}
|
|
|
|
#define kDEFAULT_HASH_SIZE 4096
|
|
|
|
int main(int argc, char** argv)
|
|
{
|
|
int rc = 0;
|
|
off_t in_list_size = 0;
|
|
char* in_list;
|
|
char* in_list_cursor;
|
|
char* in_list_base;
|
|
struct hash* dep_hash;
|
|
char* base_dir;
|
|
|
|
if(argc < 2)
|
|
{
|
|
_usage();
|
|
return 1;
|
|
}
|
|
base_dir = getenv("SRCDIR");
|
|
if(!base_dir)
|
|
{
|
|
fputs("Error: SRCDIR si missing in the environement\n", stderr);
|
|
return 1;
|
|
}
|
|
in_list = file_load(argv[1], &in_list_size, &rc);
|
|
if(!rc)
|
|
{
|
|
dep_hash = hash_create( kDEFAULT_HASH_SIZE);
|
|
in_list_base = in_list_cursor = in_list;
|
|
|
|
/* extract filename of dep file from a 'space' separated list */
|
|
while(*in_list_cursor)
|
|
{
|
|
if(*in_list_cursor == ' ' || *in_list_cursor == '\n')
|
|
{
|
|
*in_list_cursor = 0;
|
|
if(in_list_base < in_list_cursor)
|
|
{
|
|
rc = _process(dep_hash, in_list_base);
|
|
if(rc)
|
|
{
|
|
break;
|
|
}
|
|
}
|
|
in_list_cursor += 1;
|
|
in_list_base = in_list_cursor;
|
|
}
|
|
else
|
|
{
|
|
in_list_cursor += 1;
|
|
}
|
|
}
|
|
if(!rc)
|
|
{
|
|
/* catch the last entry in case the input did not terminate with a 'space' */
|
|
if(in_list_base < in_list_cursor)
|
|
{
|
|
rc = _process(dep_hash, in_list_base);
|
|
}
|
|
}
|
|
#ifdef HASH_STAT
|
|
fprintf(stderr, "stats: u:%d s:%d l:%d t:%d c:%d m:%d $:%d\n",
|
|
dep_hash->used, dep_hash->size, dep_hash->load_limit, dep_hash->stored,
|
|
dep_hash->collisions, dep_hash->memcmp, dep_hash->cost);
|
|
#endif
|
|
}
|
|
return rc;
|
|
}
|
|
|
|
/* vim:set shiftwidth=4 softtabstop=4 expandtab: */
|