Line data Source code
1 : /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
2 : /*
3 : * Copyright (C) 2011 Norbert Thiebaud
4 : * License: GPLv3
5 : */
6 :
7 : /* define to activate stats reporting on hash usage*/
8 : /* #define HASH_STAT */
9 :
10 : /* ===============================================
11 : * Set-up: defines to identify the system and system related properties
12 : * ===============================================
13 : */
14 :
15 : #ifdef __APPLE__
16 : #ifdef __x86_64__
17 : #undef CORE_BIG_ENDIAN
18 : #define CORE_LITTLE_ENDIAN
19 : #define USE_MEMORY_ALIGNMENT 64 /* big value -> no alignment */
20 : #else
21 : #define CORE_BIG_ENDIAN
22 : #undef CORE_LITTLE_ENDIAN
23 : #define USE_MEMORY_ALIGNMENT 4
24 : #endif
25 :
26 : #endif
27 : #ifdef _AIX
28 : #define CORE_BIG_ENDIAN
29 : #undef CORE_LITTLE_ENDIAN
30 : #define USE_MEMORY_ALIGNMENT 4
31 : #endif /* Def _AIX */
32 :
33 : #ifdef _MSC_VER
34 : #define __windows
35 : #undef CORE_BIG_ENDIAN
36 : #define CORE_LITTLE_ENDIAN
37 : #define USE_MEMORY_ALIGNMENT 64 /* big value -> no alignment */
38 : #endif /* Def _MSC_VER */
39 :
40 : #if defined(__linux) || defined(__OpenBSD__) || \
41 : defined(__FreeBSD__) || defined(__NetBSD__) || \
42 : defined(__DragonFly__) || defined(__FreeBSD_kernel__)
43 : #include <sys/param.h>
44 : #if __BYTE_ORDER == __LITTLE_ENDIAN
45 : #undef CORE_BIG_ENDIAN
46 : #define CORE_LITTLE_ENDIAN
47 : #if defined(__x86_64) || defined(__i386)
48 : #define USE_MEMORY_ALIGNMENT 64
49 : #else
50 : #define USE_MEMORY_ALIGNMENT 4
51 : #endif
52 : #else /* !(__BYTE_ORDER == __LITTLE_ENDIAN) */
53 : #if __BYTE_ORDER == __BIG_ENDIAN
54 : #define CORE_BIG_ENDIAN
55 : #undef CORE_LITTLE_ENDIAN
56 : #define USE_MEMORY_ALIGNMENT 4
57 : #endif /* __BYTE_ORDER == __BIG_ENDIAN */
58 : #endif /* !(__BYTE_ORDER == __LITTLE_ENDIAN) */
59 : #endif /* Def __linux || Def *BSD */
60 :
61 : #ifdef __sun
62 : #ifdef __sparc
63 : #define CORE_BIG_ENDIAN
64 : #undef CORE_LITTLE_ENDIAN
65 : #define USE_MEMORY_ALIGNMENT 4
66 : #else /* Ndef __sparc */
67 : #undef CORE_BIG_ENDIAN
68 : #define CORE_LITTLE_ENDIAN
69 : #define USE_MEMORY_ALIGNMENT 4
70 : #endif /* Ndef __sparc */
71 : #endif /* Def __sun */
72 :
73 : /* Note USE_MEMORY_ALIGNMENT is 4 for platform that allow short non-aligned but required int access to be aligned (e.g sparc, ppc, zos..)
74 : * USE_MEMORY_ALIGNMENT is 2 for platform that require short and int access to be aligned (e.g hppa )
75 : * if the platform does not have alignment requirement (x86/amd64) use a big value (i.e > 16)
76 : */
77 : #ifndef USE_MEMORY_ALIGNMENT
78 : #error "USE_MEMORY_ALIGNMENT must be defined to the proper alignment value for the platform"
79 : #endif
80 :
81 : #include <assert.h>
82 : #include <stdio.h>
83 : #include <stdlib.h>
84 : #include <sys/types.h>
85 : #include <sys/stat.h>
86 : #include <errno.h>
87 : #include <fcntl.h>
88 : #include <string.h>
89 : #include <ctype.h>
90 :
91 : #ifdef __windows
92 : #include <io.h>
93 : #else
94 : #include <unistd.h>
95 : #endif
96 :
97 : /* modes */
98 : #ifdef __windows
99 : #define FILE_O_RDONLY _O_RDONLY
100 : #define FILE_O_BINARY _O_BINARY
101 : #define PATHNCMP _strnicmp /* MSVC converts paths to lower-case sometimes? */
102 : #define inline __inline
103 : #define ssize_t long
104 : #define S_ISREG(mode) (((mode) & _S_IFMT) == (_S_IFREG)) /* MSVC does not have this macro */
105 : #else /* not windaube */
106 : #define FILE_O_RDONLY O_RDONLY
107 : #define FILE_O_BINARY 0
108 : #define PATHNCMP strncmp
109 : #endif /* not windaube */
110 :
111 : #ifndef TRUE
112 : #define TRUE 1
113 : #endif
114 : #ifndef FALSE
115 : #define FALSE 0
116 : #endif
117 :
118 : int internal_boost = 0;
119 : static char* base_dir;
120 : static char* work_dir;
121 : int work_dir_len;
122 :
123 : #ifdef __GNUC__
124 : #define clz __builtin_clz
125 : #else
126 : static inline int clz(unsigned int value)
127 : {
128 : int result = 32;
129 :
130 : while(value)
131 : {
132 : value >>= 1;
133 : result -= 1;
134 : }
135 : return result;
136 : }
137 : #endif
138 :
139 : #if (USE_MEMORY_ALIGNMENT > 4)
140 : #define get_unaligned_uint(str) (*(unsigned int*)(str))
141 : #else
142 : static inline unsigned int get_unaligned_uint(const unsigned char* cursor)
143 : {
144 : unsigned int result;
145 :
146 : memcpy(&result, cursor, sizeof(unsigned int));
147 : return result;
148 : }
149 : #endif
150 :
151 : /* ===============================================
152 : * memory pool for fast fix-size allocation (non-tread-safe)
153 : * ===============================================
154 : */
155 : struct pool
156 : {
157 : void* head_free; /**< head of a linked list of freed element */
158 : char* fresh; /**< top of a memory block to dig new element */
159 : char* tail; /**< to detect end of extent... when fresh pass tail */
160 : void* extent; /**< pointer to the primary extent block */
161 : int size_elem; /**< size of an element. */
162 : int primary; /**< primary allocation in bytes */
163 : int secondary; /**< secondary allocation in bytes */
164 : };
165 : #define POOL_ALIGN_INCREMENT 8 /**< Alignement, must be a power of 2 and of size > to sizeof(void*) */
166 :
167 :
168 1073 : static void* pool_take_extent(struct pool* pool, int allocate)
169 : {
170 : unsigned int size = 0;
171 : void* extent;
172 : void* data = NULL;
173 :
174 1073 : if(pool->extent)
175 : {
176 : /* we already have an extent, so this is a secondary */
177 0 : if(pool->secondary)
178 : {
179 0 : size = pool->secondary;
180 : }
181 : }
182 : else
183 : {
184 : assert(pool->primary);
185 1073 : size = pool->primary;
186 : }
187 1073 : if(size)
188 : {
189 1073 : extent = malloc(size);
190 1073 : if(extent)
191 : {
192 1073 : *(void**)extent = pool->extent;
193 1073 : pool->extent = extent;
194 1073 : if(allocate)
195 : {
196 0 : data = ((char*)extent) + POOL_ALIGN_INCREMENT;
197 0 : pool->fresh = ((char*)data) + pool->size_elem;
198 0 : pool->tail = pool->fresh + (size - pool->size_elem);
199 : }
200 : else
201 : {
202 1073 : pool->fresh = ((char*)extent) + POOL_ALIGN_INCREMENT;
203 1073 : pool->tail = pool->fresh + (size - pool->size_elem);
204 : }
205 : }
206 : }
207 1073 : return data;
208 : }
209 :
210 : /* Create a memory pool for fix size objects
211 : * this is a simplified implementation that
212 : * is _not_ thread safe.
213 : */
214 1073 : struct pool* pool_create(int size_elem, int primary, int secondary)
215 : {
216 : struct pool* pool;
217 :
218 : assert(primary > 0);
219 : assert(secondary >= 0);
220 : assert(size_elem > 0);
221 :
222 1073 : pool = (struct pool*)calloc(1, sizeof(struct pool));
223 1073 : if(!pool) return NULL;
224 : /* Adjust the element size so that it be aligned, and so that an element could
225 : * at least contain a void*
226 : */
227 1073 : pool->size_elem = size_elem = (size_elem + POOL_ALIGN_INCREMENT - 1) & ~(POOL_ALIGN_INCREMENT - 1);
228 :
229 1073 : pool->primary = (size_elem * primary) + POOL_ALIGN_INCREMENT;
230 1073 : pool->secondary = secondary > 0 ? (size_elem * secondary) + POOL_ALIGN_INCREMENT : 0;
231 1073 : pool_take_extent(pool, FALSE);
232 :
233 1073 : return pool;
234 :
235 : }
236 :
237 0 : void pool_destroy(struct pool* pool)
238 : {
239 : void* extent;
240 : void* next;
241 :
242 0 : if(pool != NULL)
243 : {
244 0 : extent = pool->extent;
245 0 : while(extent)
246 : {
247 0 : next = *(void**)extent;
248 0 : free(extent);
249 : extent = next;
250 : }
251 0 : free(pool);
252 : }
253 0 : }
254 :
255 197868 : static inline void* pool_alloc(struct pool* pool)
256 : {
257 : void* data;
258 :
259 197868 : data = pool->head_free;
260 197868 : if(data == NULL)
261 : {
262 : /* we have no old-freed elem */
263 197868 : if(pool->fresh <= pool->tail)
264 : {
265 : /* pick a slice of the current extent */
266 : data = (void*)pool->fresh;
267 197868 : pool->fresh += pool->size_elem;
268 : }
269 : else
270 : {
271 : /* allocate a new extent */
272 0 : data = pool_take_extent(pool, TRUE);
273 : }
274 : }
275 : else
276 : {
277 : /* re-used old freed element by chopipng the head of the free list */
278 0 : pool->head_free = *(void**)data;
279 : }
280 :
281 197868 : return data;
282 : }
283 :
284 :
285 : static inline void pool_free(struct pool* pool, void* data)
286 : {
287 : assert(pool && data);
288 :
289 : /* stack on top of the free list */
290 : *(void**)data = pool->head_free;
291 : pool->head_free = data;
292 : }
293 :
294 :
295 : /* ===============================================
296 : * Hash implementation custumized to be just tracking
297 : * a unique list of string (i.e no data associated
298 : * with the key, no need for retrieval, etc..
299 : *
300 : * This is tuned for the particular use-case we have here
301 : * measures in tail_build showed that
302 : * we can get north of 4000 distinct values stored in a hash
303 : * the collision rate is at worse around 2%
304 : * the collision needing an expensive memcmp to resolve
305 : * have a rate typically at 1 per 1000
306 : * for tail_build we register 37229 unique key
307 : * with a total of 377 extra memcmp needed
308 : * which is completely negligible compared to the
309 : * number of memcmp required to eliminate duplicate
310 : * entry (north of 2.5 millions for tail_build)
311 : * ===============================================
312 : */
313 :
314 : struct hash_elem
315 : {
316 : struct hash_elem* next;
317 : const char* key;
318 : int key_len;
319 : };
320 :
321 : struct hash
322 : {
323 : struct hash_elem** array;
324 : struct pool* elems_pool;
325 : int flags;
326 : unsigned int used;
327 : unsigned int size;
328 : unsigned int load_limit;
329 : #ifdef HASH_STAT
330 : int stored;
331 : int collisions;
332 : int cost;
333 : int memcmp;
334 : #endif
335 : };
336 : #define HASH_F_NO_RESIZE (1<<0)
337 :
338 : /* The following hash_compute function was adapted from :
339 : * lookup3.c, by Bob Jenkins, May 2006, Public Domain.
340 : *
341 : * The changes from the original are mostly cosmetic
342 : */
343 : #define rot(x,k) (((x)<<(k)) | ((x)>>(32-(k))))
344 :
345 :
346 : #if defined CORE_BIG_ENDIAN
347 : #define MASK_C1 0xFFFFFF00
348 : #define MASK_C2 0xFFFF0000
349 : #define MASK_C3 0xFF000000
350 : #elif defined CORE_LITTLE_ENDIAN
351 : #define MASK_C1 0xFFFFFF
352 : #define MASK_C2 0xFFFF
353 : #define MASK_C3 0xFF
354 : #else
355 : #error "Missing Endianness definition"
356 : #endif
357 :
358 :
359 : #define mix(a,b,c) \
360 : { \
361 : a -= c; a ^= rot(c, 4); c += b; \
362 : b -= a; b ^= rot(a, 6); a += c; \
363 : c -= b; c ^= rot(b, 8); b += a; \
364 : a -= c; a ^= rot(c,16); c += b; \
365 : b -= a; b ^= rot(a,19); a += c; \
366 : c -= b; c ^= rot(b, 4); b += a; \
367 : }
368 : #define final(a,b,c) \
369 : { \
370 : c ^= b; c -= rot(b,14); \
371 : a ^= c; a -= rot(c,11); \
372 : b ^= a; b -= rot(a,25); \
373 : c ^= b; c -= rot(b,16); \
374 : a ^= c; a -= rot(c,4); \
375 : b ^= a; b -= rot(a,14); \
376 : c ^= b; c -= rot(b,24); \
377 : }
378 :
379 3206719 : static unsigned int hash_compute( struct hash* hash, const char* key, int length)
380 : {
381 : unsigned int a;
382 : unsigned int b;
383 : unsigned int c; /* internal state */
384 : const unsigned char* uk = (const unsigned char*)key;
385 :
386 : /* Set up the internal state */
387 3206719 : a = b = c = 0xdeadbeef + (length << 2);
388 :
389 : /* we use this to 'hash' full path with mostly a common root
390 : * let's now waste too much cycles hashing mostly constant stuff
391 : */
392 3206719 : if(length > 36)
393 : {
394 3206070 : uk += length - 36;
395 : length = 36;
396 : }
397 : /*------ all but last block: aligned reads and affect 32 bits of (a,b,c) */
398 9620157 : while (length > 12)
399 : {
400 6413438 : a += get_unaligned_uint(uk);
401 6413438 : b += get_unaligned_uint(uk+4);
402 6413438 : c += get_unaligned_uint(uk+8);
403 6413438 : mix(a,b,c);
404 6413438 : length -= 12;
405 6413438 : uk += 12;
406 : }
407 :
408 : /*----------------------------- handle the last (probably partial) block */
409 : /* Note: we possibly over-read, which would trigger complaint from VALGRIND
410 : * but we mask the undefined stuff if any, so we are still good, thanks
411 : * to alignment of memory allocation and tail-memory management overhead
412 : * we always can read 3 bytes past the official end without triggering
413 : * a segfault -- if you find a platform/compiler couple for which that postulat
414 : * is false, then you just need to over-allocate by 2 more bytes in file_load()
415 : * file_load already over-allocate by 1 to sitck a \0 at the end of the buffer.
416 : */
417 3206719 : switch(length)
418 : {
419 3206191 : case 12: c+=get_unaligned_uint(uk+8); b+=get_unaligned_uint(uk+4); a+=get_unaligned_uint(uk); break;
420 121 : case 11: c+=get_unaligned_uint(uk+8) & MASK_C1; b+=get_unaligned_uint(uk+4); a+=get_unaligned_uint(uk); break;
421 110 : case 10: c+=get_unaligned_uint(uk+8) & MASK_C2; b+=get_unaligned_uint(uk+4); a+=get_unaligned_uint(uk); break;
422 77 : case 9 : c+=get_unaligned_uint(uk+8) & MASK_C3; b+=get_unaligned_uint(uk+4); a+=get_unaligned_uint(uk); break;
423 77 : case 8 : b+=get_unaligned_uint(uk+4); a+=get_unaligned_uint(uk); break;
424 77 : case 7 : b+=get_unaligned_uint(uk+4) & MASK_C1; a+=get_unaligned_uint(uk); break;
425 33 : case 6 : b+=get_unaligned_uint(uk+4) & MASK_C2; a+=get_unaligned_uint(uk); break;
426 33 : case 5 : b+=get_unaligned_uint(uk+4) & MASK_C3; a+=get_unaligned_uint(uk); break;
427 0 : case 4 : a+=get_unaligned_uint(uk); break;
428 0 : case 3 : a+=get_unaligned_uint(uk) & MASK_C1; break;
429 0 : case 2 : a+=get_unaligned_uint(uk) & MASK_C2; break;
430 0 : case 1 : a+=get_unaligned_uint(uk) & MASK_C3; break;
431 0 : case 0 : return c & hash->size; /* zero length strings require no mixing */
432 : }
433 :
434 3206719 : final(a,b,c);
435 3206719 : return c & hash->size;
436 : }
437 :
438 0 : static void hash_destroy(struct hash* hash)
439 : {
440 0 : if(hash)
441 : {
442 0 : if(hash->array)
443 : {
444 0 : free(hash->array);
445 : }
446 0 : if(hash->elems_pool)
447 : {
448 0 : pool_destroy(hash->elems_pool);
449 : }
450 0 : free(hash);
451 : }
452 0 : }
453 :
454 1073 : static struct hash* hash_create(unsigned int size)
455 : {
456 : struct hash* hash;
457 :
458 : assert(size > 0);
459 1073 : hash = calloc(1, sizeof(struct hash));
460 1073 : if(hash)
461 : {
462 1073 : size += (size >> 2) + 1; /* ~ 75% load factor */
463 1073 : if(size >= 15)
464 : {
465 1073 : hash->size = (((unsigned int)0xFFFFFFFF) >> clz((unsigned int)size));
466 : }
467 : else
468 : {
469 0 : hash->size = size = 15;
470 : }
471 1073 : hash->load_limit = hash->size - (hash->size >> 2);
472 1073 : hash->used = 0;
473 1073 : hash->array = (struct hash_elem**)calloc(hash->size + 1, sizeof(struct hash_elem*));
474 1073 : if(hash->array == NULL)
475 : {
476 0 : hash_destroy(hash);
477 : hash = NULL;
478 : }
479 : }
480 1073 : if(hash)
481 : {
482 1073 : hash->elems_pool = pool_create(sizeof(struct hash_elem),
483 1073 : size, size << 1);
484 1073 : if(!hash->elems_pool)
485 : {
486 0 : hash_destroy(hash);
487 : hash = NULL;
488 : }
489 : }
490 1073 : return hash;
491 : }
492 :
493 0 : static void hash_resize(struct hash* hash)
494 : {
495 0 : unsigned int old_size = hash->size;
496 : unsigned int hashed;
497 : struct hash_elem* hash_elem;
498 : struct hash_elem* next;
499 : struct hash_elem** array;
500 : unsigned int i;
501 :
502 0 : hash->size = (old_size << 1) + 1;
503 : /* we really should avoid to get there... so print a message to alert of the condition */
504 0 : fprintf(stderr, "resize hash %d -> %d\n", old_size, hash->size);
505 0 : if(hash->size == old_size)
506 : {
507 0 : hash->flags |= HASH_F_NO_RESIZE;
508 0 : return;
509 : }
510 0 : array = calloc(hash->size + 1, sizeof(struct hash_elem*));
511 0 : if(array)
512 : {
513 0 : hash->load_limit = hash->size - (hash->size >> 2);
514 0 : for(i=0; i <= old_size; i++)
515 : {
516 0 : hash_elem = (struct hash_elem*)hash->array[i];
517 0 : while(hash_elem)
518 : {
519 0 : next = hash_elem->next;
520 :
521 0 : hashed = hash_compute(hash, hash_elem->key, hash_elem->key_len);
522 0 : hash_elem->next = array[hashed];
523 0 : array[hashed] = hash_elem;
524 : hash_elem = next;
525 : }
526 : }
527 0 : free(hash->array);
528 0 : hash->array = (struct hash_elem**)array;
529 : }
530 : else
531 : {
532 0 : hash->size = old_size;
533 0 : hash->flags |= HASH_F_NO_RESIZE;
534 : }
535 : }
536 :
537 : #ifdef HASH_STAT
538 : static inline int compare_key(struct hash* hash, const char* a, const char* b, int len, int* cost)
539 : {
540 : *cost += 1;
541 : hash->memcmp += 1;
542 : return memcmp(a,b, len);
543 : }
544 : #else
545 : #define compare_key(h,a,b,l,c) memcmp(a,b,l)
546 : #endif
547 :
548 : /* a customized hash_store function that just store the key and return
549 : * TRUE if the key was effectively stored, or FALSE if the key was already there
550 : */
551 3206719 : static int hash_store(struct hash* hash, const char* key, int key_len)
552 : {
553 : unsigned int hashed;
554 : struct hash_elem* hash_elem;
555 : int cost = 0;
556 :
557 : (void) cost;
558 3206719 : hashed = hash_compute(hash, key, key_len);
559 : #ifdef HASH_STAT
560 : hash->stored += 1;
561 : #endif
562 3206719 : hash_elem = (struct hash_elem*)hash->array[hashed];
563 6925280 : while(hash_elem && (hash_elem->key_len != key_len || compare_key(hash, hash_elem->key, key, key_len, &cost)))
564 : {
565 511842 : hash_elem = hash_elem->next;
566 : }
567 :
568 3206719 : if(!hash_elem)
569 : {
570 197868 : hash_elem = pool_alloc(hash->elems_pool);
571 197868 : if(hash_elem)
572 : {
573 197868 : hash_elem->key = key;
574 197868 : hash_elem->key_len = key_len;
575 197868 : hash_elem->next = hash->array[hashed];
576 :
577 : #ifdef HASH_STAT
578 : if(hash_elem->next)
579 : {
580 : hash->collisions += 1;
581 : hash->cost += cost;
582 : }
583 : #endif
584 197868 : hash->array[hashed] = hash_elem;
585 197868 : hash->used += 1;
586 197868 : if(hash->used > hash->load_limit)
587 : {
588 0 : hash_resize(hash);
589 : }
590 : }
591 : return TRUE;
592 : }
593 : return FALSE;
594 : }
595 :
596 30244 : static int file_stat(const char* name, struct stat* buffer_stat, int* rc)
597 : {
598 : int rc_local = 0;
599 :
600 30244 : rc_local = stat(name, buffer_stat);
601 30244 : if (rc_local < 0)
602 : {
603 14534 : *rc = errno;
604 : }
605 30244 : return rc_local;
606 : }
607 :
608 30244 : static off_t file_get_size(const char* name, int* rc)
609 : {
610 : struct stat buffer_stat;
611 : off_t size = -1;
612 :
613 30244 : if (!file_stat(name, &buffer_stat, rc))
614 : {
615 15710 : if(S_ISREG(buffer_stat.st_mode))
616 : {
617 15710 : size = buffer_stat.st_size;
618 : }
619 : else
620 : {
621 0 : *rc = EINVAL;
622 : }
623 : }
624 30244 : return size;
625 : }
626 :
627 30244 : static char* file_load(const char* name, off_t* size, int* return_rc)
628 : {
629 30244 : off_t local_size = 0;
630 30244 : int rc = 0;
631 : char* buffer = NULL;
632 : int fd;
633 :
634 : assert(name != NULL);
635 :
636 30244 : if(!size)
637 : {
638 : size = &local_size;
639 : }
640 30244 : *size = file_get_size(name, &rc);
641 30244 : if (!rc)
642 : {
643 15710 : fd = open(name, FILE_O_RDONLY | FILE_O_BINARY);
644 15710 : if (!(fd == -1))
645 : {
646 15710 : buffer = malloc((size_t)(*size + 1));
647 15710 : if (buffer == NULL)
648 : {
649 0 : rc = ENOMEM;
650 : }
651 : else
652 : {
653 : ssize_t i;
654 :
655 : REDO:
656 15710 : i = read(fd, buffer, (size_t)(*size));
657 15710 : if(i == -1)
658 : {
659 0 : if(errno == EINTR)
660 : {
661 : goto REDO;
662 : }
663 : else
664 : {
665 0 : rc = errno;
666 : }
667 : }
668 : else
669 : {
670 15710 : if (i != *size)
671 : {
672 0 : rc = EIO;
673 : }
674 : }
675 15710 : buffer[*size] = 0;
676 : }
677 15710 : close(fd);
678 : }
679 : }
680 :
681 30244 : if(rc && buffer)
682 : {
683 0 : free(buffer);
684 : buffer = NULL;
685 : }
686 30244 : if(return_rc)
687 : {
688 30244 : *return_rc = rc;
689 : }
690 30244 : return buffer;
691 : }
692 :
693 932 : static void _cancel_relative(char* base, char** ref_cursor, char** ref_cursor_out, char* end)
694 : {
695 932 : char* cursor = *ref_cursor;
696 932 : char* cursor_out = *ref_cursor_out;
697 :
698 : do
699 : {
700 1000 : cursor += 3;
701 2000 : while(cursor_out > base && cursor_out[-1] == '/')
702 0 : cursor_out--;
703 5908 : while(cursor_out > base && *--cursor_out != '/');
704 : }
705 1000 : while(cursor + 3 < end && !memcmp(cursor, "/../", 4));
706 932 : *ref_cursor = cursor;
707 932 : *ref_cursor_out = cursor_out;
708 932 : }
709 :
710 7084258 : static inline void eat_space(char ** token)
711 : {
712 20620908 : while ((' ' == **token) || ('\t' == **token)) {
713 6452392 : ++(*token);
714 : }
715 7084258 : }
716 :
717 : /*
718 : * Prune LibreOffice specific duplicate dependencies to improve
719 : * gnumake startup time, and shrink the disk-space footprint.
720 : */
721 : static inline int
722 9656192 : elide_dependency(const char* key, int key_len, const char **unpacked_end)
723 : {
724 : #if 0
725 : {
726 : int i;
727 : fprintf (stderr, "elide?%d!: '", internal_boost);
728 : for (i = 0; i < key_len; i++) {
729 : fprintf (stderr, "%c", key[i]);
730 : }
731 : fprintf (stderr, "'\n");
732 : }
733 : #endif
734 :
735 : /* boost brings a plague of header files */
736 : int i;
737 : int unpacked = 0;
738 : /* walk down path elements */
739 591198700 : for (i = 0; i < key_len - 1; i++)
740 : {
741 581547256 : if (key[i] == '/')
742 : {
743 71470797 : if (0 == unpacked)
744 : {
745 39059909 : if (!PATHNCMP(key + i + 1, "workdir/", 8))
746 : {
747 : unpacked = 1;
748 3572723 : continue;
749 : }
750 : }
751 : else
752 : {
753 32410888 : if (!PATHNCMP(key + i + 1, "UnpackedTarball/", 16))
754 : {
755 4748 : if (unpacked_end)
756 2378 : *unpacked_end = strchr(key + i + 17, '/');
757 : return 1;
758 : }
759 : }
760 : }
761 : }
762 :
763 : return 0;
764 : }
765 :
766 : /*
767 : * We collapse tens of internal boost headers to the unpacked target, such
768 : * that you can re-compile / install boost and all is well.
769 : */
770 0 : static void emit_single_boost_header(void)
771 : {
772 : #define BOOST_TARGET "/UnpackedTarball/boost.done"
773 0 : fprintf(stdout, "%s" BOOST_TARGET " ", work_dir);
774 0 : }
775 :
776 2378 : static void emit_unpacked_target(const char* token, const char* end)
777 : {
778 2378 : fwrite(token, 1, end-token, stdout);
779 2378 : fputs(".done ", stdout);
780 2378 : }
781 :
782 : /* prefix paths to absolute */
783 212385 : static inline void print_fullpaths(char* line)
784 : {
785 : char* token;
786 : char* end;
787 : int boost_count = 0;
788 : int token_len;
789 212385 : const char * unpacked_end = 0; /* end of UnpackedTarget match (if any) */
790 : /* for UnpackedTarget the target is GenC{,xx}Object, dont mangle! */
791 : int target_seen = 0;
792 :
793 212385 : token = line;
794 212385 : eat_space(&token);
795 7084258 : while (*token)
796 : {
797 : end = token;
798 : /* hard to believe that in this day and age drive letters still exist */
799 6659488 : if (*end && (':' == *(end+1)) &&
800 0 : (('\\' == *(end+2)) || ('/' == *(end+2))) && isalpha(*end))
801 : {
802 0 : end = end + 3; /* only one cross, err drive letter per filename */
803 : }
804 326234697 : while (*end && (' ' != *end) && ('\t' != *end) && (':' != *end)) {
805 319575209 : ++end;
806 : }
807 6659488 : token_len = end - token;
808 13106591 : if (target_seen &&
809 6447103 : elide_dependency(token, token_len, &unpacked_end))
810 : {
811 2378 : if (unpacked_end)
812 : {
813 2378 : if (internal_boost && !PATHNCMP(unpacked_end - 5, "boost", 5))
814 : {
815 0 : ++boost_count;
816 0 : if (boost_count == 1)
817 0 : emit_single_boost_header();
818 : else
819 : {
820 : /* don't output, and swallow trailing \\\n if any */
821 0 : token = end;
822 0 : eat_space(&token);
823 0 : if (token[0] == '\\' && token[1] == '\n')
824 0 : end = token + 2;
825 : }
826 : }
827 : else
828 : {
829 2378 : emit_unpacked_target(token, unpacked_end);
830 : }
831 2378 : unpacked_end = 0;
832 : }
833 : }
834 : else
835 : {
836 6657110 : if (fwrite(token, token_len, 1, stdout) != 1)
837 0 : abort();
838 6657110 : fputc(' ', stdout);
839 : }
840 6659488 : token = end;
841 6659488 : eat_space(&token);
842 6659488 : if (!target_seen)
843 : {
844 212385 : if (':' == *token)
845 : {
846 : target_seen = 1;
847 212385 : fputc(':', stdout);
848 212385 : ++token;
849 212385 : eat_space(&token);
850 : }
851 : }
852 : }
853 212385 : }
854 :
855 3209089 : static inline char * eat_space_at_end(char * end)
856 : {
857 : char * real_end;
858 : assert('\0' == *end);
859 3209089 : real_end = end - 1;
860 9627267 : while (' ' == *real_end || '\t' == *real_end || '\n' == *real_end
861 6418178 : || ':' == *real_end)
862 : { /* eat colon and whitespace at end */
863 3209089 : --real_end;
864 : }
865 3209089 : return real_end;
866 : }
867 :
868 : static char* phony_content_buffer;
869 14534 : static inline char* generate_phony_line(char* phony_target, char* extension)
870 : {
871 : char* src;
872 : char* dest;
873 : char* last_dot = NULL;
874 : //fprintf(stderr, "generate_phony_line called with phony_target %s and extension %s\n", phony_target, extension);
875 598774 : for(dest = phony_content_buffer+work_dir_len, src = phony_target; *src != 0; ++src, ++dest)
876 : {
877 584240 : *dest = *src;
878 584240 : if(*dest == '.')
879 : {
880 : last_dot = dest;
881 : }
882 : }
883 : //fprintf(stderr, "generate_phony_line after phony_target copy: %s\n", phony_content_buffer);
884 39079 : for(dest = last_dot+1, src = extension; *src != 0; ++src, ++dest)
885 : {
886 24545 : *dest = *src;
887 : }
888 : //fprintf(stderr, "generate_phony_line after extension add: %s\n", phony_content_buffer);
889 14534 : strcpy(dest, ": $(gb_Helper_PHONY)\n");
890 : //fprintf(stderr, "generate_phony_line after phony add: %s\n", phony_content_buffer);
891 14534 : return phony_content_buffer;
892 : }
893 :
894 14534 : static inline int generate_phony_file(char* fn, char* content)
895 : {
896 : FILE* depfile;
897 14534 : depfile = fopen(fn, "w");
898 14534 : if(depfile)
899 : {
900 14534 : fputs(content, depfile);
901 14534 : fclose(depfile);
902 : }
903 14534 : return !depfile;
904 : }
905 :
906 29171 : static int _process(struct hash* dep_hash, char* fn)
907 : {
908 : int rc;
909 : char* buffer;
910 : char* end;
911 : char* cursor;
912 : char* cursor_out;
913 : char* base;
914 : char* created_line = NULL;
915 : char* src_relative;
916 : int continuation = 0;
917 : char last_ns = 0;
918 : off_t size;
919 :
920 29171 : buffer = file_load(fn, &size, &rc);
921 : /* Note: yes we are going to leak 'buffer'
922 : * this is on purpose, to avoid cloning the 'key' out of it
923 : * and our special 'hash' just store the pointer to the key
924 : * inside of buffer, hence it need to remain allocated
925 : */
926 29171 : if(!rc)
927 : {
928 14637 : base = cursor_out = cursor = end = buffer;
929 14637 : end += size;
930 :
931 : /* first eat unneeded space at the beginning of file
932 : */
933 29274 : while(cursor < end && (*cursor == ' ' || *cursor == '\\'))
934 0 : ++cursor;
935 :
936 609072986 : while(cursor < end)
937 : {
938 609058349 : if(*cursor == '\\')
939 : {
940 : continuation = 1;
941 3223430 : *cursor_out++ = *cursor++;
942 : }
943 605834919 : else if(*cursor == '/')
944 : {
945 71664730 : if(cursor + 3 < end)
946 : {
947 71664730 : if(!memcmp(cursor, "/../", 4))
948 : {
949 932 : _cancel_relative(base, &cursor, &cursor_out, end);
950 : }
951 : }
952 71664730 : *cursor_out++ = *cursor++;
953 : }
954 534170189 : else if(*cursor == '\n')
955 : {
956 9661413 : if(!continuation)
957 : {
958 6437983 : *cursor_out = 0;
959 6437983 : if(base < cursor)
960 : {
961 : /* here we have a complete rule */
962 3223606 : if(last_ns == ':')
963 : {
964 : /* if the rule ended in ':' that is a no-dep rule
965 : * these are the one for which we want to filter
966 : * duplicate out
967 : */
968 3209089 : int key_len = eat_space_at_end(cursor_out) - base;
969 3209089 : if (!elide_dependency(base,key_len + 1, NULL)
970 3206719 : && hash_store(dep_hash, base, key_len))
971 : {
972 : /* DO NOT modify base after it has been added
973 : as key by hash_store */
974 197868 : print_fullpaths(base);
975 197868 : putc('\n', stdout);
976 : }
977 : }
978 : else
979 : {
980 : /* rule with dep, just write it */
981 14517 : print_fullpaths(base);
982 14517 : putc('\n', stdout);
983 : }
984 : last_ns = ' '; // cannot hurt to reset it
985 : }
986 6437983 : cursor += 1;
987 6437983 : base = cursor_out = cursor;
988 : }
989 : else
990 : {
991 : /* here we have a '\' followed by \n this is a continuation
992 : * i.e not a complete rule yet
993 : */
994 3223430 : *cursor_out++ = *cursor++;
995 : continuation = 0; // cancel current one (empty lines!)
996 : }
997 : }
998 : else
999 : {
1000 : continuation = 0;
1001 : /* not using isspace() here save 25% of I refs and 75% of D refs based on cachegrind */
1002 524508776 : if(*cursor != ' ' && *cursor != '\n' && *cursor != '\t' )
1003 : {
1004 : last_ns = *cursor;
1005 : }
1006 524508776 : *cursor_out++ = *cursor++;
1007 : }
1008 : }
1009 : /* just in case the file did not end with a \n, there may be a pending rule */
1010 14637 : if(base < cursor_out)
1011 : {
1012 0 : if(last_ns == ':')
1013 : {
1014 0 : int key_len = eat_space_at_end(cursor_out) - base;
1015 0 : if (!elide_dependency(base,key_len + 1, NULL) &&
1016 0 : hash_store(dep_hash, base, key_len))
1017 : {
1018 0 : puts(base);
1019 0 : putc('\n', stdout);
1020 : }
1021 : }
1022 : else
1023 : {
1024 0 : puts(base);
1025 0 : putc('\n', stdout);
1026 : }
1027 : }
1028 : }
1029 : else
1030 : {
1031 14534 : if(strncmp(fn, work_dir, work_dir_len) == 0)
1032 : {
1033 14534 : if(strncmp(fn+work_dir_len, "/Dep/", 5) == 0)
1034 : {
1035 14534 : src_relative = fn+work_dir_len+5;
1036 : // cases ordered by frequency
1037 14534 : if(strncmp(src_relative, "CxxObject/", 10) == 0)
1038 : {
1039 8299 : created_line = generate_phony_line(src_relative+10, "o");
1040 8299 : rc = generate_phony_file(fn, created_line);
1041 : }
1042 6235 : else if(strncmp(fn+work_dir_len+5, "UnoApiPartTarget/", 17) == 0)
1043 : {
1044 5289 : created_line = generate_phony_line(src_relative+17, "urd");
1045 5289 : rc = generate_phony_file(fn, created_line);
1046 : }
1047 946 : else if(strncmp(fn+work_dir_len+5, "SrsPartTarget/", 14) == 0)
1048 : {
1049 567 : created_line = generate_phony_line(src_relative+14, "");
1050 567 : rc = generate_phony_file(fn, created_line);
1051 : }
1052 379 : else if(strncmp(src_relative, "GenCxxObject/", 13) == 0)
1053 : {
1054 298 : created_line = generate_phony_line(src_relative+13, "o");
1055 298 : rc = generate_phony_file(fn, created_line);
1056 : }
1057 81 : else if(strncmp(src_relative, "CObject/", 8) == 0)
1058 : {
1059 56 : created_line = generate_phony_line(src_relative+8, "o");
1060 56 : rc = generate_phony_file(fn, created_line);
1061 : }
1062 25 : else if(strncmp(src_relative, "GenCObject/", 11) == 0)
1063 : {
1064 24 : created_line = generate_phony_line(src_relative+11, "o");
1065 24 : rc = generate_phony_file(fn, created_line);
1066 : }
1067 1 : else if(strncmp(src_relative, "SdiObject/", 10) == 0)
1068 : {
1069 0 : created_line = generate_phony_line(src_relative+10, "o");
1070 0 : rc = generate_phony_file(fn, created_line);
1071 : }
1072 1 : else if(strncmp(src_relative, "AsmObject/", 10) == 0)
1073 : {
1074 1 : created_line = generate_phony_line(src_relative+10, "o");
1075 1 : rc = generate_phony_file(fn, created_line);
1076 : }
1077 0 : else if(strncmp(src_relative, "ObjCxxObject/", 13) == 0)
1078 : {
1079 0 : created_line = generate_phony_line(src_relative+13, "o");
1080 0 : rc = generate_phony_file(fn, created_line);
1081 : }
1082 0 : else if(strncmp(src_relative, "ObjCObject/", 11) == 0)
1083 : {
1084 0 : created_line = generate_phony_line(src_relative+11, "o");
1085 0 : rc = generate_phony_file(fn, created_line);
1086 : }
1087 : else
1088 : {
1089 0 : fprintf(stderr, "no magic for %s(%s) in %s\n", fn, src_relative, work_dir);
1090 : }
1091 : }
1092 14534 : if(!rc)
1093 : {
1094 14534 : puts(created_line);
1095 : }
1096 : }
1097 : }
1098 29171 : return rc;
1099 : }
1100 :
1101 0 : static void _usage(void)
1102 : {
1103 0 : fputs("Usage: concat-deps <file that contains dep_files>\n", stderr);
1104 0 : }
1105 :
1106 : #define kDEFAULT_HASH_SIZE 4096
1107 : #define PHONY_TARGET_BUFFER 4096
1108 :
1109 2146 : static int get_var(char **var, const char *name)
1110 : {
1111 2146 : *var = (char *)getenv(name);
1112 2146 : if(!*var)
1113 : {
1114 0 : fprintf(stderr,"Error: %s is missing in the environement\n", name);
1115 0 : return 1;
1116 : }
1117 : return 0;
1118 : }
1119 :
1120 1073 : int main(int argc, char** argv)
1121 : {
1122 1073 : int rc = 0;
1123 1073 : off_t in_list_size = 0;
1124 : char* in_list;
1125 : char* in_list_cursor;
1126 : char* in_list_base;
1127 : struct hash* dep_hash;
1128 : const char *env_str;
1129 :
1130 1073 : if(argc < 2)
1131 : {
1132 0 : _usage();
1133 0 : return 1;
1134 : }
1135 1073 : if(get_var(&base_dir, "SRCDIR") || get_var(&work_dir, "WORKDIR"))
1136 : return 1;
1137 1073 : work_dir_len = strlen(work_dir);
1138 1073 : phony_content_buffer = malloc(PHONY_TARGET_BUFFER);
1139 1073 : strcpy(phony_content_buffer, work_dir);
1140 1073 : phony_content_buffer[work_dir_len] = '/';
1141 :
1142 1073 : env_str = getenv("SYSTEM_BOOST");
1143 1073 : internal_boost = !env_str || strcmp(env_str,"TRUE");
1144 :
1145 1073 : in_list = file_load(argv[1], &in_list_size, &rc);
1146 1073 : if(!rc)
1147 : {
1148 1073 : dep_hash = hash_create( kDEFAULT_HASH_SIZE);
1149 : in_list_base = in_list_cursor = in_list;
1150 :
1151 : /* extract filename of dep file from a 'space' separated list */
1152 3091621 : while(*in_list_cursor)
1153 : {
1154 3089475 : if(*in_list_cursor == ' ' || *in_list_cursor == '\n')
1155 : {
1156 31315 : *in_list_cursor = 0;
1157 31315 : if(in_list_base < in_list_cursor)
1158 : {
1159 29171 : rc = _process(dep_hash, in_list_base);
1160 29171 : if(rc)
1161 : {
1162 : break;
1163 : }
1164 : }
1165 31315 : in_list_cursor += 1;
1166 : in_list_base = in_list_cursor;
1167 : }
1168 : else
1169 : {
1170 3058160 : in_list_cursor += 1;
1171 : }
1172 : }
1173 1073 : if(!rc)
1174 : {
1175 : /* catch the last entry in case the input did not terminate with a 'space' */
1176 1073 : if(in_list_base < in_list_cursor)
1177 : {
1178 0 : rc = _process(dep_hash, in_list_base);
1179 : }
1180 : }
1181 : #ifdef HASH_STAT
1182 : fprintf(stderr, "stats: u:%d s:%d l:%d t:%d c:%d m:%d $:%d\n",
1183 : dep_hash->used, dep_hash->size, dep_hash->load_limit, dep_hash->stored,
1184 : dep_hash->collisions, dep_hash->memcmp, dep_hash->cost);
1185 : #endif
1186 : }
1187 1073 : return rc;
1188 : }
1189 :
1190 : /* vim:set shiftwidth=4 softtabstop=4 expandtab: */
|