Line data Source code
1 : /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
2 : /*
3 : * Copyright (C) 2011 Norbert Thiebaud
4 : * License: GPLv3
5 : */
6 :
7 : /* define to activate stats reporting on hash usage*/
8 : /* #define HASH_STAT */
9 :
10 : /* ===============================================
11 : * Set-up: defines to identify the system and system related properties
12 : * ===============================================
13 : */
14 :
15 : #ifdef __APPLE__
16 : #ifdef __x86_64__
17 : #undef CORE_BIG_ENDIAN
18 : #define CORE_LITTLE_ENDIAN
19 : #else
20 : #define CORE_BIG_ENDIAN
21 : #undef CORE_LITTLE_ENDIAN
22 : #endif
23 :
24 : #endif
25 : #ifdef _AIX
26 : #define CORE_BIG_ENDIAN
27 : #undef CORE_LITTLE_ENDIAN
28 : #endif /* Def _AIX */
29 :
30 : #ifdef _MSC_VER
31 : #define __windows
32 : #undef CORE_BIG_ENDIAN
33 : #define CORE_LITTLE_ENDIAN
34 : #endif /* Def _MSC_VER */
35 :
36 : #if defined(__linux) || defined(__OpenBSD__) || \
37 : defined(__FreeBSD__) || defined(__NetBSD__) || \
38 : defined(__DragonFly__) || defined(__FreeBSD_kernel__)
39 : #include <sys/param.h>
40 : #if __BYTE_ORDER == __LITTLE_ENDIAN
41 : #undef CORE_BIG_ENDIAN
42 : #define CORE_LITTLE_ENDIAN
43 : #else /* !(__BYTE_ORDER == __LITTLE_ENDIAN) */
44 : #if __BYTE_ORDER == __BIG_ENDIAN
45 : #define CORE_BIG_ENDIAN
46 : #undef CORE_LITTLE_ENDIAN
47 : #endif /* __BYTE_ORDER == __BIG_ENDIAN */
48 : #endif /* !(__BYTE_ORDER == __LITTLE_ENDIAN) */
49 : #endif /* Def __linux || Def *BSD */
50 :
51 : #ifdef __sun
52 : #ifdef __sparc
53 : #define CORE_BIG_ENDIAN
54 : #undef CORE_LITTLE_ENDIAN
55 : #else /* Ndef __sparc */
56 : #undef CORE_BIG_ENDIAN
57 : #define CORE_LITTLE_ENDIAN
58 : #endif /* Ndef __sparc */
59 : #endif /* Def __sun */
60 :
61 : #include <assert.h>
62 : #include <stdio.h>
63 : #include <stdlib.h>
64 : #include <sys/types.h>
65 : #include <sys/stat.h>
66 : #include <errno.h>
67 : #include <fcntl.h>
68 : #include <string.h>
69 : #include <ctype.h>
70 :
71 : #ifdef __windows
72 : #include <io.h>
73 : #else
74 : #include <unistd.h>
75 : #endif
76 :
77 : /* modes */
78 : #ifdef __windows
79 : #define FILE_O_RDONLY _O_RDONLY
80 : #define FILE_O_BINARY _O_BINARY
81 : #define PATHNCMP _strnicmp /* MSVC converts paths to lower-case sometimes? */
82 : #define inline __inline
83 : #define ssize_t long
84 : #define S_ISREG(mode) (((mode) & _S_IFMT) == (_S_IFREG)) /* MSVC does not have this macro */
85 : #else /* not windaube */
86 : #define FILE_O_RDONLY O_RDONLY
87 : #define FILE_O_BINARY 0
88 : #define PATHNCMP strncmp
89 : #endif /* not windaube */
90 :
91 : #ifndef TRUE
92 : #define TRUE 1
93 : #endif
94 : #ifndef FALSE
95 : #define FALSE 0
96 : #endif
97 :
98 : int internal_boost = 0;
99 : static char* base_dir;
100 : static char* work_dir;
101 : size_t work_dir_len;
102 :
103 : #ifdef __GNUC__
104 : #define clz __builtin_clz
105 : #else
106 : static inline int clz(unsigned int value)
107 : {
108 : int result = 32;
109 :
110 : while(value)
111 : {
112 : value >>= 1;
113 : result -= 1;
114 : }
115 : return result;
116 : }
117 : #endif
118 :
119 : static inline unsigned int get_unaligned_uint(const unsigned char* cursor)
120 : {
121 : unsigned int result;
122 :
123 0 : memcpy(&result, cursor, sizeof(unsigned int));
124 : return result;
125 : }
126 :
127 : /* ===============================================
128 : * memory pool for fast fix-size allocation (non-tread-safe)
129 : * ===============================================
130 : */
131 : struct pool
132 : {
133 : void* head_free; /**< head of a linked list of freed element */
134 : char* fresh; /**< top of a memory block to dig new element */
135 : char* tail; /**< to detect end of extent... when fresh pass tail */
136 : void* extent; /**< pointer to the primary extent block */
137 : int size_elem; /**< size of an element. */
138 : int primary; /**< primary allocation in bytes */
139 : int secondary; /**< secondary allocation in bytes */
140 : };
141 : #define POOL_ALIGN_INCREMENT 8 /**< Alignement, must be a power of 2 and of size > to sizeof(void*) */
142 :
143 :
144 0 : static void* pool_take_extent(struct pool* pool, int allocate)
145 : {
146 : unsigned int size = 0;
147 : void* extent;
148 : void* data = NULL;
149 :
150 0 : if(pool->extent)
151 : {
152 : /* we already have an extent, so this is a secondary */
153 0 : if(pool->secondary)
154 : {
155 0 : size = pool->secondary;
156 : }
157 : }
158 : else
159 : {
160 : assert(pool->primary);
161 0 : size = pool->primary;
162 : }
163 0 : if(size)
164 : {
165 0 : extent = malloc(size);
166 0 : if(extent)
167 : {
168 0 : *(void**)extent = pool->extent;
169 0 : pool->extent = extent;
170 0 : if(allocate)
171 : {
172 0 : data = ((char*)extent) + POOL_ALIGN_INCREMENT;
173 0 : pool->fresh = ((char*)data) + pool->size_elem;
174 0 : pool->tail = pool->fresh + (size - pool->size_elem);
175 : }
176 : else
177 : {
178 0 : pool->fresh = ((char*)extent) + POOL_ALIGN_INCREMENT;
179 0 : pool->tail = pool->fresh + (size - pool->size_elem);
180 : }
181 : }
182 : }
183 0 : return data;
184 : }
185 :
186 : /* Create a memory pool for fix size objects
187 : * this is a simplified implementation that
188 : * is _not_ thread safe.
189 : */
190 0 : static struct pool* pool_create(int size_elem, int primary, int secondary)
191 : {
192 : struct pool* pool;
193 :
194 : assert(primary > 0);
195 : assert(secondary >= 0);
196 : assert(size_elem > 0);
197 :
198 0 : pool = (struct pool*)calloc(1, sizeof(struct pool));
199 0 : if(!pool) return NULL;
200 : /* Adjust the element size so that it be aligned, and so that an element could
201 : * at least contain a void*
202 : */
203 0 : pool->size_elem = size_elem = (size_elem + POOL_ALIGN_INCREMENT - 1) & ~(POOL_ALIGN_INCREMENT - 1);
204 :
205 0 : pool->primary = (size_elem * primary) + POOL_ALIGN_INCREMENT;
206 0 : pool->secondary = secondary > 0 ? (size_elem * secondary) + POOL_ALIGN_INCREMENT : 0;
207 0 : pool_take_extent(pool, FALSE);
208 :
209 0 : return pool;
210 :
211 : }
212 :
213 0 : static void pool_destroy(struct pool* pool)
214 : {
215 : void* extent;
216 : void* next;
217 :
218 0 : if(pool != NULL)
219 : {
220 0 : extent = pool->extent;
221 0 : while(extent)
222 : {
223 0 : next = *(void**)extent;
224 0 : free(extent);
225 : extent = next;
226 : }
227 0 : free(pool);
228 : }
229 0 : }
230 :
231 0 : static inline void* pool_alloc(struct pool* pool)
232 : {
233 : void* data;
234 :
235 0 : data = pool->head_free;
236 0 : if(data == NULL)
237 : {
238 : /* we have no old-freed elem */
239 0 : if(pool->fresh <= pool->tail)
240 : {
241 : /* pick a slice of the current extent */
242 : data = (void*)pool->fresh;
243 0 : pool->fresh += pool->size_elem;
244 : }
245 : else
246 : {
247 : /* allocate a new extent */
248 0 : data = pool_take_extent(pool, TRUE);
249 : }
250 : }
251 : else
252 : {
253 : /* re-used old freed element by chopipng the head of the free list */
254 0 : pool->head_free = *(void**)data;
255 : }
256 :
257 0 : return data;
258 : }
259 :
260 :
261 : /* ===============================================
262 : * Hash implementation custumized to be just tracking
263 : * a unique list of string (i.e no data associated
264 : * with the key, no need for retrieval, etc..
265 : *
266 : * This is tuned for the particular use-case we have here
267 : * measures in tail_build showed that
268 : * we can get north of 4000 distinct values stored in a hash
269 : * the collision rate is at worse around 2%
270 : * the collision needing an expensive memcmp to resolve
271 : * have a rate typically at 1 per 1000
272 : * for tail_build we register 37229 unique key
273 : * with a total of 377 extra memcmp needed
274 : * which is completely negligible compared to the
275 : * number of memcmp required to eliminate duplicate
276 : * entry (north of 2.5 millions for tail_build)
277 : * ===============================================
278 : */
279 :
280 : struct hash_elem
281 : {
282 : struct hash_elem* next;
283 : const char* key;
284 : int key_len;
285 : };
286 :
287 : struct hash
288 : {
289 : struct hash_elem** array;
290 : struct pool* elems_pool;
291 : int flags;
292 : unsigned int used;
293 : unsigned int size;
294 : unsigned int load_limit;
295 : #ifdef HASH_STAT
296 : int stored;
297 : int collisions;
298 : int cost;
299 : int memcmp;
300 : #endif
301 : };
302 : #define HASH_F_NO_RESIZE (1<<0)
303 :
304 : /* The following hash_compute function was adapted from :
305 : * lookup3.c, by Bob Jenkins, May 2006, Public Domain.
306 : *
307 : * The changes from the original are mostly cosmetic
308 : */
309 : #define rot(x,k) (((x)<<(k)) | ((x)>>(32-(k))))
310 :
311 :
312 : #if defined CORE_BIG_ENDIAN
313 : #define MASK_C1 0xFFFFFF00
314 : #define MASK_C2 0xFFFF0000
315 : #define MASK_C3 0xFF000000
316 : #elif defined CORE_LITTLE_ENDIAN
317 : #define MASK_C1 0xFFFFFF
318 : #define MASK_C2 0xFFFF
319 : #define MASK_C3 0xFF
320 : #else
321 : #error "Missing Endianness definition"
322 : #endif
323 :
324 :
325 : #define mix(a,b,c) \
326 : { \
327 : a -= c; a ^= rot(c, 4); c += b; \
328 : b -= a; b ^= rot(a, 6); a += c; \
329 : c -= b; c ^= rot(b, 8); b += a; \
330 : a -= c; a ^= rot(c,16); c += b; \
331 : b -= a; b ^= rot(a,19); a += c; \
332 : c -= b; c ^= rot(b, 4); b += a; \
333 : }
334 : #define final(a,b,c) \
335 : { \
336 : c ^= b; c -= rot(b,14); \
337 : a ^= c; a -= rot(c,11); \
338 : b ^= a; b -= rot(a,25); \
339 : c ^= b; c -= rot(b,16); \
340 : a ^= c; a -= rot(c,4); \
341 : b ^= a; b -= rot(a,14); \
342 : c ^= b; c -= rot(b,24); \
343 : }
344 :
345 0 : static unsigned int hash_compute( struct hash* hash, const char* key, int length)
346 : {
347 : unsigned int a;
348 : unsigned int b;
349 : unsigned int c; /* internal state */
350 : const unsigned char* uk = (const unsigned char*)key;
351 :
352 : /* Set up the internal state */
353 0 : a = b = c = 0xdeadbeef + (length << 2);
354 :
355 : /* we use this to 'hash' full path with mostly a common root
356 : * let's now waste too much cycles hashing mostly constant stuff
357 : */
358 0 : if(length > 36)
359 : {
360 0 : uk += length - 36;
361 : length = 36;
362 : }
363 : /*------ all but last block: aligned reads and affect 32 bits of (a,b,c) */
364 0 : while (length > 12)
365 : {
366 0 : a += get_unaligned_uint(uk);
367 0 : b += get_unaligned_uint(uk+4);
368 0 : c += get_unaligned_uint(uk+8);
369 0 : mix(a,b,c);
370 0 : length -= 12;
371 0 : uk += 12;
372 : }
373 :
374 : /*----------------------------- handle the last (probably partial) block */
375 : /* Note: we possibly over-read, which would trigger complaint from VALGRIND
376 : * but we mask the undefined stuff if any, so we are still good, thanks
377 : * to alignment of memory allocation and tail-memory management overhead
378 : * we always can read 3 bytes past the official end without triggering
379 : * a segfault -- if you find a platform/compiler couple for which that postulat
380 : * is false, then you just need to over-allocate by 2 more bytes in file_load()
381 : * file_load already over-allocate by 1 to sitck a \0 at the end of the buffer.
382 : */
383 0 : switch(length)
384 : {
385 0 : case 12: c+=get_unaligned_uint(uk+8); b+=get_unaligned_uint(uk+4); a+=get_unaligned_uint(uk); break;
386 0 : case 11: c+=get_unaligned_uint(uk+8) & MASK_C1; b+=get_unaligned_uint(uk+4); a+=get_unaligned_uint(uk); break;
387 0 : case 10: c+=get_unaligned_uint(uk+8) & MASK_C2; b+=get_unaligned_uint(uk+4); a+=get_unaligned_uint(uk); break;
388 0 : case 9 : c+=get_unaligned_uint(uk+8) & MASK_C3; b+=get_unaligned_uint(uk+4); a+=get_unaligned_uint(uk); break;
389 0 : case 8 : b+=get_unaligned_uint(uk+4); a+=get_unaligned_uint(uk); break;
390 0 : case 7 : b+=get_unaligned_uint(uk+4) & MASK_C1; a+=get_unaligned_uint(uk); break;
391 0 : case 6 : b+=get_unaligned_uint(uk+4) & MASK_C2; a+=get_unaligned_uint(uk); break;
392 0 : case 5 : b+=get_unaligned_uint(uk+4) & MASK_C3; a+=get_unaligned_uint(uk); break;
393 0 : case 4 : a+=get_unaligned_uint(uk); break;
394 0 : case 3 : a+=get_unaligned_uint(uk) & MASK_C1; break;
395 0 : case 2 : a+=get_unaligned_uint(uk) & MASK_C2; break;
396 0 : case 1 : a+=get_unaligned_uint(uk) & MASK_C3; break;
397 0 : case 0 : return c & hash->size; /* zero length strings require no mixing */
398 : }
399 :
400 0 : final(a,b,c);
401 0 : return c & hash->size;
402 : }
403 :
404 0 : static void hash_destroy(struct hash* hash)
405 : {
406 0 : if(hash)
407 : {
408 0 : if(hash->array)
409 : {
410 0 : free(hash->array);
411 : }
412 0 : if(hash->elems_pool)
413 : {
414 0 : pool_destroy(hash->elems_pool);
415 : }
416 0 : free(hash);
417 : }
418 0 : }
419 :
420 0 : static struct hash* hash_create(unsigned int size)
421 : {
422 : struct hash* hash;
423 :
424 : assert(size > 0);
425 0 : hash = calloc(1, sizeof(struct hash));
426 0 : if(hash)
427 : {
428 0 : size += (size >> 2) + 1; /* ~ 75% load factor */
429 0 : if(size >= 15)
430 : {
431 0 : hash->size = (((unsigned int)0xFFFFFFFF) >> clz((unsigned int)size));
432 : }
433 : else
434 : {
435 0 : hash->size = size = 15;
436 : }
437 0 : hash->load_limit = hash->size - (hash->size >> 2);
438 0 : hash->used = 0;
439 0 : hash->array = (struct hash_elem**)calloc(hash->size + 1, sizeof(struct hash_elem*));
440 0 : if(hash->array == NULL)
441 : {
442 0 : hash_destroy(hash);
443 : hash = NULL;
444 : }
445 : }
446 0 : if(hash)
447 : {
448 0 : hash->elems_pool = pool_create(sizeof(struct hash_elem),
449 0 : size, size << 1);
450 0 : if(!hash->elems_pool)
451 : {
452 0 : hash_destroy(hash);
453 : hash = NULL;
454 : }
455 : }
456 0 : return hash;
457 : }
458 :
459 0 : static void hash_resize(struct hash* hash)
460 : {
461 0 : unsigned int old_size = hash->size;
462 : unsigned int hashed;
463 : struct hash_elem* hash_elem;
464 : struct hash_elem* next;
465 : struct hash_elem** array;
466 : unsigned int i;
467 :
468 0 : hash->size = (old_size << 1) + 1;
469 : /* we really should avoid to get there... so print a message to alert of the condition */
470 0 : fprintf(stderr, "resize hash %d -> %d\n", old_size, hash->size);
471 0 : if(hash->size == old_size)
472 : {
473 0 : hash->flags |= HASH_F_NO_RESIZE;
474 0 : return;
475 : }
476 0 : array = calloc(hash->size + 1, sizeof(struct hash_elem*));
477 0 : if(array)
478 : {
479 0 : hash->load_limit = hash->size - (hash->size >> 2);
480 0 : for(i=0; i <= old_size; i++)
481 : {
482 0 : hash_elem = (struct hash_elem*)hash->array[i];
483 0 : while(hash_elem)
484 : {
485 0 : next = hash_elem->next;
486 :
487 0 : hashed = hash_compute(hash, hash_elem->key, hash_elem->key_len);
488 0 : hash_elem->next = array[hashed];
489 0 : array[hashed] = hash_elem;
490 : hash_elem = next;
491 : }
492 : }
493 0 : free(hash->array);
494 0 : hash->array = (struct hash_elem**)array;
495 : }
496 : else
497 : {
498 0 : hash->size = old_size;
499 0 : hash->flags |= HASH_F_NO_RESIZE;
500 : }
501 : }
502 :
503 : #ifdef HASH_STAT
504 : static inline int compare_key(struct hash* hash, const char* a, const char* b, int len, int* cost)
505 : {
506 : *cost += 1;
507 : hash->memcmp += 1;
508 : return memcmp(a,b, len);
509 : }
510 : #else
511 : #define compare_key(h,a,b,l,c) memcmp(a,b,l)
512 : #endif
513 :
514 : /* a customized hash_store function that just store the key and return
515 : * TRUE if the key was effectively stored, or FALSE if the key was already there
516 : */
517 0 : static int hash_store(struct hash* hash, const char* key, int key_len)
518 : {
519 : unsigned int hashed;
520 : struct hash_elem* hash_elem;
521 : int cost = 0;
522 :
523 : (void) cost;
524 0 : hashed = hash_compute(hash, key, key_len);
525 : #ifdef HASH_STAT
526 : hash->stored += 1;
527 : #endif
528 0 : hash_elem = (struct hash_elem*)hash->array[hashed];
529 0 : while(hash_elem && (hash_elem->key_len != key_len || compare_key(hash, hash_elem->key, key, key_len, &cost)))
530 : {
531 0 : hash_elem = hash_elem->next;
532 : }
533 :
534 0 : if(!hash_elem)
535 : {
536 0 : hash_elem = pool_alloc(hash->elems_pool);
537 0 : if(hash_elem)
538 : {
539 0 : hash_elem->key = key;
540 0 : hash_elem->key_len = key_len;
541 0 : hash_elem->next = hash->array[hashed];
542 :
543 : #ifdef HASH_STAT
544 : if(hash_elem->next)
545 : {
546 : hash->collisions += 1;
547 : hash->cost += cost;
548 : }
549 : #endif
550 0 : hash->array[hashed] = hash_elem;
551 0 : hash->used += 1;
552 0 : if(hash->used > hash->load_limit)
553 : {
554 0 : hash_resize(hash);
555 : }
556 : }
557 : return TRUE;
558 : }
559 : return FALSE;
560 : }
561 :
562 0 : static int file_stat(const char* name, struct stat* buffer_stat, int* rc)
563 : {
564 : int rc_local = 0;
565 :
566 0 : rc_local = stat(name, buffer_stat);
567 0 : if (rc_local < 0)
568 : {
569 0 : *rc = errno;
570 : }
571 0 : return rc_local;
572 : }
573 :
574 0 : static off_t file_get_size(const char* name, int* rc)
575 : {
576 : struct stat buffer_stat;
577 : off_t size = -1;
578 :
579 0 : if (!file_stat(name, &buffer_stat, rc))
580 : {
581 0 : if(S_ISREG(buffer_stat.st_mode))
582 : {
583 0 : size = buffer_stat.st_size;
584 : }
585 : else
586 : {
587 0 : *rc = EINVAL;
588 : }
589 : }
590 0 : return size;
591 : }
592 :
593 0 : static char* file_load(const char* name, off_t* size, int* return_rc)
594 : {
595 0 : off_t local_size = 0;
596 0 : int rc = 0;
597 : char* buffer = NULL;
598 : int fd;
599 :
600 : assert(name != NULL);
601 :
602 0 : if(!size)
603 : {
604 : size = &local_size;
605 : }
606 0 : *size = file_get_size(name, &rc);
607 0 : if (!rc && *size >= 0)
608 : {
609 0 : fd = open(name, FILE_O_RDONLY | FILE_O_BINARY);
610 0 : if (!(fd == -1))
611 : {
612 0 : buffer = malloc((size_t)(*size + 1));
613 0 : if (buffer == NULL)
614 : {
615 0 : rc = ENOMEM;
616 : }
617 : else
618 : {
619 : ssize_t i;
620 :
621 : REDO:
622 0 : i = read(fd, buffer, (size_t)(*size));
623 0 : if(i == -1)
624 : {
625 0 : if(errno == EINTR)
626 : {
627 : goto REDO;
628 : }
629 : else
630 : {
631 0 : rc = errno;
632 : }
633 : }
634 : else
635 : {
636 0 : if (i != *size)
637 : {
638 0 : rc = EIO;
639 : }
640 : }
641 0 : buffer[*size] = 0;
642 : }
643 0 : close(fd);
644 : }
645 : }
646 :
647 0 : if(rc && buffer)
648 : {
649 0 : free(buffer);
650 : buffer = NULL;
651 : }
652 0 : if(return_rc)
653 : {
654 0 : *return_rc = rc;
655 : }
656 0 : return buffer;
657 : }
658 :
659 0 : static void _cancel_relative(char* base, char** ref_cursor, char** ref_cursor_out, char* end)
660 : {
661 0 : char* cursor = *ref_cursor;
662 0 : char* cursor_out = *ref_cursor_out;
663 :
664 : do
665 : {
666 0 : cursor += 3;
667 0 : while(cursor_out > base && cursor_out[-1] == '/')
668 0 : cursor_out--;
669 0 : while(cursor_out > base && *--cursor_out != '/');
670 : }
671 0 : while(cursor + 3 < end && !memcmp(cursor, "/../", 4));
672 0 : *ref_cursor = cursor;
673 0 : *ref_cursor_out = cursor_out;
674 0 : }
675 :
676 0 : static inline void eat_space(char ** token)
677 : {
678 0 : while ((' ' == **token) || ('\t' == **token)) {
679 0 : ++(*token);
680 : }
681 0 : }
682 :
683 : /*
684 : * Prune LibreOffice specific duplicate dependencies to improve
685 : * gnumake startup time, and shrink the disk-space footprint.
686 : */
687 : static inline int
688 0 : elide_dependency(const char* key, int key_len, const char **unpacked_end)
689 : {
690 : #if 0
691 : {
692 : int i;
693 : fprintf (stderr, "elide?%d!: '", internal_boost);
694 : for (i = 0; i < key_len; i++) {
695 : fprintf (stderr, "%c", key[i]);
696 : }
697 : fprintf (stderr, "'\n");
698 : }
699 : #endif
700 :
701 : /* boost brings a plague of header files */
702 : int i;
703 : int unpacked = 0;
704 : /* walk down path elements */
705 0 : for (i = 0; i < key_len - 1; i++)
706 : {
707 0 : if (key[i] == '/')
708 : {
709 0 : if (0 == unpacked)
710 : {
711 0 : if (!PATHNCMP(key + i + 1, "workdir/", 8))
712 : {
713 : unpacked = 1;
714 0 : continue;
715 : }
716 : }
717 : else
718 : {
719 0 : if (!PATHNCMP(key + i + 1, "UnpackedTarball/", 16))
720 : {
721 0 : if (unpacked_end)
722 0 : *unpacked_end = strchr(key + i + 17, '/');
723 : return 1;
724 : }
725 : }
726 : }
727 : }
728 :
729 : return 0;
730 : }
731 :
732 : /*
733 : * We collapse tens of internal boost headers to the unpacked target, such
734 : * that you can re-compile / install boost and all is well.
735 : */
736 0 : static void emit_single_boost_header(void)
737 : {
738 : #define BOOST_TARGET "/UnpackedTarball/boost.done"
739 0 : fprintf(stdout, "%s" BOOST_TARGET " ", work_dir);
740 0 : }
741 :
742 0 : static void emit_unpacked_target(const char* token, const char* end)
743 : {
744 0 : fwrite(token, 1, end-token, stdout);
745 0 : fputs(".done ", stdout);
746 0 : }
747 :
748 : /* prefix paths to absolute */
749 0 : static inline void print_fullpaths(char* line)
750 : {
751 : char* token;
752 : char* end;
753 : int boost_count = 0;
754 : int token_len;
755 0 : const char * unpacked_end = 0; /* end of UnpackedTarget match (if any) */
756 : /* for UnpackedTarget the target is GenC{,xx}Object, dont mangle! */
757 : int target_seen = 0;
758 :
759 0 : token = line;
760 0 : eat_space(&token);
761 0 : while (*token)
762 : {
763 : end = token;
764 : /* hard to believe that in this day and age drive letters still exist */
765 0 : if (*end && (':' == *(end+1)) &&
766 0 : (('\\' == *(end+2)) || ('/' == *(end+2))) && isalpha(*end))
767 : {
768 0 : end = end + 3; /* only one cross, err drive letter per filename */
769 : }
770 0 : while (*end && (' ' != *end) && ('\t' != *end) && (':' != *end)) {
771 0 : ++end;
772 : }
773 0 : token_len = end - token;
774 0 : if (target_seen &&
775 0 : elide_dependency(token, token_len, &unpacked_end))
776 : {
777 0 : if (unpacked_end)
778 : {
779 0 : if (internal_boost && !PATHNCMP(unpacked_end - 5, "boost", 5))
780 : {
781 0 : ++boost_count;
782 0 : if (boost_count == 1)
783 0 : emit_single_boost_header();
784 : else
785 : {
786 : /* don't output, and swallow trailing \\\n if any */
787 0 : token = end;
788 0 : eat_space(&token);
789 0 : if (token[0] == '\\' && token[1] == '\n')
790 0 : end = token + 2;
791 : }
792 : }
793 : else
794 : {
795 0 : emit_unpacked_target(token, unpacked_end);
796 : }
797 0 : unpacked_end = 0;
798 : }
799 : }
800 : else
801 : {
802 0 : if (fwrite(token, token_len, 1, stdout) != 1)
803 0 : abort();
804 0 : fputc(' ', stdout);
805 : }
806 0 : token = end;
807 0 : eat_space(&token);
808 0 : if (!target_seen)
809 : {
810 0 : if (':' == *token)
811 : {
812 : target_seen = 1;
813 0 : fputc(':', stdout);
814 0 : ++token;
815 0 : eat_space(&token);
816 : }
817 : }
818 : }
819 0 : }
820 :
821 0 : static inline char * eat_space_at_end(char * end)
822 : {
823 : char * real_end;
824 : assert('\0' == *end);
825 0 : real_end = end - 1;
826 0 : while (' ' == *real_end || '\t' == *real_end || '\n' == *real_end
827 0 : || ':' == *real_end)
828 : { /* eat colon and whitespace at end */
829 0 : --real_end;
830 : }
831 0 : return real_end;
832 : }
833 :
834 : static char* phony_content_buffer;
835 0 : static inline char* generate_phony_line(char* phony_target, char* extension)
836 : {
837 : char* src;
838 : char* dest;
839 : char* last_dot = NULL;
840 : //fprintf(stderr, "generate_phony_line called with phony_target %s and extension %s\n", phony_target, extension);
841 0 : for(dest = phony_content_buffer+work_dir_len, src = phony_target; *src != 0; ++src, ++dest)
842 : {
843 0 : *dest = *src;
844 0 : if(*dest == '.')
845 : {
846 : last_dot = dest;
847 : }
848 : }
849 : //fprintf(stderr, "generate_phony_line after phony_target copy: %s\n", phony_content_buffer);
850 0 : for(dest = last_dot+1, src = extension; *src != 0; ++src, ++dest)
851 : {
852 0 : *dest = *src;
853 : }
854 : //fprintf(stderr, "generate_phony_line after extension add: %s\n", phony_content_buffer);
855 0 : strcpy(dest, ": $(gb_Helper_PHONY)\n");
856 : //fprintf(stderr, "generate_phony_line after phony add: %s\n", phony_content_buffer);
857 0 : return phony_content_buffer;
858 : }
859 :
860 0 : static inline int generate_phony_file(char* fn, char* content)
861 : {
862 : FILE* depfile;
863 0 : depfile = fopen(fn, "w");
864 0 : if(!depfile)
865 : {
866 0 : fprintf(stderr, "Could not open '%s' for writing: %s\n", fn, strerror(errno));
867 : }
868 : else
869 : {
870 0 : fputs(content, depfile);
871 0 : fclose(depfile);
872 : }
873 0 : return !depfile;
874 : }
875 :
876 0 : static int _process(struct hash* dep_hash, char* fn)
877 : {
878 : int rc;
879 : char* buffer;
880 : char* end;
881 : char* cursor;
882 : char* cursor_out;
883 : char* base;
884 : char* created_line = NULL;
885 : char* src_relative;
886 : int continuation = 0;
887 : char last_ns = 0;
888 : off_t size;
889 :
890 0 : buffer = file_load(fn, &size, &rc);
891 : /* Note: yes we are going to leak 'buffer'
892 : * this is on purpose, to avoid cloning the 'key' out of it
893 : * and our special 'hash' just store the pointer to the key
894 : * inside of buffer, hence it need to remain allocated
895 : */
896 0 : if(!rc)
897 : {
898 0 : base = cursor_out = cursor = end = buffer;
899 0 : end += size;
900 :
901 : /* first eat unneeded space at the beginning of file
902 : */
903 0 : while(cursor < end && (*cursor == ' ' || *cursor == '\\'))
904 0 : ++cursor;
905 :
906 0 : while(cursor < end)
907 : {
908 0 : if(*cursor == '\\')
909 : {
910 : continuation = 1;
911 0 : *cursor_out++ = *cursor++;
912 : }
913 0 : else if(*cursor == '/')
914 : {
915 0 : if(cursor + 3 < end)
916 : {
917 0 : if(!memcmp(cursor, "/../", 4))
918 : {
919 0 : _cancel_relative(base, &cursor, &cursor_out, end);
920 : }
921 : }
922 0 : *cursor_out++ = *cursor++;
923 : }
924 0 : else if(*cursor == '\n')
925 : {
926 0 : if(!continuation)
927 : {
928 0 : *cursor_out = 0;
929 0 : if(base < cursor)
930 : {
931 : /* here we have a complete rule */
932 0 : if(last_ns == ':')
933 : {
934 : /* if the rule ended in ':' that is a no-dep rule
935 : * these are the one for which we want to filter
936 : * duplicate out
937 : */
938 0 : int key_len = eat_space_at_end(cursor_out) - base;
939 0 : if (!elide_dependency(base,key_len + 1, NULL)
940 0 : && hash_store(dep_hash, base, key_len))
941 : {
942 : /* DO NOT modify base after it has been added
943 : as key by hash_store */
944 0 : print_fullpaths(base);
945 0 : putc('\n', stdout);
946 : }
947 : }
948 : else
949 : {
950 : /* rule with dep, just write it */
951 0 : print_fullpaths(base);
952 0 : putc('\n', stdout);
953 : }
954 : last_ns = ' '; // cannot hurt to reset it
955 : }
956 0 : cursor += 1;
957 0 : base = cursor_out = cursor;
958 : }
959 : else
960 : {
961 : /* here we have a '\' followed by \n this is a continuation
962 : * i.e not a complete rule yet
963 : */
964 0 : *cursor_out++ = *cursor++;
965 : continuation = 0; // cancel current one (empty lines!)
966 : }
967 : }
968 : else
969 : {
970 : continuation = 0;
971 : /* not using isspace() here save 25% of I refs and 75% of D refs based on cachegrind */
972 0 : if(*cursor != ' ' && *cursor != '\n' && *cursor != '\t' )
973 : {
974 : last_ns = *cursor;
975 : }
976 0 : *cursor_out++ = *cursor++;
977 : }
978 : }
979 : /* just in case the file did not end with a \n, there may be a pending rule */
980 0 : if(base < cursor_out)
981 : {
982 0 : if(last_ns == ':')
983 : {
984 0 : int key_len = eat_space_at_end(cursor_out) - base;
985 0 : if (!elide_dependency(base,key_len + 1, NULL) &&
986 0 : hash_store(dep_hash, base, key_len))
987 : {
988 0 : puts(base);
989 0 : putc('\n', stdout);
990 : }
991 : }
992 : else
993 : {
994 0 : puts(base);
995 0 : putc('\n', stdout);
996 : }
997 : }
998 : }
999 : else
1000 : {
1001 0 : if(strncmp(fn, work_dir, work_dir_len) == 0)
1002 : {
1003 0 : if(strncmp(fn+work_dir_len, "/Dep/", 5) == 0)
1004 : {
1005 0 : src_relative = fn+work_dir_len+5;
1006 : // cases ordered by frequency
1007 0 : if(strncmp(src_relative, "CxxObject/", 10) == 0)
1008 : {
1009 0 : created_line = generate_phony_line(src_relative+10, "o");
1010 0 : rc = generate_phony_file(fn, created_line);
1011 : }
1012 0 : else if(strncmp(fn+work_dir_len+5, "SrsPartTarget/", 14) == 0)
1013 : {
1014 0 : created_line = generate_phony_line(src_relative+14, "");
1015 0 : rc = generate_phony_file(fn, created_line);
1016 : }
1017 0 : else if(strncmp(src_relative, "GenCxxObject/", 13) == 0)
1018 : {
1019 0 : created_line = generate_phony_line(src_relative+13, "o");
1020 0 : rc = generate_phony_file(fn, created_line);
1021 : }
1022 0 : else if(strncmp(src_relative, "CObject/", 8) == 0)
1023 : {
1024 0 : created_line = generate_phony_line(src_relative+8, "o");
1025 0 : rc = generate_phony_file(fn, created_line);
1026 : }
1027 0 : else if(strncmp(src_relative, "GenCObject/", 11) == 0)
1028 : {
1029 0 : created_line = generate_phony_line(src_relative+11, "o");
1030 0 : rc = generate_phony_file(fn, created_line);
1031 : }
1032 0 : else if(strncmp(src_relative, "SdiObject/", 10) == 0)
1033 : {
1034 0 : created_line = generate_phony_line(src_relative+10, "o");
1035 0 : rc = generate_phony_file(fn, created_line);
1036 : }
1037 0 : else if(strncmp(src_relative, "AsmObject/", 10) == 0)
1038 : {
1039 0 : created_line = generate_phony_line(src_relative+10, "o");
1040 0 : rc = generate_phony_file(fn, created_line);
1041 : }
1042 0 : else if(strncmp(src_relative, "ObjCxxObject/", 13) == 0)
1043 : {
1044 0 : created_line = generate_phony_line(src_relative+13, "o");
1045 0 : rc = generate_phony_file(fn, created_line);
1046 : }
1047 0 : else if(strncmp(src_relative, "ObjCObject/", 11) == 0)
1048 : {
1049 0 : created_line = generate_phony_line(src_relative+11, "o");
1050 0 : rc = generate_phony_file(fn, created_line);
1051 : }
1052 : else
1053 : {
1054 0 : fprintf(stderr, "no magic for %s(%s) in %s\n", fn, src_relative, work_dir);
1055 : }
1056 : }
1057 0 : if(!rc)
1058 : {
1059 0 : puts(created_line);
1060 : }
1061 : }
1062 : }
1063 0 : return rc;
1064 : }
1065 :
1066 0 : static void _usage(void)
1067 : {
1068 0 : fputs("Usage: concat-deps <file that contains dep_files>\n", stderr);
1069 0 : }
1070 :
1071 : #define kDEFAULT_HASH_SIZE 4096
1072 : #define PHONY_TARGET_BUFFER 4096
1073 :
1074 0 : static int get_var(char **var, const char *name)
1075 : {
1076 0 : *var = (char *)getenv(name);
1077 0 : if(!*var)
1078 : {
1079 0 : fprintf(stderr,"Error: %s is missing in the environement\n", name);
1080 0 : return 1;
1081 : }
1082 : return 0;
1083 : }
1084 :
1085 0 : int main(int argc, char** argv)
1086 : {
1087 0 : int rc = 0;
1088 0 : off_t in_list_size = 0;
1089 : char* in_list;
1090 : char* in_list_cursor;
1091 : char* in_list_base;
1092 : struct hash* dep_hash;
1093 : const char *env_str;
1094 :
1095 0 : if(argc < 2)
1096 : {
1097 0 : _usage();
1098 0 : return 1;
1099 : }
1100 0 : if(get_var(&base_dir, "SRCDIR") || get_var(&work_dir, "WORKDIR"))
1101 : return 1;
1102 0 : work_dir_len = strlen(work_dir);
1103 0 : phony_content_buffer = malloc(PHONY_TARGET_BUFFER);
1104 0 : strcpy(phony_content_buffer, work_dir);
1105 0 : phony_content_buffer[work_dir_len] = '/';
1106 :
1107 0 : env_str = getenv("SYSTEM_BOOST");
1108 0 : internal_boost = !env_str || strcmp(env_str,"TRUE");
1109 :
1110 0 : in_list = file_load(argv[1], &in_list_size, &rc);
1111 0 : if(!rc)
1112 : {
1113 0 : dep_hash = hash_create( kDEFAULT_HASH_SIZE);
1114 : in_list_base = in_list_cursor = in_list;
1115 :
1116 : /* extract filename of dep file from a 'space' separated list */
1117 0 : while(*in_list_cursor)
1118 : {
1119 0 : if(*in_list_cursor == ' ' || *in_list_cursor == '\n')
1120 : {
1121 0 : *in_list_cursor = 0;
1122 0 : if(in_list_base < in_list_cursor)
1123 : {
1124 0 : rc = _process(dep_hash, in_list_base);
1125 0 : if(rc)
1126 : {
1127 : break;
1128 : }
1129 : }
1130 0 : in_list_cursor += 1;
1131 : in_list_base = in_list_cursor;
1132 : }
1133 : else
1134 : {
1135 0 : in_list_cursor += 1;
1136 : }
1137 : }
1138 0 : if(!rc)
1139 : {
1140 : /* catch the last entry in case the input did not terminate with a 'space' */
1141 0 : if(in_list_base < in_list_cursor)
1142 : {
1143 0 : rc = _process(dep_hash, in_list_base);
1144 : }
1145 : }
1146 : #ifdef HASH_STAT
1147 : fprintf(stderr, "stats: u:%d s:%d l:%d t:%d c:%d m:%d $:%d\n",
1148 : dep_hash->used, dep_hash->size, dep_hash->load_limit, dep_hash->stored,
1149 : dep_hash->collisions, dep_hash->memcmp, dep_hash->cost);
1150 : #endif
1151 : }
1152 0 : return rc;
1153 : }
1154 :
1155 : /* vim:set shiftwidth=4 softtabstop=4 expandtab: */
|