Line data Source code
1 : /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
2 : /*
3 : * Copyright (C) 2011 Norbert Thiebaud
4 : * License: GPLv3
5 : */
6 :
7 : /* define to activate stats reporting on hash usage*/
8 : /* #define HASH_STAT */
9 :
10 : /* ===============================================
11 : * Set-up: defines to identify the system and system related properties
12 : * ===============================================
13 : */
14 :
15 : #ifdef __APPLE__
16 : #ifdef __x86_64__
17 : #undef CORE_BIG_ENDIAN
18 : #define CORE_LITTLE_ENDIAN
19 : #else
20 : #define CORE_BIG_ENDIAN
21 : #undef CORE_LITTLE_ENDIAN
22 : #endif
23 :
24 : #endif
25 : #ifdef _AIX
26 : #define CORE_BIG_ENDIAN
27 : #undef CORE_LITTLE_ENDIAN
28 : #endif /* Def _AIX */
29 :
30 : #ifdef _MSC_VER
31 : #define __windows
32 : #undef CORE_BIG_ENDIAN
33 : #define CORE_LITTLE_ENDIAN
34 : #endif /* Def _MSC_VER */
35 :
36 : #if defined(__linux) || defined(__OpenBSD__) || \
37 : defined(__FreeBSD__) || defined(__NetBSD__) || \
38 : defined(__DragonFly__) || defined(__FreeBSD_kernel__)
39 : #include <sys/param.h>
40 : #if __BYTE_ORDER == __LITTLE_ENDIAN
41 : #undef CORE_BIG_ENDIAN
42 : #define CORE_LITTLE_ENDIAN
43 : #else /* !(__BYTE_ORDER == __LITTLE_ENDIAN) */
44 : #if __BYTE_ORDER == __BIG_ENDIAN
45 : #define CORE_BIG_ENDIAN
46 : #undef CORE_LITTLE_ENDIAN
47 : #endif /* __BYTE_ORDER == __BIG_ENDIAN */
48 : #endif /* !(__BYTE_ORDER == __LITTLE_ENDIAN) */
49 : #endif /* Def __linux || Def *BSD */
50 :
51 : #ifdef __sun
52 : #ifdef __sparc
53 : #define CORE_BIG_ENDIAN
54 : #undef CORE_LITTLE_ENDIAN
55 : #else /* Ndef __sparc */
56 : #undef CORE_BIG_ENDIAN
57 : #define CORE_LITTLE_ENDIAN
58 : #endif /* Ndef __sparc */
59 : #endif /* Def __sun */
60 :
61 : #include <assert.h>
62 : #include <stdio.h>
63 : #include <stdlib.h>
64 : #include <sys/types.h>
65 : #include <sys/stat.h>
66 : #include <errno.h>
67 : #include <fcntl.h>
68 : #include <string.h>
69 : #include <ctype.h>
70 :
71 : #ifdef __windows
72 : #include <io.h>
73 : #else
74 : #include <unistd.h>
75 : #endif
76 :
77 : #include <config_options.h>
78 :
79 : /* modes */
80 : #ifdef __windows
81 : #define FILE_O_RDONLY _O_RDONLY
82 : #define FILE_O_BINARY _O_BINARY
83 : #define PATHNCMP _strnicmp /* MSVC converts paths to lower-case sometimes? */
84 : #define inline __inline
85 : #define ssize_t long
86 : #define S_ISREG(mode) (((mode) & _S_IFMT) == (_S_IFREG)) /* MSVC does not have this macro */
87 : #else /* not windaube */
88 : #define FILE_O_RDONLY O_RDONLY
89 : #define FILE_O_BINARY 0
90 : #define PATHNCMP strncmp
91 : #endif /* not windaube */
92 :
93 : #ifndef TRUE
94 : #define TRUE 1
95 : #endif
96 : #ifndef FALSE
97 : #define FALSE 0
98 : #endif
99 :
100 : int internal_boost = 0;
101 : static char* base_dir;
102 : static char* work_dir;
103 : size_t work_dir_len;
104 :
105 : #ifdef __GNUC__
106 : #define clz __builtin_clz
107 : #else
108 : static inline int clz(unsigned int value)
109 : {
110 : int result = 32;
111 :
112 : while(value)
113 : {
114 : value >>= 1;
115 : result -= 1;
116 : }
117 : return result;
118 : }
119 : #endif
120 :
121 : static inline unsigned int get_unaligned_uint(const unsigned char* cursor)
122 : {
123 : unsigned int result;
124 :
125 32487556 : memcpy(&result, cursor, sizeof(unsigned int));
126 : return result;
127 : }
128 :
129 : /* ===============================================
130 : * memory pool for fast fix-size allocation (non-tread-safe)
131 : * ===============================================
132 : */
133 : struct pool
134 : {
135 : void* head_free; /**< head of a linked list of freed element */
136 : char* fresh; /**< top of a memory block to dig new element */
137 : char* tail; /**< to detect end of extent... when fresh pass tail */
138 : void* extent; /**< pointer to the primary extent block */
139 : int size_elem; /**< size of an element. */
140 : int primary; /**< primary allocation in bytes */
141 : int secondary; /**< secondary allocation in bytes */
142 : };
143 : #define POOL_ALIGN_INCREMENT 8 /**< Alignement, must be a power of 2 and of size > to sizeof(void*) */
144 :
145 :
146 1225 : static void* pool_take_extent(struct pool* pool, int allocate)
147 : {
148 : unsigned int size = 0;
149 : void* extent;
150 : void* data = NULL;
151 :
152 1225 : if(pool->extent)
153 : {
154 : /* we already have an extent, so this is a secondary */
155 0 : if(pool->secondary)
156 : {
157 0 : size = pool->secondary;
158 : }
159 : }
160 : else
161 : {
162 : assert(pool->primary);
163 1225 : size = pool->primary;
164 : }
165 1225 : if(size)
166 : {
167 1225 : extent = malloc(size);
168 1225 : if(extent)
169 : {
170 1225 : *(void**)extent = pool->extent;
171 1225 : pool->extent = extent;
172 1225 : if(allocate)
173 : {
174 0 : data = ((char*)extent) + POOL_ALIGN_INCREMENT;
175 0 : pool->fresh = ((char*)data) + pool->size_elem;
176 0 : pool->tail = pool->fresh + (size - pool->size_elem);
177 : }
178 : else
179 : {
180 1225 : pool->fresh = ((char*)extent) + POOL_ALIGN_INCREMENT;
181 1225 : pool->tail = pool->fresh + (size - pool->size_elem);
182 : }
183 : }
184 : }
185 1225 : return data;
186 : }
187 :
188 : /* Create a memory pool for fix size objects
189 : * this is a simplified implementation that
190 : * is _not_ thread safe.
191 : */
192 1225 : static struct pool* pool_create(int size_elem, int primary, int secondary)
193 : {
194 : struct pool* pool;
195 :
196 : assert(primary > 0);
197 : assert(secondary >= 0);
198 : assert(size_elem > 0);
199 :
200 1225 : pool = (struct pool*)calloc(1, sizeof(struct pool));
201 1225 : if(!pool) return NULL;
202 : /* Adjust the element size so that it be aligned, and so that an element could
203 : * at least contain a void*
204 : */
205 1225 : pool->size_elem = size_elem = (size_elem + POOL_ALIGN_INCREMENT - 1) & ~(POOL_ALIGN_INCREMENT - 1);
206 :
207 1225 : pool->primary = (size_elem * primary) + POOL_ALIGN_INCREMENT;
208 1225 : pool->secondary = secondary > 0 ? (size_elem * secondary) + POOL_ALIGN_INCREMENT : 0;
209 1225 : pool_take_extent(pool, FALSE);
210 :
211 1225 : return pool;
212 :
213 : }
214 :
215 0 : static void pool_destroy(struct pool* pool)
216 : {
217 : void* extent;
218 : void* next;
219 :
220 0 : if(pool != NULL)
221 : {
222 0 : extent = pool->extent;
223 0 : while(extent)
224 : {
225 0 : next = *(void**)extent;
226 0 : free(extent);
227 : extent = next;
228 : }
229 0 : free(pool);
230 : }
231 0 : }
232 :
233 239717 : static inline void* pool_alloc(struct pool* pool)
234 : {
235 : void* data;
236 :
237 239717 : data = pool->head_free;
238 239717 : if(data == NULL)
239 : {
240 : /* we have no old-freed elem */
241 239717 : if(pool->fresh <= pool->tail)
242 : {
243 : /* pick a slice of the current extent */
244 : data = (void*)pool->fresh;
245 239717 : pool->fresh += pool->size_elem;
246 : }
247 : else
248 : {
249 : /* allocate a new extent */
250 0 : data = pool_take_extent(pool, TRUE);
251 : }
252 : }
253 : else
254 : {
255 : /* re-used old freed element by chopipng the head of the free list */
256 0 : pool->head_free = *(void**)data;
257 : }
258 :
259 239717 : return data;
260 : }
261 :
262 :
263 : /* ===============================================
264 : * Hash implementation custumized to be just tracking
265 : * a unique list of string (i.e no data associated
266 : * with the key, no need for retrieval, etc..
267 : *
268 : * This is tuned for the particular use-case we have here
269 : * measures in tail_build showed that
270 : * we can get north of 4000 distinct values stored in a hash
271 : * the collision rate is at worse around 2%
272 : * the collision needing an expensive memcmp to resolve
273 : * have a rate typically at 1 per 1000
274 : * for tail_build we register 37229 unique key
275 : * with a total of 377 extra memcmp needed
276 : * which is completely negligible compared to the
277 : * number of memcmp required to eliminate duplicate
278 : * entry (north of 2.5 millions for tail_build)
279 : * ===============================================
280 : */
281 :
282 : struct hash_elem
283 : {
284 : struct hash_elem* next;
285 : const char* key;
286 : int key_len;
287 : };
288 :
289 : struct hash
290 : {
291 : struct hash_elem** array;
292 : struct pool* elems_pool;
293 : int flags;
294 : unsigned int used;
295 : unsigned int size;
296 : unsigned int load_limit;
297 : #ifdef HASH_STAT
298 : int stored;
299 : int collisions;
300 : int cost;
301 : int memcmp;
302 : #endif
303 : };
304 : #define HASH_F_NO_RESIZE (1<<0)
305 :
306 : /* The following hash_compute function was adapted from :
307 : * lookup3.c, by Bob Jenkins, May 2006, Public Domain.
308 : *
309 : * The changes from the original are mostly cosmetic
310 : */
311 : #define rot(x,k) (((x)<<(k)) | ((x)>>(32-(k))))
312 :
313 :
314 : #if defined CORE_BIG_ENDIAN
315 : #define MASK_C1 0xFFFFFF00
316 : #define MASK_C2 0xFFFF0000
317 : #define MASK_C3 0xFF000000
318 : #elif defined CORE_LITTLE_ENDIAN
319 : #define MASK_C1 0xFFFFFF
320 : #define MASK_C2 0xFFFF
321 : #define MASK_C3 0xFF
322 : #else
323 : #error "Missing Endianness definition"
324 : #endif
325 :
326 :
327 : #define mix(a,b,c) \
328 : { \
329 : a -= c; a ^= rot(c, 4); c += b; \
330 : b -= a; b ^= rot(a, 6); a += c; \
331 : c -= b; c ^= rot(b, 8); b += a; \
332 : a -= c; a ^= rot(c,16); c += b; \
333 : b -= a; b ^= rot(a,19); a += c; \
334 : c -= b; c ^= rot(b, 4); b += a; \
335 : }
336 : #define final(a,b,c) \
337 : { \
338 : c ^= b; c -= rot(b,14); \
339 : a ^= c; a -= rot(c,11); \
340 : b ^= a; b -= rot(a,25); \
341 : c ^= b; c -= rot(b,16); \
342 : a ^= c; a -= rot(c,4); \
343 : b ^= a; b -= rot(a,14); \
344 : c ^= b; c -= rot(b,24); \
345 : }
346 :
347 3609759 : static unsigned int hash_compute( struct hash* hash, const char* key, int length)
348 : {
349 : unsigned int a;
350 : unsigned int b;
351 : unsigned int c; /* internal state */
352 : const unsigned char* uk = (const unsigned char*)key;
353 :
354 : /* Set up the internal state */
355 3609759 : a = b = c = 0xdeadbeef + (length << 2);
356 :
357 : /* we use this to 'hash' full path with mostly a common root
358 : * let's now waste too much cycles hashing mostly constant stuff
359 : */
360 3609759 : if(length > 36)
361 : {
362 3609069 : uk += length - 36;
363 : length = 36;
364 : }
365 : /*------ all but last block: aligned reads and affect 32 bits of (a,b,c) */
366 10829277 : while (length > 12)
367 : {
368 7219518 : a += get_unaligned_uint(uk);
369 7219518 : b += get_unaligned_uint(uk+4);
370 7219518 : c += get_unaligned_uint(uk+8);
371 7219518 : mix(a,b,c);
372 7219518 : length -= 12;
373 7219518 : uk += 12;
374 : }
375 :
376 : /*----------------------------- handle the last (probably partial) block */
377 : /* Note: we possibly over-read, which would trigger complaint from VALGRIND
378 : * but we mask the undefined stuff if any, so we are still good, thanks
379 : * to alignment of memory allocation and tail-memory management overhead
380 : * we always can read 3 bytes past the official end without triggering
381 : * a segfault -- if you find a platform/compiler couple for which that postulat
382 : * is false, then you just need to over-allocate by 2 more bytes in file_load()
383 : * file_load already over-allocate by 1 to sitck a \0 at the end of the buffer.
384 : */
385 3609759 : switch(length)
386 : {
387 10827570 : case 12: c+=get_unaligned_uint(uk+8); b+=get_unaligned_uint(uk+4); a+=get_unaligned_uint(uk); break;
388 363 : case 11: c+=get_unaligned_uint(uk+8) & MASK_C1; b+=get_unaligned_uint(uk+4); a+=get_unaligned_uint(uk); break;
389 333 : case 10: c+=get_unaligned_uint(uk+8) & MASK_C2; b+=get_unaligned_uint(uk+4); a+=get_unaligned_uint(uk); break;
390 243 : case 9 : c+=get_unaligned_uint(uk+8) & MASK_C3; b+=get_unaligned_uint(uk+4); a+=get_unaligned_uint(uk); break;
391 154 : case 8 : b+=get_unaligned_uint(uk+4); a+=get_unaligned_uint(uk); break;
392 186 : case 7 : b+=get_unaligned_uint(uk+4) & MASK_C1; a+=get_unaligned_uint(uk); break;
393 68 : case 6 : b+=get_unaligned_uint(uk+4) & MASK_C2; a+=get_unaligned_uint(uk); break;
394 66 : case 5 : b+=get_unaligned_uint(uk+4) & MASK_C3; a+=get_unaligned_uint(uk); break;
395 18 : case 4 : a+=get_unaligned_uint(uk); break;
396 0 : case 3 : a+=get_unaligned_uint(uk) & MASK_C1; break;
397 0 : case 2 : a+=get_unaligned_uint(uk) & MASK_C2; break;
398 1 : case 1 : a+=get_unaligned_uint(uk) & MASK_C3; break;
399 0 : case 0 : return c & hash->size; /* zero length strings require no mixing */
400 : }
401 :
402 3609759 : final(a,b,c);
403 3609759 : return c & hash->size;
404 : }
405 :
406 0 : static void hash_destroy(struct hash* hash)
407 : {
408 0 : if(hash)
409 : {
410 0 : if(hash->array)
411 : {
412 0 : free(hash->array);
413 : }
414 0 : if(hash->elems_pool)
415 : {
416 0 : pool_destroy(hash->elems_pool);
417 : }
418 0 : free(hash);
419 : }
420 0 : }
421 :
422 1225 : static struct hash* hash_create(unsigned int size)
423 : {
424 : struct hash* hash;
425 :
426 : assert(size > 0);
427 1225 : hash = calloc(1, sizeof(struct hash));
428 1225 : if(hash)
429 : {
430 1225 : size += (size >> 2) + 1; /* ~ 75% load factor */
431 1225 : if(size >= 15)
432 : {
433 1225 : hash->size = (((unsigned int)0xFFFFFFFF) >> clz((unsigned int)size));
434 : }
435 : else
436 : {
437 0 : hash->size = size = 15;
438 : }
439 1225 : hash->load_limit = hash->size - (hash->size >> 2);
440 1225 : hash->used = 0;
441 1225 : hash->array = (struct hash_elem**)calloc(hash->size + 1, sizeof(struct hash_elem*));
442 1225 : if(hash->array == NULL)
443 : {
444 0 : hash_destroy(hash);
445 : hash = NULL;
446 : }
447 : }
448 1225 : if(hash)
449 : {
450 1225 : hash->elems_pool = pool_create(sizeof(struct hash_elem),
451 1225 : size, size << 1);
452 1225 : if(!hash->elems_pool)
453 : {
454 0 : hash_destroy(hash);
455 : hash = NULL;
456 : }
457 : }
458 1225 : return hash;
459 : }
460 :
461 0 : static void hash_resize(struct hash* hash)
462 : {
463 0 : unsigned int old_size = hash->size;
464 : unsigned int hashed;
465 : struct hash_elem* hash_elem;
466 : struct hash_elem* next;
467 : struct hash_elem** array;
468 : unsigned int i;
469 :
470 0 : hash->size = (old_size << 1) + 1;
471 : /* we really should avoid to get there... so print a message to alert of the condition */
472 0 : fprintf(stderr, "resize hash %d -> %d\n", old_size, hash->size);
473 0 : if(hash->size == old_size)
474 : {
475 0 : hash->flags |= HASH_F_NO_RESIZE;
476 0 : return;
477 : }
478 0 : array = calloc(hash->size + 1, sizeof(struct hash_elem*));
479 0 : if(array)
480 : {
481 0 : hash->load_limit = hash->size - (hash->size >> 2);
482 0 : for(i=0; i <= old_size; i++)
483 : {
484 0 : hash_elem = (struct hash_elem*)hash->array[i];
485 0 : while(hash_elem)
486 : {
487 0 : next = hash_elem->next;
488 :
489 0 : hashed = hash_compute(hash, hash_elem->key, hash_elem->key_len);
490 0 : hash_elem->next = array[hashed];
491 0 : array[hashed] = hash_elem;
492 : hash_elem = next;
493 : }
494 : }
495 0 : free(hash->array);
496 0 : hash->array = (struct hash_elem**)array;
497 : }
498 : else
499 : {
500 0 : hash->size = old_size;
501 0 : hash->flags |= HASH_F_NO_RESIZE;
502 : }
503 : }
504 :
505 : #ifdef HASH_STAT
506 : static inline int compare_key(struct hash* hash, const char* a, const char* b, int len, int* cost)
507 : {
508 : *cost += 1;
509 : hash->memcmp += 1;
510 : return memcmp(a,b, len);
511 : }
512 : #else
513 : #define compare_key(h,a,b,l,c) memcmp(a,b,l)
514 : #endif
515 :
516 : /* a customized hash_store function that just store the key and return
517 : * TRUE if the key was effectively stored, or FALSE if the key was already there
518 : */
519 3609759 : static int hash_store(struct hash* hash, const char* key, int key_len)
520 : {
521 : unsigned int hashed;
522 : struct hash_elem* hash_elem;
523 : int cost = 0;
524 :
525 : (void) cost;
526 3609759 : hashed = hash_compute(hash, key, key_len);
527 : #ifdef HASH_STAT
528 : hash->stored += 1;
529 : #endif
530 3609759 : hash_elem = (struct hash_elem*)hash->array[hashed];
531 7860467 : while(hash_elem && (hash_elem->key_len != key_len || compare_key(hash, hash_elem->key, key, key_len, &cost)))
532 : {
533 640949 : hash_elem = hash_elem->next;
534 : }
535 :
536 3609759 : if(!hash_elem)
537 : {
538 239717 : hash_elem = pool_alloc(hash->elems_pool);
539 239717 : if(hash_elem)
540 : {
541 239717 : hash_elem->key = key;
542 239717 : hash_elem->key_len = key_len;
543 239717 : hash_elem->next = hash->array[hashed];
544 :
545 : #ifdef HASH_STAT
546 : if(hash_elem->next)
547 : {
548 : hash->collisions += 1;
549 : hash->cost += cost;
550 : }
551 : #endif
552 239717 : hash->array[hashed] = hash_elem;
553 239717 : hash->used += 1;
554 239717 : if(hash->used > hash->load_limit)
555 : {
556 0 : hash_resize(hash);
557 : }
558 : }
559 : return TRUE;
560 : }
561 : return FALSE;
562 : }
563 :
564 20374 : static int file_stat(const char* name, struct stat* buffer_stat, int* rc)
565 : {
566 : int rc_local = 0;
567 :
568 : rc_local = stat(name, buffer_stat);
569 20374 : if (rc_local < 0)
570 : {
571 9483 : *rc = errno;
572 : }
573 20374 : return rc_local;
574 : }
575 :
576 20374 : static off_t file_get_size(const char* name, int* rc)
577 : {
578 : struct stat buffer_stat;
579 : off_t size = -1;
580 :
581 20374 : if (!file_stat(name, &buffer_stat, rc))
582 : {
583 10891 : if(S_ISREG(buffer_stat.st_mode))
584 : {
585 10891 : size = buffer_stat.st_size;
586 : }
587 : else
588 : {
589 0 : *rc = EINVAL;
590 : }
591 : }
592 20374 : return size;
593 : }
594 :
595 : #if !ENABLE_RUNTIME_OPTIMIZATIONS
596 : static void * file_load_buffers[100000];
597 : static size_t file_load_buffer_count = 0;
598 : #endif
599 :
600 20374 : static char* file_load(const char* name, off_t* size, int* return_rc)
601 : {
602 20374 : off_t local_size = 0;
603 20374 : int rc = 0;
604 : char* buffer = NULL;
605 : int fd;
606 :
607 : assert(name != NULL);
608 :
609 20374 : if(!size)
610 : {
611 : size = &local_size;
612 : }
613 20374 : *size = file_get_size(name, &rc);
614 20374 : if (!rc && *size >= 0)
615 : {
616 10891 : fd = open(name, FILE_O_RDONLY | FILE_O_BINARY);
617 10891 : if (!(fd == -1))
618 : {
619 10891 : buffer = malloc((size_t)(*size + 1));
620 : #if !ENABLE_RUNTIME_OPTIMIZATIONS
621 : if (buffer != NULL)
622 : {
623 : if (file_load_buffer_count == 100000)
624 : {
625 : free(buffer);
626 : buffer = NULL;
627 : }
628 : else
629 : {
630 : file_load_buffers[file_load_buffer_count++] = buffer;
631 : }
632 : }
633 : #endif
634 10891 : if (buffer == NULL)
635 : {
636 0 : rc = ENOMEM;
637 : }
638 : else
639 : {
640 : ssize_t i;
641 :
642 : REDO:
643 10891 : i = read(fd, buffer, (size_t)(*size));
644 10891 : if(i == -1)
645 : {
646 0 : if(errno == EINTR)
647 : {
648 : goto REDO;
649 : }
650 : else
651 : {
652 0 : rc = errno;
653 : }
654 : }
655 : else
656 : {
657 10891 : if (i != *size)
658 : {
659 0 : rc = EIO;
660 : }
661 : }
662 10891 : buffer[*size] = 0;
663 : }
664 10891 : close(fd);
665 : }
666 : }
667 :
668 20374 : if(rc && buffer)
669 : {
670 0 : free(buffer);
671 : buffer = NULL;
672 : }
673 20374 : if(return_rc)
674 : {
675 20374 : *return_rc = rc;
676 : }
677 20374 : return buffer;
678 : }
679 :
680 1026 : static void _cancel_relative(char* base, char** ref_cursor, char** ref_cursor_out, char* end)
681 : {
682 1026 : char* cursor = *ref_cursor;
683 1026 : char* cursor_out = *ref_cursor_out;
684 :
685 : do
686 : {
687 1122 : cursor += 3;
688 2244 : while(cursor_out > base && cursor_out[-1] == '/')
689 0 : cursor_out--;
690 6684 : while(cursor_out > base && *--cursor_out != '/');
691 : }
692 1122 : while(cursor + 3 < end && !memcmp(cursor, "/../", 4));
693 1026 : *ref_cursor = cursor;
694 1026 : *ref_cursor_out = cursor_out;
695 1026 : }
696 :
697 : static inline void eat_space(char ** token)
698 : {
699 15338418 : while ((' ' == **token) || ('\t' == **token)) {
700 7295135 : ++(*token);
701 : }
702 : }
703 :
704 : /*
705 : * Prune LibreOffice specific duplicate dependencies to improve
706 : * gnumake startup time, and shrink the disk-space footprint.
707 : */
708 : static inline int
709 10933172 : elide_dependency(const char* key, int key_len, const char **unpacked_end)
710 : {
711 : #if 0
712 : {
713 : int i;
714 : fprintf (stderr, "elide?%d!: '", internal_boost);
715 : for (i = 0; i < key_len; i++) {
716 : fprintf (stderr, "%c", key[i]);
717 : }
718 : fprintf (stderr, "'\n");
719 : }
720 : #endif
721 :
722 : /* boost brings a plague of header files */
723 : int i;
724 : int unpacked = 0;
725 : /* walk down path elements */
726 615430230 : for (i = 0; i < key_len - 1; i++)
727 : {
728 604553855 : if (key[i] == '/')
729 : {
730 76606805 : if (0 == unpacked)
731 : {
732 43547758 : if (!PATHNCMP(key + i + 1, "workdir/", 8))
733 : {
734 : unpacked = 1;
735 4139831 : continue;
736 : }
737 : }
738 : else
739 : {
740 33059047 : if (!PATHNCMP(key + i + 1, "UnpackedTarball/", 16))
741 : {
742 56797 : if (unpacked_end)
743 28518 : *unpacked_end = strchr(key + i + 17, '/');
744 : return 1;
745 : }
746 : }
747 : }
748 : }
749 :
750 : return 0;
751 : }
752 :
753 : /*
754 : * We collapse tens of internal boost headers to the unpacked target, such
755 : * that you can re-compile / install boost and all is well.
756 : */
757 : static void emit_single_boost_header(void)
758 : {
759 : #define BOOST_TARGET "/UnpackedTarball/boost.done"
760 0 : fprintf(stdout, "%s" BOOST_TARGET " ", work_dir);
761 : }
762 :
763 28518 : static void emit_unpacked_target(const char* token, const char* end)
764 : {
765 28518 : fwrite(token, 1, end-token, stdout);
766 28518 : fputs(".done ", stdout);
767 28518 : }
768 :
769 : /* prefix paths to absolute */
770 249383 : static inline void print_fullpaths(char* line)
771 : {
772 : char* token;
773 : char* end;
774 : int boost_count = 0;
775 : int token_len;
776 249383 : const char * unpacked_end = 0; /* end of UnpackedTarget match (if any) */
777 : /* for UnpackedTarget the target is GenC{,xx}Object, dont mangle! */
778 : int target_seen = 0;
779 :
780 : token = line;
781 : eat_space(&token);
782 7793900 : while (*token)
783 : {
784 : end = token;
785 : /* hard to believe that in this day and age drive letters still exist */
786 7544517 : if (*end && (':' == *(end+1)) &&
787 0 : (('\\' == *(end+2)) || ('/' == *(end+2))) && isalpha(*end))
788 : {
789 0 : end = end + 3; /* only one cross, err drive letter per filename */
790 : }
791 343044788 : while (*end && (' ' != *end) && ('\t' != *end) && (':' != *end)) {
792 335500271 : ++end;
793 : }
794 7544517 : token_len = end - token;
795 14839651 : if (target_seen &&
796 7295134 : elide_dependency(token, token_len, &unpacked_end))
797 : {
798 28518 : if (unpacked_end)
799 : {
800 28518 : if (internal_boost && !PATHNCMP(unpacked_end - 5, "boost", 5))
801 : {
802 0 : ++boost_count;
803 0 : if (boost_count == 1)
804 : emit_single_boost_header();
805 : else
806 : {
807 : /* don't output, and swallow trailing \\\n if any */
808 0 : token = end;
809 : eat_space(&token);
810 0 : if (token[0] == '\\' && token[1] == '\n')
811 0 : end = token + 2;
812 : }
813 : }
814 : else
815 : {
816 28518 : emit_unpacked_target(token, unpacked_end);
817 : }
818 28518 : unpacked_end = 0;
819 : }
820 : }
821 : else
822 : {
823 7515999 : if (fwrite(token, token_len, 1, stdout) != 1)
824 0 : abort();
825 7515999 : fputc(' ', stdout);
826 : }
827 : token = end;
828 : eat_space(&token);
829 7544517 : if (!target_seen)
830 : {
831 249383 : if (':' == *token)
832 : {
833 : target_seen = 1;
834 249383 : fputc(':', stdout);
835 249383 : ++token;
836 : eat_space(&token);
837 : }
838 : }
839 : }
840 249383 : }
841 :
842 : static inline char * eat_space_at_end(char * end)
843 : {
844 : char * real_end;
845 : assert('\0' == *end);
846 3638038 : real_end = end - 1;
847 7276076 : while (' ' == *real_end || '\t' == *real_end || '\n' == *real_end
848 7276076 : || ':' == *real_end)
849 : { /* eat colon and whitespace at end */
850 3638038 : --real_end;
851 : }
852 3638038 : return real_end;
853 : }
854 :
855 : static char* phony_content_buffer;
856 9483 : static inline char* generate_phony_line(char* phony_target, char* extension)
857 : {
858 : char* src;
859 : char* dest;
860 : char* last_dot = NULL;
861 : //fprintf(stderr, "generate_phony_line called with phony_target %s and extension %s\n", phony_target, extension);
862 376449 : for(dest = phony_content_buffer+work_dir_len, src = phony_target; *src != 0; ++src, ++dest)
863 : {
864 366966 : *dest = *src;
865 366966 : if(*dest == '.')
866 : {
867 : last_dot = dest;
868 : }
869 : }
870 : //fprintf(stderr, "generate_phony_line after phony_target copy: %s\n", phony_content_buffer);
871 18603 : for(dest = last_dot+1, src = extension; *src != 0; ++src, ++dest)
872 : {
873 9120 : *dest = *src;
874 : }
875 : //fprintf(stderr, "generate_phony_line after extension add: %s\n", phony_content_buffer);
876 9483 : strcpy(dest, ": $(gb_Helper_PHONY)\n");
877 : //fprintf(stderr, "generate_phony_line after phony add: %s\n", phony_content_buffer);
878 9483 : return phony_content_buffer;
879 : }
880 :
881 9483 : static inline int generate_phony_file(char* fn, char* content)
882 : {
883 : FILE* depfile;
884 9483 : depfile = fopen(fn, "w");
885 9483 : if(!depfile)
886 : {
887 0 : fprintf(stderr, "Could not open '%s' for writing: %s\n", fn, strerror(errno));
888 : }
889 : else
890 : {
891 9483 : fputs(content, depfile);
892 9483 : fclose(depfile);
893 : }
894 9483 : return !depfile;
895 : }
896 :
897 19149 : static int _process(struct hash* dep_hash, char* fn)
898 : {
899 : int rc;
900 : char* buffer;
901 : char* end;
902 : char* cursor;
903 : char* cursor_out;
904 : char* base;
905 : char* created_line = NULL;
906 : char* src_relative;
907 : int continuation = 0;
908 : char last_ns = 0;
909 : off_t size;
910 :
911 19149 : buffer = file_load(fn, &size, &rc);
912 : /* Note: yes we are going to leak 'buffer'
913 : * this is on purpose, to avoid cloning the 'key' out of it
914 : * and our special 'hash' just store the pointer to the key
915 : * inside of buffer, hence it need to remain allocated
916 : */
917 19149 : if(!rc)
918 : {
919 9666 : base = cursor_out = cursor = end = buffer;
920 9666 : end += size;
921 :
922 : /* first eat unneeded space at the beginning of file
923 : */
924 19332 : while(cursor < end && (*cursor == ' ' || *cursor == '\\'))
925 0 : ++cursor;
926 :
927 639024268 : while(cursor < end)
928 : {
929 639014602 : if(*cursor == '\\')
930 : {
931 : continuation = 1;
932 3647430 : *cursor_out++ = *cursor++;
933 : }
934 635367172 : else if(*cursor == '/')
935 : {
936 76947010 : if(cursor + 3 < end)
937 : {
938 76947010 : if(!memcmp(cursor, "/../", 4))
939 : {
940 1026 : _cancel_relative(base, &cursor, &cursor_out, end);
941 : }
942 : }
943 76947010 : *cursor_out++ = *cursor++;
944 : }
945 558420162 : else if(*cursor == '\n')
946 : {
947 10933172 : if(!continuation)
948 : {
949 7285742 : *cursor_out = 0;
950 7285742 : if(base < cursor)
951 : {
952 : /* here we have a complete rule */
953 3647704 : if(last_ns == ':')
954 : {
955 : /* if the rule ended in ':' that is a no-dep rule
956 : * these are the one for which we want to filter
957 : * duplicate out
958 : */
959 7276076 : int key_len = eat_space_at_end(cursor_out) - base;
960 3638038 : if (!elide_dependency(base,key_len + 1, NULL)
961 3609759 : && hash_store(dep_hash, base, key_len))
962 : {
963 : /* DO NOT modify base after it has been added
964 : as key by hash_store */
965 239717 : print_fullpaths(base);
966 239717 : putc('\n', stdout);
967 : }
968 : }
969 : else
970 : {
971 : /* rule with dep, just write it */
972 9666 : print_fullpaths(base);
973 9666 : putc('\n', stdout);
974 : }
975 : last_ns = ' '; // cannot hurt to reset it
976 : }
977 7285742 : cursor += 1;
978 7285742 : base = cursor_out = cursor;
979 : }
980 : else
981 : {
982 : /* here we have a '\' followed by \n this is a continuation
983 : * i.e not a complete rule yet
984 : */
985 3647430 : *cursor_out++ = *cursor++;
986 : continuation = 0; // cancel current one (empty lines!)
987 : }
988 : }
989 : else
990 : {
991 : continuation = 0;
992 : /* not using isspace() here save 25% of I refs and 75% of D refs based on cachegrind */
993 547486990 : if(*cursor != ' ' && *cursor != '\n' && *cursor != '\t' )
994 : {
995 : last_ns = *cursor;
996 : }
997 547486990 : *cursor_out++ = *cursor++;
998 : }
999 : }
1000 : /* just in case the file did not end with a \n, there may be a pending rule */
1001 9666 : if(base < cursor_out)
1002 : {
1003 0 : if(last_ns == ':')
1004 : {
1005 0 : int key_len = eat_space_at_end(cursor_out) - base;
1006 0 : if (!elide_dependency(base,key_len + 1, NULL) &&
1007 0 : hash_store(dep_hash, base, key_len))
1008 : {
1009 0 : puts(base);
1010 0 : putc('\n', stdout);
1011 : }
1012 : }
1013 : else
1014 : {
1015 0 : puts(base);
1016 0 : putc('\n', stdout);
1017 : }
1018 : }
1019 : }
1020 : else
1021 : {
1022 9483 : if(strncmp(fn, work_dir, work_dir_len) == 0)
1023 : {
1024 9483 : if(strncmp(fn+work_dir_len, "/Dep/", 5) == 0)
1025 : {
1026 9483 : src_relative = fn+work_dir_len+5;
1027 : // cases ordered by frequency
1028 9483 : if(strncmp(src_relative, "CxxObject/", 10) == 0)
1029 : {
1030 8497 : created_line = generate_phony_line(src_relative+10, "o");
1031 8497 : rc = generate_phony_file(fn, created_line);
1032 : }
1033 986 : else if(strncmp(fn+work_dir_len+5, "SrsPartTarget/", 14) == 0)
1034 : {
1035 363 : created_line = generate_phony_line(src_relative+14, "");
1036 363 : rc = generate_phony_file(fn, created_line);
1037 : }
1038 623 : else if(strncmp(src_relative, "GenCxxObject/", 13) == 0)
1039 : {
1040 539 : created_line = generate_phony_line(src_relative+13, "o");
1041 539 : rc = generate_phony_file(fn, created_line);
1042 : }
1043 84 : else if(strncmp(src_relative, "CObject/", 8) == 0)
1044 : {
1045 53 : created_line = generate_phony_line(src_relative+8, "o");
1046 53 : rc = generate_phony_file(fn, created_line);
1047 : }
1048 31 : else if(strncmp(src_relative, "GenCObject/", 11) == 0)
1049 : {
1050 30 : created_line = generate_phony_line(src_relative+11, "o");
1051 30 : rc = generate_phony_file(fn, created_line);
1052 : }
1053 1 : else if(strncmp(src_relative, "SdiObject/", 10) == 0)
1054 : {
1055 0 : created_line = generate_phony_line(src_relative+10, "o");
1056 0 : rc = generate_phony_file(fn, created_line);
1057 : }
1058 1 : else if(strncmp(src_relative, "AsmObject/", 10) == 0)
1059 : {
1060 1 : created_line = generate_phony_line(src_relative+10, "o");
1061 1 : rc = generate_phony_file(fn, created_line);
1062 : }
1063 0 : else if(strncmp(src_relative, "ObjCxxObject/", 13) == 0)
1064 : {
1065 0 : created_line = generate_phony_line(src_relative+13, "o");
1066 0 : rc = generate_phony_file(fn, created_line);
1067 : }
1068 0 : else if(strncmp(src_relative, "ObjCObject/", 11) == 0)
1069 : {
1070 0 : created_line = generate_phony_line(src_relative+11, "o");
1071 0 : rc = generate_phony_file(fn, created_line);
1072 : }
1073 : else
1074 : {
1075 0 : fprintf(stderr, "no magic for %s(%s) in %s\n", fn, src_relative, work_dir);
1076 : }
1077 : }
1078 9483 : if(!rc)
1079 : {
1080 9483 : puts(created_line);
1081 : }
1082 : }
1083 : }
1084 19149 : return rc;
1085 : }
1086 :
1087 : static void _usage(void)
1088 : {
1089 0 : fputs("Usage: concat-deps <file that contains dep_files>\n", stderr);
1090 : }
1091 :
1092 : #define kDEFAULT_HASH_SIZE 4096
1093 : #define PHONY_TARGET_BUFFER 4096
1094 :
1095 2450 : static int get_var(char **var, const char *name)
1096 : {
1097 2450 : *var = (char *)getenv(name);
1098 2450 : if(!*var)
1099 : {
1100 0 : fprintf(stderr,"Error: %s is missing in the environement\n", name);
1101 0 : return 1;
1102 : }
1103 : return 0;
1104 : }
1105 :
1106 1225 : int main(int argc, char** argv)
1107 : {
1108 1225 : int rc = 0;
1109 1225 : off_t in_list_size = 0;
1110 : char* in_list;
1111 : char* in_list_cursor;
1112 : char* in_list_base;
1113 : struct hash* dep_hash = 0;
1114 : const char *env_str;
1115 :
1116 1225 : if(argc < 2)
1117 : {
1118 : _usage();
1119 0 : return 1;
1120 : }
1121 1225 : if(get_var(&base_dir, "SRCDIR") || get_var(&work_dir, "WORKDIR"))
1122 : return 1;
1123 1225 : work_dir_len = strlen(work_dir);
1124 1225 : phony_content_buffer = malloc(PHONY_TARGET_BUFFER);
1125 1225 : strcpy(phony_content_buffer, work_dir);
1126 1225 : phony_content_buffer[work_dir_len] = '/';
1127 :
1128 1225 : env_str = getenv("SYSTEM_BOOST");
1129 1225 : internal_boost = !env_str || strcmp(env_str,"TRUE");
1130 :
1131 1225 : in_list = file_load(argv[1], &in_list_size, &rc);
1132 1225 : if(!rc)
1133 : {
1134 1225 : dep_hash = hash_create( kDEFAULT_HASH_SIZE);
1135 : in_list_base = in_list_cursor = in_list;
1136 :
1137 : /* extract filename of dep file from a 'space' separated list */
1138 1715259 : while(*in_list_cursor)
1139 : {
1140 : /* the input here may contain Win32 \r\n EOL */
1141 1712809 : if(*in_list_cursor == ' '
1142 3397489 : || *in_list_cursor == '\n' || *in_list_cursor == '\r')
1143 : {
1144 28129 : *in_list_cursor = 0;
1145 28129 : if(in_list_base < in_list_cursor)
1146 : {
1147 19149 : rc = _process(dep_hash, in_list_base);
1148 19149 : if(rc)
1149 : {
1150 : break;
1151 : }
1152 : }
1153 28129 : in_list_cursor += 1;
1154 28129 : in_list_base = in_list_cursor;
1155 : }
1156 : else
1157 : {
1158 1684680 : in_list_cursor += 1;
1159 : }
1160 : }
1161 1225 : if(!rc)
1162 : {
1163 : /* catch the last entry in case the input did not terminate with a 'space' */
1164 1225 : if(in_list_base < in_list_cursor)
1165 : {
1166 0 : rc = _process(dep_hash, in_list_base);
1167 : }
1168 : }
1169 : #ifdef HASH_STAT
1170 : fprintf(stderr, "stats: u:%d s:%d l:%d t:%d c:%d m:%d $:%d\n",
1171 : dep_hash->used, dep_hash->size, dep_hash->load_limit, dep_hash->stored,
1172 : dep_hash->collisions, dep_hash->memcmp, dep_hash->cost);
1173 : #endif
1174 : }
1175 : #if !ENABLE_RUNTIME_OPTIMIZATIONS
1176 : hash_destroy(dep_hash);
1177 : for (size_t i = 0; i != file_load_buffer_count; ++i)
1178 : {
1179 : free(file_load_buffers[i]);
1180 : }
1181 : #endif
1182 1225 : return rc;
1183 : }
1184 :
1185 : /* vim:set shiftwidth=4 softtabstop=4 expandtab: */
|