Line data Source code
1 : /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
2 : /*
3 : * Copyright (C) 2011 Norbert Thiebaud
4 : * License: GPLv3
5 : */
6 :
7 : /* define to activate stats reporting on hash usage*/
8 : /* #define HASH_STAT */
9 :
10 : /* ===============================================
11 : * Set-up: defines to identify the system and system related properties
12 : * ===============================================
13 : */
14 :
15 : #ifdef __APPLE__
16 : #ifdef __x86_64__
17 : #undef CORE_BIG_ENDIAN
18 : #define CORE_LITTLE_ENDIAN
19 : #else
20 : #define CORE_BIG_ENDIAN
21 : #undef CORE_LITTLE_ENDIAN
22 : #endif
23 :
24 : #endif
25 : #ifdef _AIX
26 : #define CORE_BIG_ENDIAN
27 : #undef CORE_LITTLE_ENDIAN
28 : #endif /* Def _AIX */
29 :
30 : #ifdef _MSC_VER
31 : #define __windows
32 : #undef CORE_BIG_ENDIAN
33 : #define CORE_LITTLE_ENDIAN
34 : #endif /* Def _MSC_VER */
35 :
36 : #if defined(__linux) || defined(__OpenBSD__) || \
37 : defined(__FreeBSD__) || defined(__NetBSD__) || \
38 : defined(__DragonFly__) || defined(__FreeBSD_kernel__)
39 : #include <sys/param.h>
40 : #if __BYTE_ORDER == __LITTLE_ENDIAN
41 : #undef CORE_BIG_ENDIAN
42 : #define CORE_LITTLE_ENDIAN
43 : #else /* !(__BYTE_ORDER == __LITTLE_ENDIAN) */
44 : #if __BYTE_ORDER == __BIG_ENDIAN
45 : #define CORE_BIG_ENDIAN
46 : #undef CORE_LITTLE_ENDIAN
47 : #endif /* __BYTE_ORDER == __BIG_ENDIAN */
48 : #endif /* !(__BYTE_ORDER == __LITTLE_ENDIAN) */
49 : #endif /* Def __linux || Def *BSD */
50 :
51 : #ifdef __sun
52 : #ifdef __sparc
53 : #define CORE_BIG_ENDIAN
54 : #undef CORE_LITTLE_ENDIAN
55 : #else /* Ndef __sparc */
56 : #undef CORE_BIG_ENDIAN
57 : #define CORE_LITTLE_ENDIAN
58 : #endif /* Ndef __sparc */
59 : #endif /* Def __sun */
60 :
61 : #include <assert.h>
62 : #include <stdio.h>
63 : #include <stdlib.h>
64 : #include <sys/types.h>
65 : #include <sys/stat.h>
66 : #include <errno.h>
67 : #include <fcntl.h>
68 : #include <string.h>
69 : #include <ctype.h>
70 :
71 : #ifdef __windows
72 : #include <io.h>
73 : #else
74 : #include <unistd.h>
75 : #endif
76 :
77 : #include <config_options.h>
78 :
79 : /* modes */
80 : #ifdef __windows
81 : #define FILE_O_RDONLY _O_RDONLY
82 : #define FILE_O_BINARY _O_BINARY
83 : #define PATHNCMP _strnicmp /* MSVC converts paths to lower-case sometimes? */
84 : #define inline __inline
85 : #define ssize_t long
86 : #define S_ISREG(mode) (((mode) & _S_IFMT) == (_S_IFREG)) /* MSVC does not have this macro */
87 : #else /* not windaube */
88 : #define FILE_O_RDONLY O_RDONLY
89 : #define FILE_O_BINARY 0
90 : #define PATHNCMP strncmp
91 : #endif /* not windaube */
92 :
93 : #ifndef TRUE
94 : #define TRUE 1
95 : #endif
96 : #ifndef FALSE
97 : #define FALSE 0
98 : #endif
99 :
100 : int internal_boost = 0;
101 : static char* base_dir;
102 : static char* work_dir;
103 : size_t work_dir_len;
104 :
105 : #ifdef __GNUC__
106 : #define clz __builtin_clz
107 : #else
108 : static inline int clz(unsigned int value)
109 : {
110 : int result = 32;
111 :
112 : while(value)
113 : {
114 : value >>= 1;
115 : result -= 1;
116 : }
117 : return result;
118 : }
119 : #endif
120 :
121 32490966 : static inline unsigned int get_unaligned_uint(const unsigned char* cursor)
122 : {
123 : unsigned int result;
124 :
125 : memcpy(&result, cursor, sizeof(unsigned int));
126 32490966 : return result;
127 : }
128 :
129 : /* ===============================================
130 : * memory pool for fast fix-size allocation (non-tread-safe)
131 : * ===============================================
132 : */
133 : struct pool
134 : {
135 : void* head_free; /**< head of a linked list of freed element */
136 : char* fresh; /**< top of a memory block to dig new element */
137 : char* tail; /**< to detect end of extent... when fresh pass tail */
138 : void* extent; /**< pointer to the primary extent block */
139 : int size_elem; /**< size of an element. */
140 : int primary; /**< primary allocation in bytes */
141 : int secondary; /**< secondary allocation in bytes */
142 : };
143 : #define POOL_ALIGN_INCREMENT 8 /**< Alignement, must be a power of 2 and of size > to sizeof(void*) */
144 :
145 :
146 1231 : static void* pool_take_extent(struct pool* pool, int allocate)
147 : {
148 : unsigned int size = 0;
149 : void* extent;
150 : void* data = NULL;
151 :
152 1231 : if(pool->extent)
153 : {
154 : /* we already have an extent, so this is a secondary */
155 0 : if(pool->secondary)
156 : {
157 0 : size = pool->secondary;
158 : }
159 : }
160 : else
161 : {
162 : assert(pool->primary);
163 1231 : size = pool->primary;
164 : }
165 1231 : if(size)
166 : {
167 1231 : extent = malloc(size);
168 1231 : if(extent)
169 : {
170 1231 : *(void**)extent = pool->extent;
171 1231 : pool->extent = extent;
172 1231 : if(allocate)
173 : {
174 0 : data = ((char*)extent) + POOL_ALIGN_INCREMENT;
175 0 : pool->fresh = ((char*)data) + pool->size_elem;
176 0 : pool->tail = pool->fresh + (size - pool->size_elem);
177 : }
178 : else
179 : {
180 1231 : pool->fresh = ((char*)extent) + POOL_ALIGN_INCREMENT;
181 1231 : pool->tail = pool->fresh + (size - pool->size_elem);
182 : }
183 : }
184 : }
185 1231 : return data;
186 : }
187 :
188 : /* Create a memory pool for fix size objects
189 : * this is a simplified implementation that
190 : * is _not_ thread safe.
191 : */
192 1231 : static struct pool* pool_create(int size_elem, int primary, int secondary)
193 : {
194 : struct pool* pool;
195 :
196 : assert(primary > 0);
197 : assert(secondary >= 0);
198 : assert(size_elem > 0);
199 :
200 1231 : pool = (struct pool*)calloc(1, sizeof(struct pool));
201 1231 : if(!pool) return NULL;
202 : /* Adjust the element size so that it be aligned, and so that an element could
203 : * at least contain a void*
204 : */
205 1231 : pool->size_elem = size_elem = (size_elem + POOL_ALIGN_INCREMENT - 1) & ~(POOL_ALIGN_INCREMENT - 1);
206 :
207 1231 : pool->primary = (size_elem * primary) + POOL_ALIGN_INCREMENT;
208 1231 : pool->secondary = secondary > 0 ? (size_elem * secondary) + POOL_ALIGN_INCREMENT : 0;
209 1231 : pool_take_extent(pool, FALSE);
210 :
211 1231 : return pool;
212 :
213 : }
214 :
215 0 : static void pool_destroy(struct pool* pool)
216 : {
217 : void* extent;
218 : void* next;
219 :
220 0 : if(pool != NULL)
221 : {
222 0 : extent = pool->extent;
223 0 : while(extent)
224 : {
225 0 : next = *(void**)extent;
226 0 : free(extent);
227 : extent = next;
228 : }
229 0 : free(pool);
230 : }
231 0 : }
232 :
233 248198 : static inline void* pool_alloc(struct pool* pool)
234 : {
235 : void* data;
236 :
237 248198 : data = pool->head_free;
238 248198 : if(data == NULL)
239 : {
240 : /* we have no old-freed elem */
241 248198 : if(pool->fresh <= pool->tail)
242 : {
243 : /* pick a slice of the current extent */
244 : data = (void*)pool->fresh;
245 248198 : pool->fresh += pool->size_elem;
246 : }
247 : else
248 : {
249 : /* allocate a new extent */
250 0 : data = pool_take_extent(pool, TRUE);
251 : }
252 : }
253 : else
254 : {
255 : /* re-used old freed element by chopipng the head of the free list */
256 0 : pool->head_free = *(void**)data;
257 : }
258 :
259 248198 : return data;
260 : }
261 :
262 :
263 : /* ===============================================
264 : * Hash implementation custumized to be just tracking
265 : * a unique list of string (i.e no data associated
266 : * with the key, no need for retrieval, etc..
267 : *
268 : * This is tuned for the particular use-case we have here
269 : * measures in tail_build showed that
270 : * we can get north of 4000 distinct values stored in a hash
271 : * the collision rate is at worse around 2%
272 : * the collision needing an expensive memcmp to resolve
273 : * have a rate typically at 1 per 1000
274 : * for tail_build we register 37229 unique key
275 : * with a total of 377 extra memcmp needed
276 : * which is completely negligible compared to the
277 : * number of memcmp required to eliminate duplicate
278 : * entry (north of 2.5 millions for tail_build)
279 : * ===============================================
280 : */
281 :
282 : struct hash_elem
283 : {
284 : struct hash_elem* next;
285 : const char* key;
286 : int key_len;
287 : };
288 :
289 : struct hash
290 : {
291 : struct hash_elem** array;
292 : struct pool* elems_pool;
293 : int flags;
294 : unsigned int used;
295 : unsigned int size;
296 : unsigned int load_limit;
297 : #ifdef HASH_STAT
298 : int stored;
299 : int collisions;
300 : int cost;
301 : int memcmp;
302 : #endif
303 : };
304 : #define HASH_F_NO_RESIZE (1<<0)
305 :
306 : /* The following hash_compute function was adapted from :
307 : * lookup3.c, by Bob Jenkins, May 2006, Public Domain.
308 : *
309 : * The changes from the original are mostly cosmetic
310 : */
311 : #define rot(x,k) (((x)<<(k)) | ((x)>>(32-(k))))
312 :
313 :
314 : #if defined CORE_BIG_ENDIAN
315 : #define MASK_C1 0xFFFFFF00
316 : #define MASK_C2 0xFFFF0000
317 : #define MASK_C3 0xFF000000
318 : #elif defined CORE_LITTLE_ENDIAN
319 : #define MASK_C1 0xFFFFFF
320 : #define MASK_C2 0xFFFF
321 : #define MASK_C3 0xFF
322 : #else
323 : #error "Missing Endianness definition"
324 : #endif
325 :
326 :
327 : #define mix(a,b,c) \
328 : { \
329 : a -= c; a ^= rot(c, 4); c += b; \
330 : b -= a; b ^= rot(a, 6); a += c; \
331 : c -= b; c ^= rot(b, 8); b += a; \
332 : a -= c; a ^= rot(c,16); c += b; \
333 : b -= a; b ^= rot(a,19); a += c; \
334 : c -= b; c ^= rot(b, 4); b += a; \
335 : }
336 : #define final(a,b,c) \
337 : { \
338 : c ^= b; c -= rot(b,14); \
339 : a ^= c; a -= rot(c,11); \
340 : b ^= a; b -= rot(a,25); \
341 : c ^= b; c -= rot(b,16); \
342 : a ^= c; a -= rot(c,4); \
343 : b ^= a; b -= rot(a,14); \
344 : c ^= b; c -= rot(b,24); \
345 : }
346 :
347 3610144 : static unsigned int hash_compute( struct hash* hash, const char* key, int length)
348 : {
349 : unsigned int a;
350 : unsigned int b;
351 : unsigned int c; /* internal state */
352 : const unsigned char* uk = (const unsigned char*)key;
353 :
354 : /* Set up the internal state */
355 3610144 : a = b = c = 0xdeadbeef + (length << 2);
356 :
357 : /* we use this to 'hash' full path with mostly a common root
358 : * let's now waste too much cycles hashing mostly constant stuff
359 : */
360 3610144 : if(length > 36)
361 : {
362 3609473 : uk += length - 36;
363 : length = 36;
364 : }
365 : /*------ all but last block: aligned reads and affect 32 bits of (a,b,c) */
366 10830432 : while (length > 12)
367 : {
368 7220288 : a += get_unaligned_uint(uk);
369 7220288 : b += get_unaligned_uint(uk+4);
370 7220288 : c += get_unaligned_uint(uk+8);
371 7220288 : mix(a,b,c);
372 7220288 : length -= 12;
373 7220288 : uk += 12;
374 : }
375 :
376 : /*----------------------------- handle the last (probably partial) block */
377 : /* Note: we possibly over-read, which would trigger complaint from VALGRIND
378 : * but we mask the undefined stuff if any, so we are still good, thanks
379 : * to alignment of memory allocation and tail-memory management overhead
380 : * we always can read 3 bytes past the official end without triggering
381 : * a segfault -- if you find a platform/compiler couple for which that postulat
382 : * is false, then you just need to over-allocate by 2 more bytes in file_load()
383 : * file_load already over-allocate by 1 to sitck a \0 at the end of the buffer.
384 : */
385 3610144 : switch(length)
386 : {
387 3609506 : case 12: c+=get_unaligned_uint(uk+8); b+=get_unaligned_uint(uk+4); a+=get_unaligned_uint(uk); break;
388 110 : case 11: c+=get_unaligned_uint(uk+8) & MASK_C1; b+=get_unaligned_uint(uk+4); a+=get_unaligned_uint(uk); break;
389 121 : case 10: c+=get_unaligned_uint(uk+8) & MASK_C2; b+=get_unaligned_uint(uk+4); a+=get_unaligned_uint(uk); break;
390 110 : case 9 : c+=get_unaligned_uint(uk+8) & MASK_C3; b+=get_unaligned_uint(uk+4); a+=get_unaligned_uint(uk); break;
391 77 : case 8 : b+=get_unaligned_uint(uk+4); a+=get_unaligned_uint(uk); break;
392 77 : case 7 : b+=get_unaligned_uint(uk+4) & MASK_C1; a+=get_unaligned_uint(uk); break;
393 77 : case 6 : b+=get_unaligned_uint(uk+4) & MASK_C2; a+=get_unaligned_uint(uk); break;
394 33 : case 5 : b+=get_unaligned_uint(uk+4) & MASK_C3; a+=get_unaligned_uint(uk); break;
395 33 : case 4 : a+=get_unaligned_uint(uk); break;
396 0 : case 3 : a+=get_unaligned_uint(uk) & MASK_C1; break;
397 0 : case 2 : a+=get_unaligned_uint(uk) & MASK_C2; break;
398 0 : case 1 : a+=get_unaligned_uint(uk) & MASK_C3; break;
399 0 : case 0 : return c & hash->size; /* zero length strings require no mixing */
400 : }
401 :
402 3610144 : final(a,b,c);
403 3610144 : return c & hash->size;
404 : }
405 :
406 0 : static void hash_destroy(struct hash* hash)
407 : {
408 0 : if(hash)
409 : {
410 0 : if(hash->array)
411 : {
412 0 : free(hash->array);
413 : }
414 0 : if(hash->elems_pool)
415 : {
416 0 : pool_destroy(hash->elems_pool);
417 : }
418 0 : free(hash);
419 : }
420 0 : }
421 :
422 1231 : static struct hash* hash_create(unsigned int size)
423 : {
424 : struct hash* hash;
425 :
426 : assert(size > 0);
427 1231 : hash = calloc(1, sizeof(struct hash));
428 1231 : if(hash)
429 : {
430 1231 : size += (size >> 2) + 1; /* ~ 75% load factor */
431 1231 : if(size >= 15)
432 : {
433 1231 : hash->size = (((unsigned int)0xFFFFFFFF) >> clz((unsigned int)size));
434 : }
435 : else
436 : {
437 0 : hash->size = size = 15;
438 : }
439 1231 : hash->load_limit = hash->size - (hash->size >> 2);
440 1231 : hash->used = 0;
441 1231 : hash->array = (struct hash_elem**)calloc(hash->size + 1, sizeof(struct hash_elem*));
442 1231 : if(hash->array == NULL)
443 : {
444 0 : hash_destroy(hash);
445 : hash = NULL;
446 : }
447 : }
448 1231 : if(hash)
449 : {
450 1231 : hash->elems_pool = pool_create(sizeof(struct hash_elem),
451 1231 : size, size << 1);
452 1231 : if(!hash->elems_pool)
453 : {
454 0 : hash_destroy(hash);
455 : hash = NULL;
456 : }
457 : }
458 1231 : return hash;
459 : }
460 :
461 0 : static void hash_resize(struct hash* hash)
462 : {
463 0 : unsigned int old_size = hash->size;
464 : unsigned int hashed;
465 : struct hash_elem* hash_elem;
466 : struct hash_elem* next;
467 : struct hash_elem** array;
468 : unsigned int i;
469 :
470 0 : hash->size = (old_size << 1) + 1;
471 : /* we really should avoid to get there... so print a message to alert of the condition */
472 0 : fprintf(stderr, "resize hash %d -> %d\n", old_size, hash->size);
473 0 : if(hash->size == old_size)
474 : {
475 0 : hash->flags |= HASH_F_NO_RESIZE;
476 0 : return;
477 : }
478 0 : array = calloc(hash->size + 1, sizeof(struct hash_elem*));
479 0 : if(array)
480 : {
481 0 : hash->load_limit = hash->size - (hash->size >> 2);
482 0 : for(i=0; i <= old_size; i++)
483 : {
484 0 : hash_elem = (struct hash_elem*)hash->array[i];
485 0 : while(hash_elem)
486 : {
487 0 : next = hash_elem->next;
488 :
489 0 : hashed = hash_compute(hash, hash_elem->key, hash_elem->key_len);
490 0 : hash_elem->next = array[hashed];
491 0 : array[hashed] = hash_elem;
492 : hash_elem = next;
493 : }
494 : }
495 0 : free(hash->array);
496 0 : hash->array = (struct hash_elem**)array;
497 : }
498 : else
499 : {
500 0 : hash->size = old_size;
501 0 : hash->flags |= HASH_F_NO_RESIZE;
502 : }
503 : }
504 :
505 : #ifdef HASH_STAT
506 : static inline int compare_key(struct hash* hash, const char* a, const char* b, int len, int* cost)
507 : {
508 : *cost += 1;
509 : hash->memcmp += 1;
510 : return memcmp(a,b, len);
511 : }
512 : #else
513 : #define compare_key(h,a,b,l,c) memcmp(a,b,l)
514 : #endif
515 :
516 : /* a customized hash_store function that just store the key and return
517 : * TRUE if the key was effectively stored, or FALSE if the key was already there
518 : */
519 3610144 : static int hash_store(struct hash* hash, const char* key, int key_len)
520 : {
521 : unsigned int hashed;
522 : struct hash_elem* hash_elem;
523 : int cost = 0;
524 :
525 : (void) cost;
526 3610144 : hashed = hash_compute(hash, key, key_len);
527 : #ifdef HASH_STAT
528 : hash->stored += 1;
529 : #endif
530 3610144 : hash_elem = (struct hash_elem*)hash->array[hashed];
531 7876257 : while(hash_elem && (hash_elem->key_len != key_len || compare_key(hash, hash_elem->key, key, key_len, &cost)))
532 : {
533 655969 : hash_elem = hash_elem->next;
534 : }
535 :
536 3610144 : if(!hash_elem)
537 : {
538 248198 : hash_elem = pool_alloc(hash->elems_pool);
539 248198 : if(hash_elem)
540 : {
541 248198 : hash_elem->key = key;
542 248198 : hash_elem->key_len = key_len;
543 248198 : hash_elem->next = hash->array[hashed];
544 :
545 : #ifdef HASH_STAT
546 : if(hash_elem->next)
547 : {
548 : hash->collisions += 1;
549 : hash->cost += cost;
550 : }
551 : #endif
552 248198 : hash->array[hashed] = hash_elem;
553 248198 : hash->used += 1;
554 248198 : if(hash->used > hash->load_limit)
555 : {
556 0 : hash_resize(hash);
557 : }
558 : }
559 : return TRUE;
560 : }
561 : return FALSE;
562 : }
563 :
564 20262 : static int file_stat(const char* name, struct stat* buffer_stat, int* rc)
565 : {
566 : int rc_local = 0;
567 :
568 20262 : rc_local = stat(name, buffer_stat);
569 20262 : if (rc_local < 0)
570 : {
571 9515 : *rc = errno;
572 : }
573 20262 : return rc_local;
574 : }
575 :
576 20262 : static off_t file_get_size(const char* name, int* rc)
577 : {
578 : struct stat buffer_stat;
579 : off_t size = -1;
580 :
581 20262 : if (!file_stat(name, &buffer_stat, rc))
582 : {
583 10747 : if(S_ISREG(buffer_stat.st_mode))
584 : {
585 10747 : size = buffer_stat.st_size;
586 : }
587 : else
588 : {
589 0 : *rc = EINVAL;
590 : }
591 : }
592 20262 : return size;
593 : }
594 :
595 : #if !ENABLE_RUNTIME_OPTIMIZATIONS
596 : static void * file_load_buffers[100000];
597 : static size_t file_load_buffer_count = 0;
598 : #endif
599 :
600 20262 : static char* file_load(const char* name, off_t* size, int* return_rc)
601 : {
602 20262 : off_t local_size = 0;
603 20262 : int rc = 0;
604 : char* buffer = NULL;
605 : int fd;
606 :
607 : assert(name != NULL);
608 :
609 20262 : if(!size)
610 : {
611 : size = &local_size;
612 : }
613 20262 : *size = file_get_size(name, &rc);
614 20262 : if (!rc && *size >= 0)
615 : {
616 : fd = open(name, FILE_O_RDONLY | FILE_O_BINARY);
617 10747 : if (!(fd == -1))
618 : {
619 10747 : buffer = malloc((size_t)(*size + 1));
620 : #if !ENABLE_RUNTIME_OPTIMIZATIONS
621 : if (buffer != NULL)
622 : {
623 : if (file_load_buffer_count == 100000)
624 : {
625 : free(buffer);
626 : buffer = NULL;
627 : }
628 : else
629 : {
630 : file_load_buffers[file_load_buffer_count++] = buffer;
631 : }
632 : }
633 : #endif
634 10747 : if (buffer == NULL)
635 : {
636 0 : rc = ENOMEM;
637 : }
638 : else
639 : {
640 : ssize_t i;
641 :
642 : REDO:
643 10747 : i = read(fd, buffer, (size_t)(*size));
644 10747 : if(i == -1)
645 : {
646 0 : if(errno == EINTR)
647 : {
648 : goto REDO;
649 : }
650 : else
651 : {
652 0 : rc = errno;
653 : }
654 : }
655 : else
656 : {
657 10747 : if (i != *size)
658 : {
659 0 : rc = EIO;
660 : }
661 : }
662 10747 : buffer[*size] = 0;
663 : }
664 10747 : close(fd);
665 : }
666 : }
667 :
668 20262 : if(rc && buffer)
669 : {
670 0 : free(buffer);
671 : buffer = NULL;
672 : }
673 20262 : if(return_rc)
674 : {
675 20262 : *return_rc = rc;
676 : }
677 20262 : return buffer;
678 : }
679 :
680 1060 : static void _cancel_relative(char* base, char** ref_cursor, char** ref_cursor_out, char* end)
681 : {
682 1060 : char* cursor = *ref_cursor;
683 1060 : char* cursor_out = *ref_cursor_out;
684 :
685 : do
686 : {
687 1160 : cursor += 3;
688 2320 : while(cursor_out > base && cursor_out[-1] == '/')
689 0 : cursor_out--;
690 6960 : while(cursor_out > base && *--cursor_out != '/');
691 : }
692 1160 : while(cursor + 3 < end && !memcmp(cursor, "/../", 4));
693 1060 : *ref_cursor = cursor;
694 1060 : *ref_cursor_out = cursor_out;
695 1060 : }
696 :
697 8105180 : static inline void eat_space(char ** token)
698 : {
699 23542399 : while ((' ' == **token) || ('\t' == **token)) {
700 7332039 : ++(*token);
701 : }
702 8105180 : }
703 :
704 : /*
705 : * Prune LibreOffice specific duplicate dependencies to improve
706 : * gnumake startup time, and shrink the disk-space footprint.
707 : */
708 : static inline int
709 10988690 : elide_dependency(const char* key, int key_len, const char **unpacked_end)
710 : {
711 : #if 0
712 : {
713 : int i;
714 : fprintf (stderr, "elide?%d!: '", internal_boost);
715 : for (i = 0; i < key_len; i++) {
716 : fprintf (stderr, "%c", key[i]);
717 : }
718 : fprintf (stderr, "'\n");
719 : }
720 : #endif
721 :
722 : /* boost brings a plague of header files */
723 : int i;
724 : int unpacked = 0;
725 : /* walk down path elements */
726 680912669 : for (i = 0; i < key_len - 1; i++)
727 : {
728 670017293 : if (key[i] == '/')
729 : {
730 84027219 : if (0 == unpacked)
731 : {
732 51075289 : if (!PATHNCMP(key + i + 1, "workdir/", 8))
733 : {
734 : unpacked = 1;
735 4158774 : continue;
736 : }
737 : }
738 : else
739 : {
740 32951930 : if (!PATHNCMP(key + i + 1, "UnpackedTarball/", 16))
741 : {
742 93314 : if (unpacked_end)
743 46806 : *unpacked_end = strchr(key + i + 17, '/');
744 : return 1;
745 : }
746 : }
747 : }
748 : }
749 :
750 : return 0;
751 : }
752 :
753 : /*
754 : * We collapse tens of internal boost headers to the unpacked target, such
755 : * that you can re-compile / install boost and all is well.
756 : */
757 0 : static void emit_single_boost_header(void)
758 : {
759 : #define BOOST_TARGET "/UnpackedTarball/boost.done"
760 0 : fprintf(stdout, "%s" BOOST_TARGET " ", work_dir);
761 0 : }
762 :
763 46806 : static void emit_unpacked_target(const char* token, const char* end)
764 : {
765 46806 : fwrite(token, 1, end-token, stdout);
766 46806 : fputs(".done ", stdout);
767 46806 : }
768 :
769 : /* prefix paths to absolute */
770 257714 : static inline void print_fullpaths(char* line)
771 : {
772 : char* token;
773 : char* end;
774 : int boost_count = 0;
775 : int token_len;
776 257714 : const char * unpacked_end = 0; /* end of UnpackedTarget match (if any) */
777 : /* for UnpackedTarget the target is GenC{,xx}Object, dont mangle! */
778 : int target_seen = 0;
779 :
780 257714 : token = line;
781 257714 : eat_space(&token);
782 8105180 : while (*token)
783 : {
784 : end = token;
785 : /* hard to believe that in this day and age drive letters still exist */
786 7589752 : if (*end && (':' == *(end+1)) &&
787 0 : (('\\' == *(end+2)) || ('/' == *(end+2))) && isalpha(*end))
788 : {
789 0 : end = end + 3; /* only one cross, err drive letter per filename */
790 : }
791 379783774 : while (*end && (' ' != *end) && ('\t' != *end) && (':' != *end)) {
792 372194022 : ++end;
793 : }
794 7589752 : token_len = end - token;
795 14921790 : if (target_seen &&
796 7332038 : elide_dependency(token, token_len, &unpacked_end))
797 : {
798 46806 : if (unpacked_end)
799 : {
800 46806 : if (internal_boost && !PATHNCMP(unpacked_end - 5, "boost", 5))
801 : {
802 0 : ++boost_count;
803 0 : if (boost_count == 1)
804 0 : emit_single_boost_header();
805 : else
806 : {
807 : /* don't output, and swallow trailing \\\n if any */
808 0 : token = end;
809 0 : eat_space(&token);
810 0 : if (token[0] == '\\' && token[1] == '\n')
811 0 : end = token + 2;
812 : }
813 : }
814 : else
815 : {
816 46806 : emit_unpacked_target(token, unpacked_end);
817 : }
818 46806 : unpacked_end = 0;
819 : }
820 : }
821 : else
822 : {
823 7542946 : if (fwrite(token, token_len, 1, stdout) != 1)
824 0 : abort();
825 7542946 : fputc(' ', stdout);
826 : }
827 7589752 : token = end;
828 7589752 : eat_space(&token);
829 7589752 : if (!target_seen)
830 : {
831 257714 : if (':' == *token)
832 : {
833 : target_seen = 1;
834 257714 : fputc(':', stdout);
835 257714 : ++token;
836 257714 : eat_space(&token);
837 : }
838 : }
839 : }
840 257714 : }
841 :
842 3656652 : static inline char * eat_space_at_end(char * end)
843 : {
844 : char * real_end;
845 : assert('\0' == *end);
846 3656652 : real_end = end - 1;
847 10969956 : while (' ' == *real_end || '\t' == *real_end || '\n' == *real_end
848 7313304 : || ':' == *real_end)
849 : { /* eat colon and whitespace at end */
850 3656652 : --real_end;
851 : }
852 3656652 : return real_end;
853 : }
854 :
855 : static char* phony_content_buffer;
856 9515 : static inline char* generate_phony_line(char* phony_target, char* extension)
857 : {
858 : char* src;
859 : char* dest;
860 : char* last_dot = NULL;
861 : //fprintf(stderr, "generate_phony_line called with phony_target %s and extension %s\n", phony_target, extension);
862 377279 : for(dest = phony_content_buffer+work_dir_len, src = phony_target; *src != 0; ++src, ++dest)
863 : {
864 367764 : *dest = *src;
865 367764 : if(*dest == '.')
866 : {
867 : last_dot = dest;
868 : }
869 : }
870 : //fprintf(stderr, "generate_phony_line after phony_target copy: %s\n", phony_content_buffer);
871 18669 : for(dest = last_dot+1, src = extension; *src != 0; ++src, ++dest)
872 : {
873 9154 : *dest = *src;
874 : }
875 : //fprintf(stderr, "generate_phony_line after extension add: %s\n", phony_content_buffer);
876 : strcpy(dest, ": $(gb_Helper_PHONY)\n");
877 : //fprintf(stderr, "generate_phony_line after phony add: %s\n", phony_content_buffer);
878 9515 : return phony_content_buffer;
879 : }
880 :
881 9515 : static inline int generate_phony_file(char* fn, char* content)
882 : {
883 : FILE* depfile;
884 9515 : depfile = fopen(fn, "w");
885 9515 : if(!depfile)
886 : {
887 0 : fprintf(stderr, "Could not open '%s' for writing: %s\n", fn, strerror(errno));
888 : }
889 : else
890 : {
891 9515 : fputs(content, depfile);
892 9515 : fclose(depfile);
893 : }
894 9515 : return !depfile;
895 : }
896 :
897 19031 : static int _process(struct hash* dep_hash, char* fn)
898 : {
899 : int rc;
900 : char* buffer;
901 : char* end;
902 : char* cursor;
903 : char* cursor_out;
904 : char* base;
905 : char* created_line = NULL;
906 : char* src_relative;
907 : int continuation = 0;
908 : char last_ns = 0;
909 : off_t size;
910 :
911 19031 : buffer = file_load(fn, &size, &rc);
912 19031 : if(!rc)
913 : {
914 9516 : base = cursor_out = cursor = end = buffer;
915 9516 : end += size;
916 :
917 : /* first eat unneeded space at the beginning of file
918 : */
919 19032 : while(cursor < end && (*cursor == ' ' || *cursor == '\\'))
920 0 : ++cursor;
921 :
922 706398175 : while(cursor < end)
923 : {
924 706388659 : if(*cursor == '\\')
925 : {
926 : continuation = 1;
927 3665870 : *cursor_out++ = *cursor++;
928 : }
929 702722789 : else if(*cursor == '/')
930 : {
931 84524738 : if(cursor + 3 < end)
932 : {
933 84524738 : if(!memcmp(cursor, "/../", 4))
934 : {
935 1060 : _cancel_relative(base, &cursor, &cursor_out, end);
936 : }
937 : }
938 84524738 : *cursor_out++ = *cursor++;
939 : }
940 618198051 : else if(*cursor == '\n')
941 : {
942 10988690 : if(!continuation)
943 : {
944 7322820 : *cursor_out = 0;
945 7322820 : if(base < cursor)
946 : {
947 : /* here we have a complete rule */
948 3666168 : if(last_ns == ':')
949 : {
950 : /* if the rule ended in ':' that is a no-dep rule
951 : * these are the one for which we want to filter
952 : * duplicate out
953 : */
954 3656652 : int key_len = eat_space_at_end(cursor_out) - base;
955 3656652 : if (!elide_dependency(base,key_len + 1, NULL)
956 3610144 : && hash_store(dep_hash, base, key_len))
957 : {
958 : /* DO NOT modify base after it has been added
959 : as key by hash_store */
960 248198 : print_fullpaths(base);
961 248198 : putc('\n', stdout);
962 : }
963 : }
964 : else
965 : {
966 : /* rule with dep, just write it */
967 9516 : print_fullpaths(base);
968 9516 : putc('\n', stdout);
969 : }
970 : last_ns = ' '; // cannot hurt to reset it
971 : }
972 7322820 : cursor += 1;
973 7322820 : base = cursor_out = cursor;
974 : }
975 : else
976 : {
977 : /* here we have a '\' followed by \n this is a continuation
978 : * i.e not a complete rule yet
979 : */
980 3665870 : *cursor_out++ = *cursor++;
981 : continuation = 0; // cancel current one (empty lines!)
982 : }
983 : }
984 : else
985 : {
986 : continuation = 0;
987 : /* not using isspace() here save 25% of I refs and 75% of D refs based on cachegrind */
988 607209361 : if(*cursor != ' ' && *cursor != '\n' && *cursor != '\t' )
989 : {
990 : last_ns = *cursor;
991 : }
992 607209361 : *cursor_out++ = *cursor++;
993 : }
994 : }
995 : /* just in case the file did not end with a \n, there may be a pending rule */
996 9516 : if(base < cursor_out)
997 : {
998 0 : if(last_ns == ':')
999 : {
1000 0 : int key_len = eat_space_at_end(cursor_out) - base;
1001 0 : if (!elide_dependency(base,key_len + 1, NULL) &&
1002 0 : hash_store(dep_hash, base, key_len))
1003 : {
1004 0 : puts(base);
1005 0 : putc('\n', stdout);
1006 : }
1007 : }
1008 : else
1009 : {
1010 0 : puts(base);
1011 0 : putc('\n', stdout);
1012 : }
1013 : }
1014 : }
1015 : else
1016 : {
1017 9515 : if(strncmp(fn, work_dir, work_dir_len) == 0)
1018 : {
1019 9515 : if(strncmp(fn+work_dir_len, "/Dep/", 5) == 0)
1020 : {
1021 9515 : src_relative = fn+work_dir_len+5;
1022 : // cases ordered by frequency
1023 9515 : if(strncmp(src_relative, "CxxObject/", 10) == 0)
1024 : {
1025 8483 : created_line = generate_phony_line(src_relative+10, "o");
1026 8483 : rc = generate_phony_file(fn, created_line);
1027 : }
1028 1032 : else if(strncmp(fn+work_dir_len+5, "SrsPartTarget/", 14) == 0)
1029 : {
1030 361 : created_line = generate_phony_line(src_relative+14, "");
1031 361 : rc = generate_phony_file(fn, created_line);
1032 : }
1033 671 : else if(strncmp(src_relative, "GenCxxObject/", 13) == 0)
1034 : {
1035 602 : created_line = generate_phony_line(src_relative+13, "o");
1036 602 : rc = generate_phony_file(fn, created_line);
1037 : }
1038 69 : else if(strncmp(src_relative, "CObject/", 8) == 0)
1039 : {
1040 38 : created_line = generate_phony_line(src_relative+8, "o");
1041 38 : rc = generate_phony_file(fn, created_line);
1042 : }
1043 31 : else if(strncmp(src_relative, "GenCObject/", 11) == 0)
1044 : {
1045 30 : created_line = generate_phony_line(src_relative+11, "o");
1046 30 : rc = generate_phony_file(fn, created_line);
1047 : }
1048 1 : else if(strncmp(src_relative, "SdiObject/", 10) == 0)
1049 : {
1050 0 : created_line = generate_phony_line(src_relative+10, "o");
1051 0 : rc = generate_phony_file(fn, created_line);
1052 : }
1053 1 : else if(strncmp(src_relative, "AsmObject/", 10) == 0)
1054 : {
1055 1 : created_line = generate_phony_line(src_relative+10, "o");
1056 1 : rc = generate_phony_file(fn, created_line);
1057 : }
1058 0 : else if(strncmp(src_relative, "ObjCxxObject/", 13) == 0)
1059 : {
1060 0 : created_line = generate_phony_line(src_relative+13, "o");
1061 0 : rc = generate_phony_file(fn, created_line);
1062 : }
1063 0 : else if(strncmp(src_relative, "ObjCObject/", 11) == 0)
1064 : {
1065 0 : created_line = generate_phony_line(src_relative+11, "o");
1066 0 : rc = generate_phony_file(fn, created_line);
1067 : }
1068 : else
1069 : {
1070 0 : fprintf(stderr, "no magic for %s(%s) in %s\n", fn, src_relative, work_dir);
1071 : }
1072 : }
1073 9515 : if(!rc)
1074 : {
1075 9515 : puts(created_line);
1076 : }
1077 : }
1078 : }
1079 : /* Note: yes we are going to leak 'buffer'
1080 : * this is on purpose, to avoid cloning the 'key' out of it and our special
1081 : * 'hash' just store the pointer to the key inside of buffer, hence it need
1082 : * to remain allocated
1083 : * coverity[leaked_storage] - this is on purpose
1084 : */
1085 19031 : return rc;
1086 : }
1087 :
1088 0 : static void _usage(void)
1089 : {
1090 0 : fputs("Usage: concat-deps <file that contains dep_files>\n", stderr);
1091 0 : }
1092 :
1093 : #define kDEFAULT_HASH_SIZE 4096
1094 : #define PHONY_TARGET_BUFFER 4096
1095 :
1096 2462 : static int get_var(char **var, const char *name)
1097 : {
1098 2462 : *var = (char *)getenv(name);
1099 2462 : if(!*var)
1100 : {
1101 0 : fprintf(stderr,"Error: %s is missing in the environment\n", name);
1102 0 : return 1;
1103 : }
1104 : return 0;
1105 : }
1106 :
1107 1231 : int main(int argc, char** argv)
1108 : {
1109 1231 : int rc = 0;
1110 1231 : off_t in_list_size = 0;
1111 : char* in_list;
1112 : char* in_list_cursor;
1113 : char* in_list_base;
1114 : struct hash* dep_hash = 0;
1115 : const char *env_str;
1116 :
1117 1231 : if(argc < 2)
1118 : {
1119 0 : _usage();
1120 0 : return 1;
1121 : }
1122 1231 : if(get_var(&base_dir, "SRCDIR") || get_var(&work_dir, "WORKDIR"))
1123 : return 1;
1124 1231 : work_dir_len = strlen(work_dir);
1125 1231 : phony_content_buffer = malloc(PHONY_TARGET_BUFFER);
1126 : strcpy(phony_content_buffer, work_dir);
1127 1231 : phony_content_buffer[work_dir_len] = '/';
1128 :
1129 1231 : env_str = getenv("SYSTEM_BOOST");
1130 1231 : internal_boost = !env_str || strcmp(env_str,"TRUE");
1131 :
1132 1231 : in_list = file_load(argv[1], &in_list_size, &rc);
1133 1231 : if(!rc)
1134 : {
1135 1231 : dep_hash = hash_create( kDEFAULT_HASH_SIZE);
1136 : in_list_base = in_list_cursor = in_list;
1137 :
1138 : /* extract filename of dep file from a 'space' separated list */
1139 1876281 : while(*in_list_cursor)
1140 : {
1141 : /* the input here may contain Win32 \r\n EOL */
1142 1873819 : if(*in_list_cursor == ' '
1143 3719575 : || *in_list_cursor == '\n' || *in_list_cursor == '\r')
1144 : {
1145 28063 : *in_list_cursor = 0;
1146 28063 : if(in_list_base < in_list_cursor)
1147 : {
1148 19031 : rc = _process(dep_hash, in_list_base);
1149 19031 : if(rc)
1150 : {
1151 : break;
1152 : }
1153 : }
1154 28063 : in_list_cursor += 1;
1155 28063 : in_list_base = in_list_cursor;
1156 : }
1157 : else
1158 : {
1159 1845756 : in_list_cursor += 1;
1160 : }
1161 : }
1162 1231 : if(!rc)
1163 : {
1164 : /* catch the last entry in case the input did not terminate with a 'space' */
1165 1231 : if(in_list_base < in_list_cursor)
1166 : {
1167 0 : rc = _process(dep_hash, in_list_base);
1168 : }
1169 : }
1170 : #ifdef HASH_STAT
1171 : fprintf(stderr, "stats: u:%d s:%d l:%d t:%d c:%d m:%d $:%d\n",
1172 : dep_hash->used, dep_hash->size, dep_hash->load_limit, dep_hash->stored,
1173 : dep_hash->collisions, dep_hash->memcmp, dep_hash->cost);
1174 : #endif
1175 : }
1176 : #if !ENABLE_RUNTIME_OPTIMIZATIONS
1177 : hash_destroy(dep_hash);
1178 : for (size_t i = 0; i != file_load_buffer_count; ++i)
1179 : {
1180 : free(file_load_buffers[i]);
1181 : }
1182 : #endif
1183 1231 : return rc;
1184 : }
1185 :
1186 : /* vim:set shiftwidth=4 softtabstop=4 expandtab: */
|