Geant4 11.1.1
Toolkit for the simulation of the passage of particles through matter
Loading...
Searching...
No Matches
crc32.c
Go to the documentation of this file.
1/* crc32.c -- compute the CRC-32 of a data stream
2 * Copyright (C) 1995-2022 Mark Adler
3 * For conditions of distribution and use, see copyright notice in zlib.h
4 *
5 * This interleaved implementation of a CRC makes use of pipelined multiple
6 * arithmetic-logic units, commonly found in modern CPU cores. It is due to
7 * Kadatch and Jenkins (2010). See doc/crc-doc.1.0.pdf in this distribution.
8 */
9
10
11/*
12 Note on the use of DYNAMIC_CRC_TABLE: there is no mutex or semaphore
13 protection on the static variables used to control the first-use generation
14 of the crc tables. Therefore, if you #define DYNAMIC_CRC_TABLE, you should
15 first call get_crc_table() to initialize the tables before allowing more than
16 one thread to use crc32().
17
18 MAKECRCH can be #defined to write out crc32.h. A main() routine is also
19 produced, so that this one source file can be compiled to an executable.
20 */
21
22#ifdef MAKECRCH
23# include <stdio.h>
24# ifndef DYNAMIC_CRC_TABLE
25# define DYNAMIC_CRC_TABLE
26# endif /* !DYNAMIC_CRC_TABLE */
27#endif /* MAKECRCH */
28
29#include "zutil.h" /* for Z_U4, Z_U8, z_crc_t, and FAR definitions */
30
31 /*
32 A CRC of a message is computed on N braids of words in the message, where
33 each word consists of W bytes (4 or 8). If N is 3, for example, then three
34 running sparse CRCs are calculated respectively on each braid, at these
35 indices in the array of words: 0, 3, 6, ..., 1, 4, 7, ..., and 2, 5, 8, ...
36 This is done starting at a word boundary, and continues until as many blocks
37 of N * W bytes as are available have been processed. The results are combined
38 into a single CRC at the end. For this code, N must be in the range 1..6 and
39 W must be 4 or 8. The upper limit on N can be increased if desired by adding
40 more #if blocks, extending the patterns apparent in the code. In addition,
41 crc32.h would need to be regenerated, if the maximum N value is increased.
42
43 N and W are chosen empirically by benchmarking the execution time on a given
44 processor. The choices for N and W below were based on testing on Intel Kaby
45 Lake i7, AMD Ryzen 7, ARM Cortex-A57, Sparc64-VII, PowerPC POWER9, and MIPS64
46 Octeon II processors. The Intel, AMD, and ARM processors were all fastest
47 with N=5, W=8. The Sparc, PowerPC, and MIPS64 were all fastest at N=5, W=4.
48 They were all tested with either gcc or clang, all using the -O3 optimization
49 level. Your mileage may vary.
50 */
51
52/* Define N */
53#ifdef Z_TESTN
54# define N Z_TESTN
55#else
56# define N 5
57#endif
58#if N < 1 || N > 6
59# error N must be in 1..6
60#endif
61
62/*
63 z_crc_t must be at least 32 bits. z_word_t must be at least as long as
64 z_crc_t. It is assumed here that z_word_t is either 32 bits or 64 bits, and
65 that bytes are eight bits.
66 */
67
68/*
69 Define W and the associated z_word_t type. If W is not defined, then a
70 braided calculation is not used, and the associated tables and code are not
71 compiled.
72 */
73#ifdef Z_TESTW
74# if Z_TESTW-1 != -1
75# define W Z_TESTW
76# endif
77#else
78# ifdef MAKECRCH
79# define W 8 /* required for MAKECRCH */
80# else
81# if defined(__x86_64__) || defined(__aarch64__)
82# define W 8
83# else
84# define W 4
85# endif
86# endif
87#endif
88#ifdef W
89# if W == 8 && defined(Z_U8)
90 typedef Z_U8 z_word_t;
91# elif defined(Z_U4)
92# undef W
93# define W 4
94 typedef Z_U4 z_word_t;
95# else
96# undef W
97# endif
98#endif
99
100/* Local functions. */
101local z_crc_t multmodp OF((z_crc_t a, z_crc_t b));
102local z_crc_t x2nmodp OF((z_off64_t n, unsigned k));
103
104/* If available, use the ARM processor CRC32 instruction. */
105#if defined(__aarch64__) && defined(__ARM_FEATURE_CRC32) && W == 8
106# define ARMCRC32
107#endif
108
109#if defined(W) && (!defined(ARMCRC32) || defined(DYNAMIC_CRC_TABLE))
110/*
111 Swap the bytes in a z_word_t to convert between little and big endian. Any
112 self-respecting compiler will optimize this to a single machine byte-swap
113 instruction, if one is available. This assumes that word_t is either 32 bits
114 or 64 bits.
115 */
116local z_word_t byte_swap(word)
117 z_word_t word;
118{
119# if W == 8
120 return
121 (word & 0xff00000000000000) >> 56 |
122 (word & 0xff000000000000) >> 40 |
123 (word & 0xff0000000000) >> 24 |
124 (word & 0xff00000000) >> 8 |
125 (word & 0xff000000) << 8 |
126 (word & 0xff0000) << 24 |
127 (word & 0xff00) << 40 |
128 (word & 0xff) << 56;
129# else /* W == 4 */
130 return
131 (word & 0xff000000) >> 24 |
132 (word & 0xff0000) >> 8 |
133 (word & 0xff00) << 8 |
134 (word & 0xff) << 24;
135# endif
136}
137#endif
138
139/* CRC polynomial. */
140#define POLY 0xedb88320 /* p(x) reflected, with x^32 implied */
141
142#ifdef DYNAMIC_CRC_TABLE
143
144local z_crc_t FAR crc_table[256];
145local z_crc_t FAR x2n_table[32];
146local void make_crc_table OF((void));
147#ifdef W
148 local z_word_t FAR crc_big_table[256];
149 local z_crc_t FAR crc_braid_table[W][256];
150 local z_word_t FAR crc_braid_big_table[W][256];
151 local void braid OF((z_crc_t [][256], z_word_t [][256], int, int));
152#endif
153#ifdef MAKECRCH
154 local void write_table OF((FILE *, const z_crc_t FAR *, int));
155 local void write_table32hi OF((FILE *, const z_word_t FAR *, int));
156 local void write_table64 OF((FILE *, const z_word_t FAR *, int));
157#endif /* MAKECRCH */
158
159/*
160 Define a once() function depending on the availability of atomics. If this is
161 compiled with DYNAMIC_CRC_TABLE defined, and if CRCs will be computed in
162 multiple threads, and if atomics are not available, then get_crc_table() must
163 be called to initialize the tables and must return before any threads are
164 allowed to compute or combine CRCs.
165 */
166
167/* Definition of once functionality. */
168typedef struct once_s once_t;
169local void once OF((once_t *, void (*)(void)));
170
171/* Check for the availability of atomics. */
172#if defined(__STDC__) && __STDC_VERSION__ >= 201112L && \
173 !defined(__STDC_NO_ATOMICS__)
174
175#include <stdatomic.h>
176
177/* Structure for once(), which must be initialized with ONCE_INIT. */
178struct once_s {
179 atomic_flag begun;
180 atomic_int done;
181};
182#define ONCE_INIT {ATOMIC_FLAG_INIT, 0}
183
184/*
185 Run the provided init() function exactly once, even if multiple threads
186 invoke once() at the same time. The state must be a once_t initialized with
187 ONCE_INIT.
188 */
189local void once(state, init)
190 once_t *state;
191 void (*init)(void);
192{
193 if (!atomic_load(&state->done)) {
194 if (atomic_flag_test_and_set(&state->begun))
195 while (!atomic_load(&state->done))
196 ;
197 else {
198 init();
199 atomic_store(&state->done, 1);
200 }
201 }
202}
203
204#else /* no atomics */
205
206/* Structure for once(), which must be initialized with ONCE_INIT. */
207struct once_s {
208 volatile int begun;
209 volatile int done;
210};
211#define ONCE_INIT {0, 0}
212
213/* Test and set. Alas, not atomic, but tries to minimize the period of
214 vulnerability. */
215local int test_and_set OF((int volatile *));
216local int test_and_set(flag)
217 int volatile *flag;
218{
219 int was;
220
221 was = *flag;
222 *flag = 1;
223 return was;
224}
225
226/* Run the provided init() function once. This is not thread-safe. */
227local void once(state, init)
228 once_t *state;
229 void (*init)(void);
230{
231 if (!state->done) {
232 if (test_and_set(&state->begun))
233 while (!state->done)
234 ;
235 else {
236 init();
237 state->done = 1;
238 }
239 }
240}
241
242#endif
243
244/* State for once(). */
245local once_t made = ONCE_INIT;
246
247/*
248 Generate tables for a byte-wise 32-bit CRC calculation on the polynomial:
249 x^32+x^26+x^23+x^22+x^16+x^12+x^11+x^10+x^8+x^7+x^5+x^4+x^2+x+1.
250
251 Polynomials over GF(2) are represented in binary, one bit per coefficient,
252 with the lowest powers in the most significant bit. Then adding polynomials
253 is just exclusive-or, and multiplying a polynomial by x is a right shift by
254 one. If we call the above polynomial p, and represent a byte as the
255 polynomial q, also with the lowest power in the most significant bit (so the
256 byte 0xb1 is the polynomial x^7+x^3+x^2+1), then the CRC is (q*x^32) mod p,
257 where a mod b means the remainder after dividing a by b.
258
259 This calculation is done using the shift-register method of multiplying and
260 taking the remainder. The register is initialized to zero, and for each
261 incoming bit, x^32 is added mod p to the register if the bit is a one (where
262 x^32 mod p is p+x^32 = x^26+...+1), and the register is multiplied mod p by x
263 (which is shifting right by one and adding x^32 mod p if the bit shifted out
264 is a one). We start with the highest power (least significant bit) of q and
265 repeat for all eight bits of q.
266
267 The table is simply the CRC of all possible eight bit values. This is all the
268 information needed to generate CRCs on data a byte at a time for all
269 combinations of CRC register values and incoming bytes.
270 */
271
272local void make_crc_table()
273{
274 unsigned i, j, n;
275 z_crc_t p;
276
277 /* initialize the CRC of bytes tables */
278 for (i = 0; i < 256; i++) {
279 p = i;
280 for (j = 0; j < 8; j++)
281 p = p & 1 ? (p >> 1) ^ POLY : p >> 1;
282 crc_table[i] = p;
283#ifdef W
284 crc_big_table[i] = byte_swap(p);
285#endif
286 }
287
288 /* initialize the x^2^n mod p(x) table */
289 p = (z_crc_t)1 << 30; /* x^1 */
290 x2n_table[0] = p;
291 for (n = 1; n < 32; n++)
292 x2n_table[n] = p = multmodp(p, p);
293
294#ifdef W
295 /* initialize the braiding tables -- needs x2n_table[] */
296 braid(crc_braid_table, crc_braid_big_table, N, W);
297#endif
298
299#ifdef MAKECRCH
300 {
301 /*
302 The crc32.h header file contains tables for both 32-bit and 64-bit
303 z_word_t's, and so requires a 64-bit type be available. In that case,
304 z_word_t must be defined to be 64-bits. This code then also generates
305 and writes out the tables for the case that z_word_t is 32 bits.
306 */
307#if !defined(W) || W != 8
308# error Need a 64-bit integer type in order to generate crc32.h.
309#endif
310 FILE *out;
311 int k, n;
312 z_crc_t ltl[8][256];
313 z_word_t big[8][256];
314
315 out = fopen("crc32.h", "w");
316 if (out == NULL) return;
317
318 /* write out little-endian CRC table to crc32.h */
319 fprintf(out,
320 "/* crc32.h -- tables for rapid CRC calculation\n"
321 " * Generated automatically by crc32.c\n */\n"
322 "\n"
323 "local const z_crc_t FAR crc_table[] = {\n"
324 " ");
325 write_table(out, crc_table, 256);
326 fprintf(out,
327 "};\n");
328
329 /* write out big-endian CRC table for 64-bit z_word_t to crc32.h */
330 fprintf(out,
331 "\n"
332 "#ifdef W\n"
333 "\n"
334 "#if W == 8\n"
335 "\n"
336 "local const z_word_t FAR crc_big_table[] = {\n"
337 " ");
338 write_table64(out, crc_big_table, 256);
339 fprintf(out,
340 "};\n");
341
342 /* write out big-endian CRC table for 32-bit z_word_t to crc32.h */
343 fprintf(out,
344 "\n"
345 "#else /* W == 4 */\n"
346 "\n"
347 "local const z_word_t FAR crc_big_table[] = {\n"
348 " ");
349 write_table32hi(out, crc_big_table, 256);
350 fprintf(out,
351 "};\n"
352 "\n"
353 "#endif\n");
354
355 /* write out braid tables for each value of N */
356 for (n = 1; n <= 6; n++) {
357 fprintf(out,
358 "\n"
359 "#if N == %d\n", n);
360
361 /* compute braid tables for this N and 64-bit word_t */
362 braid(ltl, big, n, 8);
363
364 /* write out braid tables for 64-bit z_word_t to crc32.h */
365 fprintf(out,
366 "\n"
367 "#if W == 8\n"
368 "\n"
369 "local const z_crc_t FAR crc_braid_table[][256] = {\n");
370 for (k = 0; k < 8; k++) {
371 fprintf(out, " {");
372 write_table(out, ltl[k], 256);
373 fprintf(out, "}%s", k < 7 ? ",\n" : "");
374 }
375 fprintf(out,
376 "};\n"
377 "\n"
378 "local const z_word_t FAR crc_braid_big_table[][256] = {\n");
379 for (k = 0; k < 8; k++) {
380 fprintf(out, " {");
381 write_table64(out, big[k], 256);
382 fprintf(out, "}%s", k < 7 ? ",\n" : "");
383 }
384 fprintf(out,
385 "};\n");
386
387 /* compute braid tables for this N and 32-bit word_t */
388 braid(ltl, big, n, 4);
389
390 /* write out braid tables for 32-bit z_word_t to crc32.h */
391 fprintf(out,
392 "\n"
393 "#else /* W == 4 */\n"
394 "\n"
395 "local const z_crc_t FAR crc_braid_table[][256] = {\n");
396 for (k = 0; k < 4; k++) {
397 fprintf(out, " {");
398 write_table(out, ltl[k], 256);
399 fprintf(out, "}%s", k < 3 ? ",\n" : "");
400 }
401 fprintf(out,
402 "};\n"
403 "\n"
404 "local const z_word_t FAR crc_braid_big_table[][256] = {\n");
405 for (k = 0; k < 4; k++) {
406 fprintf(out, " {");
407 write_table32hi(out, big[k], 256);
408 fprintf(out, "}%s", k < 3 ? ",\n" : "");
409 }
410 fprintf(out,
411 "};\n"
412 "\n"
413 "#endif\n"
414 "\n"
415 "#endif\n");
416 }
417 fprintf(out,
418 "\n"
419 "#endif\n");
420
421 /* write out zeros operator table to crc32.h */
422 fprintf(out,
423 "\n"
424 "local const z_crc_t FAR x2n_table[] = {\n"
425 " ");
426 write_table(out, x2n_table, 32);
427 fprintf(out,
428 "};\n");
429 fclose(out);
430 }
431#endif /* MAKECRCH */
432}
433
434#ifdef MAKECRCH
435
436/*
437 Write the 32-bit values in table[0..k-1] to out, five per line in
438 hexadecimal separated by commas.
439 */
440local void write_table(out, table, k)
441 FILE *out;
442 const z_crc_t FAR *table;
443 int k;
444{
445 int n;
446
447 for (n = 0; n < k; n++)
448 fprintf(out, "%s0x%08lx%s", n == 0 || n % 5 ? "" : " ",
449 (unsigned long)(table[n]),
450 n == k - 1 ? "" : (n % 5 == 4 ? ",\n" : ", "));
451}
452
453/*
454 Write the high 32-bits of each value in table[0..k-1] to out, five per line
455 in hexadecimal separated by commas.
456 */
457local void write_table32hi(out, table, k)
458FILE *out;
459const z_word_t FAR *table;
460int k;
461{
462 int n;
463
464 for (n = 0; n < k; n++)
465 fprintf(out, "%s0x%08lx%s", n == 0 || n % 5 ? "" : " ",
466 (unsigned long)(table[n] >> 32),
467 n == k - 1 ? "" : (n % 5 == 4 ? ",\n" : ", "));
468}
469
470/*
471 Write the 64-bit values in table[0..k-1] to out, three per line in
472 hexadecimal separated by commas. This assumes that if there is a 64-bit
473 type, then there is also a long long integer type, and it is at least 64
474 bits. If not, then the type cast and format string can be adjusted
475 accordingly.
476 */
477local void write_table64(out, table, k)
478 FILE *out;
479 const z_word_t FAR *table;
480 int k;
481{
482 int n;
483
484 for (n = 0; n < k; n++)
485 fprintf(out, "%s0x%016llx%s", n == 0 || n % 3 ? "" : " ",
486 (unsigned long long)(table[n]),
487 n == k - 1 ? "" : (n % 3 == 2 ? ",\n" : ", "));
488}
489
490/* Actually do the deed. */
491int main()
492{
493 make_crc_table();
494 return 0;
495}
496
497#endif /* MAKECRCH */
498
499#ifdef W
500/*
501 Generate the little and big-endian braid tables for the given n and z_word_t
502 size w. Each array must have room for w blocks of 256 elements.
503 */
504local void braid(ltl, big, n, w)
505 z_crc_t ltl[][256];
506 z_word_t big[][256];
507 int n;
508 int w;
509{
510 int k;
511 z_crc_t i, p, q;
512 for (k = 0; k < w; k++) {
513 p = x2nmodp((n * w + 3 - k) << 3, 0);
514 ltl[k][0] = 0;
515 big[w - 1 - k][0] = 0;
516 for (i = 1; i < 256; i++) {
517 ltl[k][i] = q = multmodp(i << 24, p);
518 big[w - 1 - k][i] = byte_swap(q);
519 }
520 }
521}
522#endif
523
524#else /* !DYNAMIC_CRC_TABLE */
525/* ========================================================================
526 * Tables for byte-wise and braided CRC-32 calculations, and a table of powers
527 * of x for combining CRC-32s, all made by make_crc_table().
528 */
529#include "crc32.h"
530#endif /* DYNAMIC_CRC_TABLE */
531
532/* ========================================================================
533 * Routines used for CRC calculation. Some are also required for the table
534 * generation above.
535 */
536
537/*
538 Return a(x) multiplied by b(x) modulo p(x), where p(x) is the CRC polynomial,
539 reflected. For speed, this requires that a not be zero.
540 */
541local z_crc_t multmodp(a, b)
542 z_crc_t a;
543 z_crc_t b;
544{
545 z_crc_t m, p;
546
547 m = (z_crc_t)1 << 31;
548 p = 0;
549 for (;;) {
550 if (a & m) {
551 p ^= b;
552 if ((a & (m - 1)) == 0)
553 break;
554 }
555 m >>= 1;
556 b = b & 1 ? (b >> 1) ^ POLY : b >> 1;
557 }
558 return p;
559}
560
561/*
562 Return x^(n * 2^k) modulo p(x). Requires that x2n_table[] has been
563 initialized.
564 */
565local z_crc_t x2nmodp(n, k)
566 z_off64_t n;
567 unsigned k;
568{
569 z_crc_t p;
570
571 p = (z_crc_t)1 << 31; /* x^0 == 1 */
572 while (n) {
573 if (n & 1)
574 p = multmodp(x2n_table[k & 31], p);
575 n >>= 1;
576 k++;
577 }
578 return p;
579}
580
581/* =========================================================================
582 * This function can be used by asm versions of crc32(), and to force the
583 * generation of the CRC tables in a threaded application.
584 */
585const z_crc_t FAR * ZEXPORT get_crc_table()
586{
587#ifdef DYNAMIC_CRC_TABLE
588 once(&made, make_crc_table);
589#endif /* DYNAMIC_CRC_TABLE */
590 return (const z_crc_t FAR *)crc_table;
591}
592
593/* =========================================================================
594 * Use ARM machine instructions if available. This will compute the CRC about
595 * ten times faster than the braided calculation. This code does not check for
596 * the presence of the CRC instruction at run time. __ARM_FEATURE_CRC32 will
597 * only be defined if the compilation specifies an ARM processor architecture
598 * that has the instructions. For example, compiling with -march=armv8.1-a or
599 * -march=armv8-a+crc, or -march=native if the compile machine has the crc32
600 * instructions.
601 */
602#ifdef ARMCRC32
603
604/*
605 Constants empirically determined to maximize speed. These values are from
606 measurements on a Cortex-A57. Your mileage may vary.
607 */
608#define Z_BATCH 3990 /* number of words in a batch */
609#define Z_BATCH_ZEROS 0xa10d3d0c /* computed from Z_BATCH = 3990 */
610#define Z_BATCH_MIN 800 /* fewest words in a final batch */
611
612unsigned long ZEXPORT crc32_z(crc, buf, len)
613 unsigned long crc;
614 const unsigned char FAR *buf;
615 z_size_t len;
616{
617 z_crc_t val;
618 z_word_t crc1, crc2;
619 const z_word_t *word;
620 z_word_t val0, val1, val2;
621 z_size_t last, last2, i;
622 z_size_t num;
623
624 /* Return initial CRC, if requested. */
625 if (buf == Z_NULL) return 0;
626
627#ifdef DYNAMIC_CRC_TABLE
628 once(&made, make_crc_table);
629#endif /* DYNAMIC_CRC_TABLE */
630
631 /* Pre-condition the CRC */
632 crc ^= 0xffffffff;
633
634 /* Compute the CRC up to a word boundary. */
635 while (len && ((z_size_t)buf & 7) != 0) {
636 len--;
637 val = *buf++;
638 __asm__ volatile("crc32b %w0, %w0, %w1" : "+r"(crc) : "r"(val));
639 }
640
641 /* Prepare to compute the CRC on full 64-bit words word[0..num-1]. */
642 word = (z_word_t const *)buf;
643 num = len >> 3;
644 len &= 7;
645
646 /* Do three interleaved CRCs to realize the throughput of one crc32x
647 instruction per cycle. Each CRC is calcuated on Z_BATCH words. The three
648 CRCs are combined into a single CRC after each set of batches. */
649 while (num >= 3 * Z_BATCH) {
650 crc1 = 0;
651 crc2 = 0;
652 for (i = 0; i < Z_BATCH; i++) {
653 val0 = word[i];
654 val1 = word[i + Z_BATCH];
655 val2 = word[i + 2 * Z_BATCH];
656 __asm__ volatile("crc32x %w0, %w0, %x1" : "+r"(crc) : "r"(val0));
657 __asm__ volatile("crc32x %w0, %w0, %x1" : "+r"(crc1) : "r"(val1));
658 __asm__ volatile("crc32x %w0, %w0, %x1" : "+r"(crc2) : "r"(val2));
659 }
660 word += 3 * Z_BATCH;
661 num -= 3 * Z_BATCH;
662 crc = multmodp(Z_BATCH_ZEROS, crc) ^ crc1;
663 crc = multmodp(Z_BATCH_ZEROS, crc) ^ crc2;
664 }
665
666 /* Do one last smaller batch with the remaining words, if there are enough
667 to pay for the combination of CRCs. */
668 last = num / 3;
669 if (last >= Z_BATCH_MIN) {
670 last2 = last << 1;
671 crc1 = 0;
672 crc2 = 0;
673 for (i = 0; i < last; i++) {
674 val0 = word[i];
675 val1 = word[i + last];
676 val2 = word[i + last2];
677 __asm__ volatile("crc32x %w0, %w0, %x1" : "+r"(crc) : "r"(val0));
678 __asm__ volatile("crc32x %w0, %w0, %x1" : "+r"(crc1) : "r"(val1));
679 __asm__ volatile("crc32x %w0, %w0, %x1" : "+r"(crc2) : "r"(val2));
680 }
681 word += 3 * last;
682 num -= 3 * last;
683 val = x2nmodp(last, 6);
684 crc = multmodp(val, crc) ^ crc1;
685 crc = multmodp(val, crc) ^ crc2;
686 }
687
688 /* Compute the CRC on any remaining words. */
689 for (i = 0; i < num; i++) {
690 val0 = word[i];
691 __asm__ volatile("crc32x %w0, %w0, %x1" : "+r"(crc) : "r"(val0));
692 }
693 word += num;
694
695 /* Complete the CRC on any remaining bytes. */
696 buf = (const unsigned char FAR *)word;
697 while (len) {
698 len--;
699 val = *buf++;
700 __asm__ volatile("crc32b %w0, %w0, %w1" : "+r"(crc) : "r"(val));
701 }
702
703 /* Return the CRC, post-conditioned. */
704 return crc ^ 0xffffffff;
705}
706
707#else
708
709#ifdef W
710
711/*
712 Return the CRC of the W bytes in the word_t data, taking the
713 least-significant byte of the word as the first byte of data, without any pre
714 or post conditioning. This is used to combine the CRCs of each braid.
715 */
716local z_crc_t crc_word(data)
717 z_word_t data;
718{
719 int k;
720 for (k = 0; k < W; k++)
721 data = (data >> 8) ^ crc_table[data & 0xff];
722 return (z_crc_t)data;
723}
724
725local z_word_t crc_word_big(data)
726 z_word_t data;
727{
728 int k;
729 for (k = 0; k < W; k++)
730 data = (data << 8) ^
731 crc_big_table[(data >> ((W - 1) << 3)) & 0xff];
732 return data;
733}
734
735#endif
736
737/* ========================================================================= */
738unsigned long ZEXPORT crc32_z(crc, buf, len)
739 unsigned long crc;
740 const unsigned char FAR *buf;
741 z_size_t len;
742{
743 /* Return initial CRC, if requested. */
744 if (buf == Z_NULL) return 0;
745
746#ifdef DYNAMIC_CRC_TABLE
747 once(&made, make_crc_table);
748#endif /* DYNAMIC_CRC_TABLE */
749
750 /* Pre-condition the CRC */
751 crc ^= 0xffffffff;
752
753#ifdef W
754
755 /* If provided enough bytes, do a braided CRC calculation. */
756 if (len >= N * W + W - 1) {
757 z_size_t blks;
758 z_word_t const *words;
759 unsigned endian;
760 int k;
761
762 /* Compute the CRC up to a z_word_t boundary. */
763 while (len && ((z_size_t)buf & (W - 1)) != 0) {
764 len--;
765 crc = (crc >> 8) ^ crc_table[(crc ^ *buf++) & 0xff];
766 }
767
768 /* Compute the CRC on as many N z_word_t blocks as are available. */
769 blks = len / (N * W);
770 len -= blks * N * W;
771 words = (z_word_t const *)buf;
772
773 /* Do endian check at execution time instead of compile time, since ARM
774 processors can change the endianess at execution time. If the
775 compiler knows what the endianess will be, it can optimize out the
776 check and the unused branch. */
777 endian = 1;
778 if (*(unsigned char *)&endian) {
779 /* Little endian. */
780
781 z_crc_t crc0;
782 z_word_t word0;
783#if N > 1
784 z_crc_t crc1;
785 z_word_t word1;
786#if N > 2
787 z_crc_t crc2;
788 z_word_t word2;
789#if N > 3
790 z_crc_t crc3;
791 z_word_t word3;
792#if N > 4
793 z_crc_t crc4;
794 z_word_t word4;
795#if N > 5
796 z_crc_t crc5;
797 z_word_t word5;
798#endif
799#endif
800#endif
801#endif
802#endif
803
804 /* Initialize the CRC for each braid. */
805 crc0 = crc;
806#if N > 1
807 crc1 = 0;
808#if N > 2
809 crc2 = 0;
810#if N > 3
811 crc3 = 0;
812#if N > 4
813 crc4 = 0;
814#if N > 5
815 crc5 = 0;
816#endif
817#endif
818#endif
819#endif
820#endif
821
822 /*
823 Process the first blks-1 blocks, computing the CRCs on each braid
824 independently.
825 */
826 while (--blks) {
827 /* Load the word for each braid into registers. */
828 word0 = crc0 ^ words[0];
829#if N > 1
830 word1 = crc1 ^ words[1];
831#if N > 2
832 word2 = crc2 ^ words[2];
833#if N > 3
834 word3 = crc3 ^ words[3];
835#if N > 4
836 word4 = crc4 ^ words[4];
837#if N > 5
838 word5 = crc5 ^ words[5];
839#endif
840#endif
841#endif
842#endif
843#endif
844 words += N;
845
846 /* Compute and update the CRC for each word. The loop should
847 get unrolled. */
848 crc0 = crc_braid_table[0][word0 & 0xff];
849#if N > 1
850 crc1 = crc_braid_table[0][word1 & 0xff];
851#if N > 2
852 crc2 = crc_braid_table[0][word2 & 0xff];
853#if N > 3
854 crc3 = crc_braid_table[0][word3 & 0xff];
855#if N > 4
856 crc4 = crc_braid_table[0][word4 & 0xff];
857#if N > 5
858 crc5 = crc_braid_table[0][word5 & 0xff];
859#endif
860#endif
861#endif
862#endif
863#endif
864 for (k = 1; k < W; k++) {
865 crc0 ^= crc_braid_table[k][(word0 >> (k << 3)) & 0xff];
866#if N > 1
867 crc1 ^= crc_braid_table[k][(word1 >> (k << 3)) & 0xff];
868#if N > 2
869 crc2 ^= crc_braid_table[k][(word2 >> (k << 3)) & 0xff];
870#if N > 3
871 crc3 ^= crc_braid_table[k][(word3 >> (k << 3)) & 0xff];
872#if N > 4
873 crc4 ^= crc_braid_table[k][(word4 >> (k << 3)) & 0xff];
874#if N > 5
875 crc5 ^= crc_braid_table[k][(word5 >> (k << 3)) & 0xff];
876#endif
877#endif
878#endif
879#endif
880#endif
881 }
882 }
883
884 /*
885 Process the last block, combining the CRCs of the N braids at the
886 same time.
887 */
888 crc = crc_word(crc0 ^ words[0]);
889#if N > 1
890 crc = crc_word(crc1 ^ words[1] ^ crc);
891#if N > 2
892 crc = crc_word(crc2 ^ words[2] ^ crc);
893#if N > 3
894 crc = crc_word(crc3 ^ words[3] ^ crc);
895#if N > 4
896 crc = crc_word(crc4 ^ words[4] ^ crc);
897#if N > 5
898 crc = crc_word(crc5 ^ words[5] ^ crc);
899#endif
900#endif
901#endif
902#endif
903#endif
904 words += N;
905 }
906 else {
907 /* Big endian. */
908
909 z_word_t crc0, word0, comb;
910#if N > 1
911 z_word_t crc1, word1;
912#if N > 2
913 z_word_t crc2, word2;
914#if N > 3
915 z_word_t crc3, word3;
916#if N > 4
917 z_word_t crc4, word4;
918#if N > 5
919 z_word_t crc5, word5;
920#endif
921#endif
922#endif
923#endif
924#endif
925
926 /* Initialize the CRC for each braid. */
927 crc0 = byte_swap(crc);
928#if N > 1
929 crc1 = 0;
930#if N > 2
931 crc2 = 0;
932#if N > 3
933 crc3 = 0;
934#if N > 4
935 crc4 = 0;
936#if N > 5
937 crc5 = 0;
938#endif
939#endif
940#endif
941#endif
942#endif
943
944 /*
945 Process the first blks-1 blocks, computing the CRCs on each braid
946 independently.
947 */
948 while (--blks) {
949 /* Load the word for each braid into registers. */
950 word0 = crc0 ^ words[0];
951#if N > 1
952 word1 = crc1 ^ words[1];
953#if N > 2
954 word2 = crc2 ^ words[2];
955#if N > 3
956 word3 = crc3 ^ words[3];
957#if N > 4
958 word4 = crc4 ^ words[4];
959#if N > 5
960 word5 = crc5 ^ words[5];
961#endif
962#endif
963#endif
964#endif
965#endif
966 words += N;
967
968 /* Compute and update the CRC for each word. The loop should
969 get unrolled. */
970 crc0 = crc_braid_big_table[0][word0 & 0xff];
971#if N > 1
972 crc1 = crc_braid_big_table[0][word1 & 0xff];
973#if N > 2
974 crc2 = crc_braid_big_table[0][word2 & 0xff];
975#if N > 3
976 crc3 = crc_braid_big_table[0][word3 & 0xff];
977#if N > 4
978 crc4 = crc_braid_big_table[0][word4 & 0xff];
979#if N > 5
980 crc5 = crc_braid_big_table[0][word5 & 0xff];
981#endif
982#endif
983#endif
984#endif
985#endif
986 for (k = 1; k < W; k++) {
987 crc0 ^= crc_braid_big_table[k][(word0 >> (k << 3)) & 0xff];
988#if N > 1
989 crc1 ^= crc_braid_big_table[k][(word1 >> (k << 3)) & 0xff];
990#if N > 2
991 crc2 ^= crc_braid_big_table[k][(word2 >> (k << 3)) & 0xff];
992#if N > 3
993 crc3 ^= crc_braid_big_table[k][(word3 >> (k << 3)) & 0xff];
994#if N > 4
995 crc4 ^= crc_braid_big_table[k][(word4 >> (k << 3)) & 0xff];
996#if N > 5
997 crc5 ^= crc_braid_big_table[k][(word5 >> (k << 3)) & 0xff];
998#endif
999#endif
1000#endif
1001#endif
1002#endif
1003 }
1004 }
1005
1006 /*
1007 Process the last block, combining the CRCs of the N braids at the
1008 same time.
1009 */
1010 comb = crc_word_big(crc0 ^ words[0]);
1011#if N > 1
1012 comb = crc_word_big(crc1 ^ words[1] ^ comb);
1013#if N > 2
1014 comb = crc_word_big(crc2 ^ words[2] ^ comb);
1015#if N > 3
1016 comb = crc_word_big(crc3 ^ words[3] ^ comb);
1017#if N > 4
1018 comb = crc_word_big(crc4 ^ words[4] ^ comb);
1019#if N > 5
1020 comb = crc_word_big(crc5 ^ words[5] ^ comb);
1021#endif
1022#endif
1023#endif
1024#endif
1025#endif
1026 words += N;
1027 crc = byte_swap(comb);
1028 }
1029
1030 /*
1031 Update the pointer to the remaining bytes to process.
1032 */
1033 buf = (unsigned char const *)words;
1034 }
1035
1036#endif /* W */
1037
1038 /* Complete the computation of the CRC on any remaining bytes. */
1039 while (len >= 8) {
1040 len -= 8;
1041 crc = (crc >> 8) ^ crc_table[(crc ^ *buf++) & 0xff];
1042 crc = (crc >> 8) ^ crc_table[(crc ^ *buf++) & 0xff];
1043 crc = (crc >> 8) ^ crc_table[(crc ^ *buf++) & 0xff];
1044 crc = (crc >> 8) ^ crc_table[(crc ^ *buf++) & 0xff];
1045 crc = (crc >> 8) ^ crc_table[(crc ^ *buf++) & 0xff];
1046 crc = (crc >> 8) ^ crc_table[(crc ^ *buf++) & 0xff];
1047 crc = (crc >> 8) ^ crc_table[(crc ^ *buf++) & 0xff];
1048 crc = (crc >> 8) ^ crc_table[(crc ^ *buf++) & 0xff];
1049 }
1050 while (len) {
1051 len--;
1052 crc = (crc >> 8) ^ crc_table[(crc ^ *buf++) & 0xff];
1053 }
1054
1055 /* Return the CRC, post-conditioned. */
1056 return crc ^ 0xffffffff;
1057}
1058
1059#endif
1060
1061/* ========================================================================= */
1062unsigned long ZEXPORT crc32(crc, buf, len)
1063 unsigned long crc;
1064 const unsigned char FAR *buf;
1065 uInt len;
1066{
1067 return crc32_z(crc, buf, len);
1068}
1069
1070/* ========================================================================= */
1071uLong ZEXPORT crc32_combine64(crc1, crc2, len2)
1072 uLong crc1;
1073 uLong crc2;
1074 z_off64_t len2;
1075{
1076#ifdef DYNAMIC_CRC_TABLE
1077 once(&made, make_crc_table);
1078#endif /* DYNAMIC_CRC_TABLE */
1079 return multmodp(x2nmodp(len2, 3), crc1) ^ crc2;
1080}
1081
1082/* ========================================================================= */
1083uLong ZEXPORT crc32_combine(crc1, crc2, len2)
1084 uLong crc1;
1085 uLong crc2;
1086 z_off_t len2;
1087{
1088 return crc32_combine64(crc1, crc2, len2);
1089}
1090
1091/* ========================================================================= */
1092uLong ZEXPORT crc32_combine_gen64(len2)
1093 z_off64_t len2;
1094{
1095#ifdef DYNAMIC_CRC_TABLE
1096 once(&made, make_crc_table);
1097#endif /* DYNAMIC_CRC_TABLE */
1098 return x2nmodp(len2, 3);
1099}
1100
1101/* ========================================================================= */
1102uLong ZEXPORT crc32_combine_gen(len2)
1103 z_off_t len2;
1104{
1105 return crc32_combine_gen64(len2);
1106}
1107
1108/* ========================================================================= */
1109uLong crc32_combine_op(crc1, crc2, op)
1110 uLong crc1;
1111 uLong crc2;
1112 uLong op;
1113{
1114 return multmodp(op, crc1) ^ crc2;
1115}
#define N
Definition: crc32.c:56
const z_crc_t FAR *ZEXPORT get_crc_table()
Definition: crc32.c:585
z_crc_t x2nmodp(z_off64_t n, unsigned k)
Definition: crc32.c:565
unsigned long ZEXPORT crc32_z(unsigned long crc, const unsigned char FAR *buf, z_size_t len)
Definition: crc32.c:738
#define W
Definition: crc32.c:84
#define POLY
Definition: crc32.c:140
uLong ZEXPORT crc32_combine(uLong crc1, uLong crc2, z_off_t len2)
Definition: crc32.c:1083
uLong ZEXPORT crc32_combine64(uLong crc1, uLong crc2, z_off64_t len2)
Definition: crc32.c:1071
uLong ZEXPORT crc32_combine_gen(z_off_t len2)
Definition: crc32.c:1102
z_crc_t multmodp(z_crc_t a, z_crc_t b)
Definition: crc32.c:541
uLong ZEXPORT crc32_combine_gen64(z_off64_t len2)
Definition: crc32.c:1092
uLong crc32_combine_op(uLong crc1, uLong crc2, uLong op)
Definition: crc32.c:1109
unsigned long ZEXPORT crc32(unsigned long crc, const unsigned char FAR *buf, uInt len)
Definition: crc32.c:1062
const z_crc_t FAR crc_table[]
Definition: crc32.h:5
const z_crc_t FAR x2n_table[]
Definition: crc32.h:9439
#define local
Definition: gzguts.h:114
voidpf alloc_func OF((voidpf opaque, uInt items, uInt size))
Definition: zlib.h:81
#define Z_NULL
Definition: zlib.h:212