HardenedBSD src tree
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

448 lines
13KB

  1. /*-
  2. * Copyright (c) 2008 Joerg Sonnenberger
  3. * All rights reserved.
  4. *
  5. * Redistribution and use in source and binary forms, with or without
  6. * modification, are permitted provided that the following conditions
  7. * are met:
  8. * 1. Redistributions of source code must retain the above copyright
  9. * notice, this list of conditions and the following disclaimer.
  10. * 2. Redistributions in binary form must reproduce the above copyright
  11. * notice, this list of conditions and the following disclaimer in the
  12. * documentation and/or other materials provided with the distribution.
  13. *
  14. * THIS SOFTWARE IS PROVIDED BY THE AUTHOR(S) ``AS IS'' AND ANY EXPRESS OR
  15. * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
  16. * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
  17. * IN NO EVENT SHALL THE AUTHOR(S) BE LIABLE FOR ANY DIRECT, INDIRECT,
  18. * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
  19. * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
  20. * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
  21. * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
  22. * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
  23. * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  24. */
  25. /*-
  26. * Copyright (c) 1985, 1986, 1992, 1993
  27. * The Regents of the University of California. All rights reserved.
  28. *
  29. * This code is derived from software contributed to Berkeley by
  30. * Diomidis Spinellis and James A. Woods, derived from original
  31. * work by Spencer Thomas and Joseph Orost.
  32. *
  33. * Redistribution and use in source and binary forms, with or without
  34. * modification, are permitted provided that the following conditions
  35. * are met:
  36. * 1. Redistributions of source code must retain the above copyright
  37. * notice, this list of conditions and the following disclaimer.
  38. * 2. Redistributions in binary form must reproduce the above copyright
  39. * notice, this list of conditions and the following disclaimer in the
  40. * documentation and/or other materials provided with the distribution.
  41. * 3. Neither the name of the University nor the names of its contributors
  42. * may be used to endorse or promote products derived from this software
  43. * without specific prior written permission.
  44. *
  45. * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
  46. * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  47. * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  48. * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
  49. * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  50. * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  51. * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  52. * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  53. * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  54. * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  55. * SUCH DAMAGE.
  56. */
  57. #include "archive_platform.h"
  58. __FBSDID("$FreeBSD: head/lib/libarchive/archive_write_set_compression_compress.c 201111 2009-12-28 03:33:05Z kientzle $");
  59. #ifdef HAVE_ERRNO_H
  60. #include <errno.h>
  61. #endif
  62. #ifdef HAVE_STDLIB_H
  63. #include <stdlib.h>
  64. #endif
  65. #ifdef HAVE_STRING_H
  66. #include <string.h>
  67. #endif
  68. #include "archive.h"
  69. #include "archive_private.h"
  70. #include "archive_write_private.h"
  71. #define HSIZE 69001 /* 95% occupancy */
  72. #define HSHIFT 8 /* 8 - trunc(log2(HSIZE / 65536)) */
  73. #define CHECK_GAP 10000 /* Ratio check interval. */
  74. #define MAXCODE(bits) ((1 << (bits)) - 1)
  75. /*
  76. * the next two codes should not be changed lightly, as they must not
  77. * lie within the contiguous general code space.
  78. */
  79. #define FIRST 257 /* First free entry. */
  80. #define CLEAR 256 /* Table clear output code. */
  81. struct private_data {
  82. int64_t in_count, out_count, checkpoint;
  83. int code_len; /* Number of bits/code. */
  84. int cur_maxcode; /* Maximum code, given n_bits. */
  85. int max_maxcode; /* Should NEVER generate this code. */
  86. int hashtab [HSIZE];
  87. unsigned short codetab [HSIZE];
  88. int first_free; /* First unused entry. */
  89. int compress_ratio;
  90. int cur_code, cur_fcode;
  91. int bit_offset;
  92. unsigned char bit_buf;
  93. unsigned char *compressed;
  94. size_t compressed_buffer_size;
  95. size_t compressed_offset;
  96. };
  97. static int archive_compressor_compress_open(struct archive_write_filter *);
  98. static int archive_compressor_compress_write(struct archive_write_filter *,
  99. const void *, size_t);
  100. static int archive_compressor_compress_close(struct archive_write_filter *);
  101. static int archive_compressor_compress_free(struct archive_write_filter *);
  102. #if ARCHIVE_VERSION_NUMBER < 4000000
  103. int
  104. archive_write_set_compression_compress(struct archive *a)
  105. {
  106. __archive_write_filters_free(a);
  107. return (archive_write_add_filter_compress(a));
  108. }
  109. #endif
  110. /*
  111. * Add a compress filter to this write handle.
  112. */
  113. int
  114. archive_write_add_filter_compress(struct archive *_a)
  115. {
  116. struct archive_write *a = (struct archive_write *)_a;
  117. struct archive_write_filter *f = __archive_write_allocate_filter(_a);
  118. archive_check_magic(&a->archive, ARCHIVE_WRITE_MAGIC,
  119. ARCHIVE_STATE_NEW, "archive_write_add_filter_compress");
  120. f->open = &archive_compressor_compress_open;
  121. f->code = ARCHIVE_FILTER_COMPRESS;
  122. f->name = "compress";
  123. return (ARCHIVE_OK);
  124. }
  125. /*
  126. * Setup callback.
  127. */
  128. static int
  129. archive_compressor_compress_open(struct archive_write_filter *f)
  130. {
  131. struct private_data *state;
  132. size_t bs = 65536, bpb;
  133. f->code = ARCHIVE_FILTER_COMPRESS;
  134. f->name = "compress";
  135. state = (struct private_data *)calloc(1, sizeof(*state));
  136. if (state == NULL) {
  137. archive_set_error(f->archive, ENOMEM,
  138. "Can't allocate data for compression");
  139. return (ARCHIVE_FATAL);
  140. }
  141. if (f->archive->magic == ARCHIVE_WRITE_MAGIC) {
  142. /* Buffer size should be a multiple number of the of bytes
  143. * per block for performance. */
  144. bpb = archive_write_get_bytes_per_block(f->archive);
  145. if (bpb > bs)
  146. bs = bpb;
  147. else if (bpb != 0)
  148. bs -= bs % bpb;
  149. }
  150. state->compressed_buffer_size = bs;
  151. state->compressed = malloc(state->compressed_buffer_size);
  152. if (state->compressed == NULL) {
  153. archive_set_error(f->archive, ENOMEM,
  154. "Can't allocate data for compression buffer");
  155. free(state);
  156. return (ARCHIVE_FATAL);
  157. }
  158. f->write = archive_compressor_compress_write;
  159. f->close = archive_compressor_compress_close;
  160. f->free = archive_compressor_compress_free;
  161. state->max_maxcode = 0x10000; /* Should NEVER generate this code. */
  162. state->in_count = 0; /* Length of input. */
  163. state->bit_buf = 0;
  164. state->bit_offset = 0;
  165. state->out_count = 3; /* Includes 3-byte header mojo. */
  166. state->compress_ratio = 0;
  167. state->checkpoint = CHECK_GAP;
  168. state->code_len = 9;
  169. state->cur_maxcode = MAXCODE(state->code_len);
  170. state->first_free = FIRST;
  171. memset(state->hashtab, 0xff, sizeof(state->hashtab));
  172. /* Prime output buffer with a gzip header. */
  173. state->compressed[0] = 0x1f; /* Compress */
  174. state->compressed[1] = 0x9d;
  175. state->compressed[2] = 0x90; /* Block mode, 16bit max */
  176. state->compressed_offset = 3;
  177. f->data = state;
  178. return (0);
  179. }
  180. /*-
  181. * Output the given code.
  182. * Inputs:
  183. * code: A n_bits-bit integer. If == -1, then EOF. This assumes
  184. * that n_bits <= (long)wordsize - 1.
  185. * Outputs:
  186. * Outputs code to the file.
  187. * Assumptions:
  188. * Chars are 8 bits long.
  189. * Algorithm:
  190. * Maintain a BITS character long buffer (so that 8 codes will
  191. * fit in it exactly). Use the VAX insv instruction to insert each
  192. * code in turn. When the buffer fills up empty it and start over.
  193. */
  194. static const unsigned char rmask[9] =
  195. {0x00, 0x01, 0x03, 0x07, 0x0f, 0x1f, 0x3f, 0x7f, 0xff};
  196. static int
  197. output_byte(struct archive_write_filter *f, unsigned char c)
  198. {
  199. struct private_data *state = f->data;
  200. state->compressed[state->compressed_offset++] = c;
  201. ++state->out_count;
  202. if (state->compressed_buffer_size == state->compressed_offset) {
  203. int ret = __archive_write_filter(f->next_filter,
  204. state->compressed, state->compressed_buffer_size);
  205. if (ret != ARCHIVE_OK)
  206. return ARCHIVE_FATAL;
  207. state->compressed_offset = 0;
  208. }
  209. return ARCHIVE_OK;
  210. }
  211. static int
  212. output_code(struct archive_write_filter *f, int ocode)
  213. {
  214. struct private_data *state = f->data;
  215. int bits, ret, clear_flg, bit_offset;
  216. clear_flg = ocode == CLEAR;
  217. /*
  218. * Since ocode is always >= 8 bits, only need to mask the first
  219. * hunk on the left.
  220. */
  221. bit_offset = state->bit_offset % 8;
  222. state->bit_buf |= (ocode << bit_offset) & 0xff;
  223. output_byte(f, state->bit_buf);
  224. bits = state->code_len - (8 - bit_offset);
  225. ocode >>= 8 - bit_offset;
  226. /* Get any 8 bit parts in the middle (<=1 for up to 16 bits). */
  227. if (bits >= 8) {
  228. output_byte(f, ocode & 0xff);
  229. ocode >>= 8;
  230. bits -= 8;
  231. }
  232. /* Last bits. */
  233. state->bit_offset += state->code_len;
  234. state->bit_buf = ocode & rmask[bits];
  235. if (state->bit_offset == state->code_len * 8)
  236. state->bit_offset = 0;
  237. /*
  238. * If the next entry is going to be too big for the ocode size,
  239. * then increase it, if possible.
  240. */
  241. if (clear_flg || state->first_free > state->cur_maxcode) {
  242. /*
  243. * Write the whole buffer, because the input side won't
  244. * discover the size increase until after it has read it.
  245. */
  246. if (state->bit_offset > 0) {
  247. while (state->bit_offset < state->code_len * 8) {
  248. ret = output_byte(f, state->bit_buf);
  249. if (ret != ARCHIVE_OK)
  250. return ret;
  251. state->bit_offset += 8;
  252. state->bit_buf = 0;
  253. }
  254. }
  255. state->bit_buf = 0;
  256. state->bit_offset = 0;
  257. if (clear_flg) {
  258. state->code_len = 9;
  259. state->cur_maxcode = MAXCODE(state->code_len);
  260. } else {
  261. state->code_len++;
  262. if (state->code_len == 16)
  263. state->cur_maxcode = state->max_maxcode;
  264. else
  265. state->cur_maxcode = MAXCODE(state->code_len);
  266. }
  267. }
  268. return (ARCHIVE_OK);
  269. }
  270. static int
  271. output_flush(struct archive_write_filter *f)
  272. {
  273. struct private_data *state = f->data;
  274. int ret;
  275. /* At EOF, write the rest of the buffer. */
  276. if (state->bit_offset % 8) {
  277. state->code_len = (state->bit_offset % 8 + 7) / 8;
  278. ret = output_byte(f, state->bit_buf);
  279. if (ret != ARCHIVE_OK)
  280. return ret;
  281. }
  282. return (ARCHIVE_OK);
  283. }
  284. /*
  285. * Write data to the compressed stream.
  286. */
  287. static int
  288. archive_compressor_compress_write(struct archive_write_filter *f,
  289. const void *buff, size_t length)
  290. {
  291. struct private_data *state = (struct private_data *)f->data;
  292. int i;
  293. int ratio;
  294. int c, disp, ret;
  295. const unsigned char *bp;
  296. if (length == 0)
  297. return ARCHIVE_OK;
  298. bp = buff;
  299. if (state->in_count == 0) {
  300. state->cur_code = *bp++;
  301. ++state->in_count;
  302. --length;
  303. }
  304. while (length--) {
  305. c = *bp++;
  306. state->in_count++;
  307. state->cur_fcode = (c << 16) + state->cur_code;
  308. i = ((c << HSHIFT) ^ state->cur_code); /* Xor hashing. */
  309. if (state->hashtab[i] == state->cur_fcode) {
  310. state->cur_code = state->codetab[i];
  311. continue;
  312. }
  313. if (state->hashtab[i] < 0) /* Empty slot. */
  314. goto nomatch;
  315. /* Secondary hash (after G. Knott). */
  316. if (i == 0)
  317. disp = 1;
  318. else
  319. disp = HSIZE - i;
  320. probe:
  321. if ((i -= disp) < 0)
  322. i += HSIZE;
  323. if (state->hashtab[i] == state->cur_fcode) {
  324. state->cur_code = state->codetab[i];
  325. continue;
  326. }
  327. if (state->hashtab[i] >= 0)
  328. goto probe;
  329. nomatch:
  330. ret = output_code(f, state->cur_code);
  331. if (ret != ARCHIVE_OK)
  332. return ret;
  333. state->cur_code = c;
  334. if (state->first_free < state->max_maxcode) {
  335. state->codetab[i] = state->first_free++; /* code -> hashtable */
  336. state->hashtab[i] = state->cur_fcode;
  337. continue;
  338. }
  339. if (state->in_count < state->checkpoint)
  340. continue;
  341. state->checkpoint = state->in_count + CHECK_GAP;
  342. if (state->in_count <= 0x007fffff && state->out_count != 0)
  343. ratio = (int)(state->in_count * 256 / state->out_count);
  344. else if ((ratio = (int)(state->out_count / 256)) == 0)
  345. ratio = 0x7fffffff;
  346. else
  347. ratio = (int)(state->in_count / ratio);
  348. if (ratio > state->compress_ratio)
  349. state->compress_ratio = ratio;
  350. else {
  351. state->compress_ratio = 0;
  352. memset(state->hashtab, 0xff, sizeof(state->hashtab));
  353. state->first_free = FIRST;
  354. ret = output_code(f, CLEAR);
  355. if (ret != ARCHIVE_OK)
  356. return ret;
  357. }
  358. }
  359. return (ARCHIVE_OK);
  360. }
  361. /*
  362. * Finish the compression...
  363. */
  364. static int
  365. archive_compressor_compress_close(struct archive_write_filter *f)
  366. {
  367. struct private_data *state = (struct private_data *)f->data;
  368. int ret;
  369. ret = output_code(f, state->cur_code);
  370. if (ret != ARCHIVE_OK)
  371. return ret;
  372. ret = output_flush(f);
  373. if (ret != ARCHIVE_OK)
  374. return ret;
  375. /* Write the last block */
  376. ret = __archive_write_filter(f->next_filter,
  377. state->compressed, state->compressed_offset);
  378. return (ret);
  379. }
  380. static int
  381. archive_compressor_compress_free(struct archive_write_filter *f)
  382. {
  383. struct private_data *state = (struct private_data *)f->data;
  384. free(state->compressed);
  385. free(state);
  386. return (ARCHIVE_OK);
  387. }