1 files changed, 0 insertions, 168 deletions
diff --git a/cbits/cbits.c b/cbits/cbits.c
deleted file mode 100644
index c11645b..0000000
--- a/cbits/cbits.c
+++ /dev/null
@@ -1,168 +0,0 @@
-/*
- * Copyright (c) 2011 Bryan O'Sullivan <bos@serpentine.com>.
- *
- * Portions copyright (c) 2008-2010 Björn Höhrmann <bjoern@hoehrmann.de>.
- *
- * See http://bjoern.hoehrmann.de/utf-8/decoder/dfa/ for details.
- */
-#include <string.h>
-#include <stdint.h>
-#include <stdio.h>
-#include "pipes_text_cbits.h"
-/* Automaton state in which a complete code point has just been decoded. */
-#define UTF8_ACCEPT 0
-/* Automaton state reached once the input is not valid UTF-8. */
-#define UTF8_REJECT 12
-/*
- * Combined lookup table for the UTF-8 decoding automaton. decode()
- * consults it twice per input byte: utf8d[byte] gives the character
- * class of the byte, and utf8d[256 + state + class] gives the next state.
- */
-static const uint8_t utf8d[] = {
-  /*
-   * The first part of the table (indexed directly by byte value) maps
-   * bytes to character classes, to reduce the size of the transition
-   * table and create bitmasks.
-   */
-   0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,  0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-   0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,  0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-   0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,  0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-   0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,  0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-   1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,  9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,
-   7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,  7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,
-   8,8,2,2,2,2,2,2,2,2,2,2,2,2,2,2,  2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
-  10,3,3,3,3,3,3,3,3,3,3,3,3,4,3,3, 11,6,6,6,5,8,8,8,8,8,8,8,8,8,8,8,
-  /*
-   * The second part is a transition table that maps a combination of
-   * a state of the automaton and a character class to a state. State
-   * values are used as offsets into this part (UTF8_REJECT is 12), so
-   * the lookup is a plain addition of state and class.
-   */
-   0,12,24,36,60,96,84,12,12,12,48,72, 12,12,12,12,12,12,12,12,12,12,12,12,
-  12, 0,12,12,12,12,12, 0,12, 0,12,12, 12,24,12,12,12,12,12,24,12,24,12,12,
-  12,12,12,12,12,12,12,24,12,12,12,12, 12,24,12,12,12,12,12,12,12,24,12,12,
-  12,12,12,12,12,12,12,36,12,36,12,12, 12,36,12,12,12,12,12,36,12,36,12,12,
-  12,36,12,12,12,12,12,12,12,12,12,12,
-};
-/*
- * Feed one input byte to the UTF-8 automaton.
- *
- * state - in/out: current automaton state; replaced by the next state
- * codep - in/out: code point being assembled
- * byte  - the input byte (used as an index into utf8d)
- *
- * Returns the new state, i.e. the value just stored in *state.
- */
-static inline uint32_t
-decode(uint32_t *state, uint32_t* codep, uint32_t byte) {
-  const uint32_t cls = utf8d[byte];
-  if (*state == UTF8_ACCEPT) {
-    /* First byte of a sequence: the class doubles as a shift count
-     * that masks off the length-marker bits. */
-    *codep = (0xff >> cls) & (byte);
-  } else {
-    /* Continuation byte: append its low six bits. */
-    *codep = (byte & 0x3fu) | (*codep << 6);
-  }
-  *state = utf8d[256 + *state + cls];
-  return *state;
-}
-/*
- * A best-effort decoder. Runs until it hits either end of input or
- * the start of an invalid byte sequence, writing UTF-16 code units.
- *
- * Parameters:
- *   dest       - base of the output buffer; no bounds check is made on it
- *   destoff    - in: offset (in 16-bit units) at which to start writing
- *                out: offset one past the last unit written
- *   src        - in: first byte to read
- *                out: location just past the last code point that was
- *                     decoded completely
- *   srcend     - one past the last readable input byte
- *   codepoint0 - in/out: code point under construction
- *   state0     - in/out: automaton state
- *
- * Exposing the internal decoder state (state0 and codepoint0) allows
- * one to restart the decoder after it terminates (say, due to a
- * partial codepoint).
- *
- * The return value is the location one past the last byte consumed.
- * In particular, there are a few possible outcomes,
- *
- *   1) We decoded the buffer entirely:
- *      We return srcend, and *src == srcend
- *      *state0 == UTF8_ACCEPT
- *
- *   2) We met an invalid encoding:
- *      We return the address one past the byte that made the automaton
- *      reject (_hs_pipes_text_decode_utf8_state steps back by one so its
- *      callers see the offending byte itself)
- *      *state0 == UTF8_REJECT
- *
- *   3) We reached the end of the buffer while decoding a codepoint:
- *      We return srcend, and *src points to the first byte of the
- *      partial codepoint
- *      *state0 != UTF8_ACCEPT, UTF8_REJECT
- *
- */
- #if defined(__GNUC__) || defined(__clang__)
- static inline uint8_t const *
- _hs_pipes_text_decode_utf8_int(uint16_t *const dest, size_t *destoff,
-                         const uint8_t **src, const uint8_t *srcend,
-                         uint32_t *codepoint0, uint32_t *state0)
-   __attribute((always_inline));
- #endif
-static inline uint8_t const *
-_hs_pipes_text_decode_utf8_int(uint16_t *const dest, size_t *destoff,
-                         const uint8_t **src, const uint8_t *srcend,
-                         uint32_t *codepoint0, uint32_t *state0)
-{
- uint16_t *d = dest + *destoff;
- const uint8_t *s = *src, *last = *src;
- uint32_t state = *state0;
- uint32_t codepoint = *codepoint0;
- while (s < srcend) {
-#if defined(__i386__) || defined(__x86_64__)
-   /*
-    * This code will only work on a little-endian system.
-    *
-    * It gives a substantial speed win on data that is purely or
-    * partly ASCII (e.g. HTML), at only a slight cost on purely
-    * non-ASCII text.
-    */
-   if (state == UTF8_ACCEPT) {
-     /*
-      * Compare the remaining length instead of forming srcend - 4,
-      * which would be an out-of-range pointer for very short buffers.
-      */
-     while (srcend - s > 4) {
-        /*
-         * memcpy performs the 4-byte load without casting away const
-         * and without an unaligned, type-punned dereference.
-         */
-        memcpy(&codepoint, s, sizeof codepoint);
-        /* Any high bit set means a non-ASCII byte: use the slow path. */
-        if ((codepoint & 0x80808080) != 0)
-          break;
-        s += 4;
-        /*
-         * Tried 32-bit stores here, but the extra bit-twiddling
-         * slowed the code down.
-         */
-        *d++ = (uint16_t) (codepoint & 0xff);
-        *d++ = (uint16_t) ((codepoint >> 8) & 0xff);
-        *d++ = (uint16_t) ((codepoint >> 16) & 0xff);
-        *d++ = (uint16_t) ((codepoint >> 24) & 0xff);
-     }
-     last = s;
-   }
-#endif
-   if (decode(&state, &codepoint, *s++) != UTF8_ACCEPT) {
-     /* Mid-sequence: keep reading. Rejected: stop right here. */
-     if (state != UTF8_REJECT)
-        continue;
-     break;
-   }
-   if (codepoint <= 0xffff)
-     *d++ = (uint16_t) codepoint;
-   else {
-     /* Surrogate pair; 0xD7C0 is 0xD800 - (0x10000 >> 10). */
-     *d++ = (uint16_t) (0xD7C0 + (codepoint >> 10));
-     *d++ = (uint16_t) (0xDC00 + (codepoint & 0x3FF));
-   }
-   /* Only advance past code points that were decoded completely. */
-   last = s;
- }
- *destoff = d - dest;
- *codepoint0 = codepoint;
- *state0 = state;
- *src = last;
- return s;
-}
-/*
- * Externally visible entry point for the resumable decoder.
- *
- * Forwards every argument to _hs_pipes_text_decode_utf8_int and returns
- * its result, except that when the automaton ends in UTF8_REJECT the
- * returned pointer is moved back by one byte, onto the byte that caused
- * the rejection. *destoff, *src, *codepoint0 and *state0 are updated by
- * the inner decoder.
- */
-uint8_t const *
-_hs_pipes_text_decode_utf8_state(uint16_t *const dest, size_t *destoff,
-                          const uint8_t **src,
-                          const uint8_t *srcend,
-                          uint32_t *codepoint0, uint32_t *state0)
-{
- uint8_t const *next = _hs_pipes_text_decode_utf8_int(dest, destoff, src, srcend,
-                                                 codepoint0, state0);
- /* The inner decoder has already consumed the offending byte. */
- if (*state0 == UTF8_REJECT)
-   next -= 1;
- return next;
-}

diff --git a/cbits/cbits.c b/cbits/cbits.c deleted file mode 100644 index c11645b..0000000 --- a/cbits/cbits.c +++ /dev/null
@@ -1,168 +0,0 @@
1	/*
2	* Copyright (c) 2011 Bryan O'Sullivan <bos@serpentine.com>.
3	*
4	* Portions copyright (c) 2008-2010 Björn Höhrmann <bjoern@hoehrmann.de>.
5	*
6	* See http://bjoern.hoehrmann.de/utf-8/decoder/dfa/ for details.
7	*/
8
9	#include <string.h>
10	#include <stdint.h>
11	#include <stdio.h>
12	#include "pipes_text_cbits.h"
13
14
15
16	#define UTF8_ACCEPT 0
17	#define UTF8_REJECT 12
18
19	static const uint8_t utf8d[] = {
20	/*
21	* The first part of the table maps bytes to character classes that
22	* to reduce the size of the transition table and create bitmasks.
23	*/
24	0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
25	0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
26	0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
27	0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
28	1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, 9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,
29	7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7, 7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,
30	8,8,2,2,2,2,2,2,2,2,2,2,2,2,2,2, 2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
31	10,3,3,3,3,3,3,3,3,3,3,3,3,4,3,3, 11,6,6,6,5,8,8,8,8,8,8,8,8,8,8,8,
32
33	/*
34	* The second part is a transition table that maps a combination of
35	* a state of the automaton and a character class to a state.
36	*/
37	0,12,24,36,60,96,84,12,12,12,48,72, 12,12,12,12,12,12,12,12,12,12,12,12,
38	12, 0,12,12,12,12,12, 0,12, 0,12,12, 12,24,12,12,12,12,12,24,12,24,12,12,
39	12,12,12,12,12,12,12,24,12,12,12,12, 12,24,12,12,12,12,12,12,12,24,12,12,
40	12,12,12,12,12,12,12,36,12,36,12,12, 12,36,12,12,12,12,12,36,12,36,12,12,
41	12,36,12,12,12,12,12,12,12,12,12,12,
42	};
43
44	static inline uint32_t
45	decode(uint32_t *state, uint32_t* codep, uint32_t byte) {
46	uint32_t type = utf8d[byte];
47
48	*codep = (*state != UTF8_ACCEPT) ?
49	(byte & 0x3fu) | (*codep << 6) :
50	(0xff >> type) & (byte);
51
52	return *state = utf8d[256 + *state + type];
53	}
54
55	/*
56	* A best-effort decoder. Runs until it hits either end of input or
57	* the start of an invalid byte sequence.
58	*
59	* At exit, we update *destoff with the next offset to write to, *src
60	* with the next source location past the last one successfully
61	* decoded, and return the next source location to read from.
62	*
63	* Moreover, we expose the internal decoder state (state0 and
64	* codepoint0), allowing one to restart the decoder after it
65	* terminates (say, due to a partial codepoint).
66	*
67	* In particular, there are a few possible outcomes,
68	*
69	* 1) We decoded the buffer entirely:
70	* In this case we return srcend
71	* state0 == UTF8_ACCEPT
72	*
73	* 2) We met an invalid encoding
74	* In this case we return the address of the first invalid byte
75	* state0 == UTF8_REJECT
76	*
77	* 3) We reached the end of the buffer while decoding a codepoint
78	* In this case we return a pointer to the first byte of the partial codepoint
79	* state0 != UTF8_ACCEPT, UTF8_REJECT
80	*
81	*/
82
83	#if defined(__GNUC__) || defined(__clang__)
84	static inline uint8_t const *
85	_hs_pipes_text_decode_utf8_int(uint16_t *const dest, size_t *destoff,
86	const uint8_t const **src, const uint8_t const *srcend,
87	uint32_t *codepoint0, uint32_t *state0)
88	__attribute((always_inline));
89	#endif
90
91	static inline uint8_t const *
92	_hs_pipes_text_decode_utf8_int(uint16_t *const dest, size_t *destoff,
93	const uint8_t const **src, const uint8_t const *srcend,
94	uint32_t *codepoint0, uint32_t *state0)
95	{
96	uint16_t *d = dest + *destoff;
97	const uint8_t *s = *src, *last = *src;
98	uint32_t state = *state0;
99	uint32_t codepoint = *codepoint0;
100
101	while (s < srcend) {
102	#if defined(__i386__) || defined(__x86_64__)
103	/*
104	* This code will only work on a little-endian system that
105	* supports unaligned loads.
106	*
107	* It gives a substantial speed win on data that is purely or
108	* partly ASCII (e.g. HTML), at only a slight cost on purely
109	* non-ASCII text.
110	*/
111
112	if (state == UTF8_ACCEPT) {
113	while (s < srcend - 4) {
114	codepoint = *((uint32_t *) s);
115	if ((codepoint & 0x80808080) != 0)
116	break;
117	s += 4;
118
119	/*
120	* Tried 32-bit stores here, but the extra bit-twiddling
121	* slowed the code down.
122	*/
123
124	*d++ = (uint16_t) (codepoint & 0xff);
125	*d++ = (uint16_t) ((codepoint >> 8) & 0xff);
126	*d++ = (uint16_t) ((codepoint >> 16) & 0xff);
127	*d++ = (uint16_t) ((codepoint >> 24) & 0xff);
128	}
129	last = s;
130	}
131	#endif
132
133	if (decode(&state, &codepoint, *s++) != UTF8_ACCEPT) {
134	if (state != UTF8_REJECT)
135	continue;
136	break;
137	}
138
139	if (codepoint <= 0xffff)
140	*d++ = (uint16_t) codepoint;
141	else {
142	*d++ = (uint16_t) (0xD7C0 + (codepoint >> 10));
143	*d++ = (uint16_t) (0xDC00 + (codepoint & 0x3FF));
144	}
145	last = s;
146	}
147
148	*destoff = d - dest;
149	*codepoint0 = codepoint;
150	*state0 = state;
151	*src = last;
152
153	return s;
154	}
155
156	uint8_t const *
157	_hs_pipes_text_decode_utf8_state(uint16_t *const dest, size_t *destoff,
158	const uint8_t const **src,
159	const uint8_t const *srcend,
160	uint32_t *codepoint0, uint32_t *state0)
161	{
162	uint8_t const *ret = _hs_pipes_text_decode_utf8_int(dest, destoff, src, srcend,
163	codepoint0, state0);
164	if (*state0 == UTF8_REJECT)
165	ret -=1;
166	return ret;
167	}
168