1 | /* Copyright (C) 2011 by Valentin Ochs
|
---|
2 | *
|
---|
3 | * Permission is hereby granted, free of charge, to any person obtaining a copy
|
---|
4 | * of this software and associated documentation files (the "Software"), to
|
---|
5 | * deal in the Software without restriction, including without limitation the
|
---|
6 | * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
|
---|
7 | * sell copies of the Software, and to permit persons to whom the Software is
|
---|
8 | * furnished to do so, subject to the following conditions:
|
---|
9 | *
|
---|
10 | * The above copyright notice and this permission notice shall be included in
|
---|
11 | * all copies or substantial portions of the Software.
|
---|
12 | *
|
---|
13 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
---|
14 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
---|
15 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
---|
16 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
---|
17 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
---|
18 | * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
|
---|
19 | * IN THE SOFTWARE.
|
---|
20 | */
|
---|
21 |
|
---|
22 | /* Minor changes by Rich Felker for integration in musl, 2011-04-27. */
|
---|
23 |
|
---|
24 | /* Smoothsort, an adaptive variant of Heapsort. Memory usage: O(1).
|
---|
25 | Run time: Worst case O(n log n), close to O(n) in the mostly-sorted case. */
|
---|
26 |
|
---|
27 | #include <stdint.h>
|
---|
28 | #include <stdlib.h>
|
---|
29 | #include <string.h>
|
---|
30 |
|
---|
31 | #include "atomic.h"
|
---|
32 | #define ntz(x) a_ctz_l((x))
|
---|
33 |
|
---|
34 | typedef int (*cmpfun)(const void *, const void *);
|
---|
35 |
|
---|
36 | static inline int pntz(size_t p[2]) {
|
---|
37 | int r = ntz(p[0] - 1);
|
---|
38 | if(r != 0 || (r = 8*sizeof(size_t) + ntz(p[1])) != 8*sizeof(size_t)) {
|
---|
39 | return r;
|
---|
40 | }
|
---|
41 | return 0;
|
---|
42 | }
|
---|
43 |
|
---|
44 | static void cycle(size_t width, unsigned char* ar[], int n)
|
---|
45 | {
|
---|
46 | unsigned char tmp[256];
|
---|
47 | size_t l;
|
---|
48 | int i;
|
---|
49 |
|
---|
50 | if(n < 2) {
|
---|
51 | return;
|
---|
52 | }
|
---|
53 |
|
---|
54 | ar[n] = tmp;
|
---|
55 | while(width) {
|
---|
56 | l = sizeof(tmp) < width ? sizeof(tmp) : width;
|
---|
57 | memcpy(ar[n], ar[0], l);
|
---|
58 | for(i = 0; i < n; i++) {
|
---|
59 | memcpy(ar[i], ar[i + 1], l);
|
---|
60 | ar[i] += l;
|
---|
61 | }
|
---|
62 | width -= l;
|
---|
63 | }
|
---|
64 | }
|
---|
65 |
|
---|
66 | /* shl() and shr() need n > 0 */
|
---|
67 | static inline void shl(size_t p[2], int n)
|
---|
68 | {
|
---|
69 | if(n >= 8 * sizeof(size_t)) {
|
---|
70 | n -= 8 * sizeof(size_t);
|
---|
71 | p[1] = p[0];
|
---|
72 | p[0] = 0;
|
---|
73 | }
|
---|
74 | p[1] <<= n;
|
---|
75 | p[1] |= p[0] >> (sizeof(size_t) * 8 - n);
|
---|
76 | p[0] <<= n;
|
---|
77 | }
|
---|
78 |
|
---|
79 | static inline void shr(size_t p[2], int n)
|
---|
80 | {
|
---|
81 | if(n >= 8 * sizeof(size_t)) {
|
---|
82 | n -= 8 * sizeof(size_t);
|
---|
83 | p[0] = p[1];
|
---|
84 | p[1] = 0;
|
---|
85 | }
|
---|
86 | p[0] >>= n;
|
---|
87 | p[0] |= p[1] << (sizeof(size_t) * 8 - n);
|
---|
88 | p[1] >>= n;
|
---|
89 | }
|
---|
90 |
|
---|
91 | static void sift(unsigned char *head, size_t width, cmpfun cmp, int pshift, size_t lp[])
|
---|
92 | {
|
---|
93 | unsigned char *rt, *lf;
|
---|
94 | unsigned char *ar[14 * sizeof(size_t) + 1];
|
---|
95 | int i = 1;
|
---|
96 |
|
---|
97 | ar[0] = head;
|
---|
98 | while(pshift > 1) {
|
---|
99 | rt = head - width;
|
---|
100 | lf = head - width - lp[pshift - 2];
|
---|
101 |
|
---|
102 | if((*cmp)(ar[0], lf) >= 0 && (*cmp)(ar[0], rt) >= 0) {
|
---|
103 | break;
|
---|
104 | }
|
---|
105 | if((*cmp)(lf, rt) >= 0) {
|
---|
106 | ar[i++] = lf;
|
---|
107 | head = lf;
|
---|
108 | pshift -= 1;
|
---|
109 | } else {
|
---|
110 | ar[i++] = rt;
|
---|
111 | head = rt;
|
---|
112 | pshift -= 2;
|
---|
113 | }
|
---|
114 | }
|
---|
115 | cycle(width, ar, i);
|
---|
116 | }
|
---|
117 |
|
---|
118 | static void trinkle(unsigned char *head, size_t width, cmpfun cmp, size_t pp[2], int pshift, int trusty, size_t lp[])
|
---|
119 | {
|
---|
120 | unsigned char *stepson,
|
---|
121 | *rt, *lf;
|
---|
122 | size_t p[2];
|
---|
123 | unsigned char *ar[14 * sizeof(size_t) + 1];
|
---|
124 | int i = 1;
|
---|
125 | int trail;
|
---|
126 |
|
---|
127 | p[0] = pp[0];
|
---|
128 | p[1] = pp[1];
|
---|
129 |
|
---|
130 | ar[0] = head;
|
---|
131 | while(p[0] != 1 || p[1] != 0) {
|
---|
132 | stepson = head - lp[pshift];
|
---|
133 | if((*cmp)(stepson, ar[0]) <= 0) {
|
---|
134 | break;
|
---|
135 | }
|
---|
136 | if(!trusty && pshift > 1) {
|
---|
137 | rt = head - width;
|
---|
138 | lf = head - width - lp[pshift - 2];
|
---|
139 | if((*cmp)(rt, stepson) >= 0 || (*cmp)(lf, stepson) >= 0) {
|
---|
140 | break;
|
---|
141 | }
|
---|
142 | }
|
---|
143 |
|
---|
144 | ar[i++] = stepson;
|
---|
145 | head = stepson;
|
---|
146 | trail = pntz(p);
|
---|
147 | shr(p, trail);
|
---|
148 | pshift += trail;
|
---|
149 | trusty = 0;
|
---|
150 | }
|
---|
151 | if(!trusty) {
|
---|
152 | cycle(width, ar, i);
|
---|
153 | sift(head, width, cmp, pshift, lp);
|
---|
154 | }
|
---|
155 | }
|
---|
156 |
|
---|
157 | void qsort(void *base, size_t nel, size_t width, cmpfun cmp)
|
---|
158 | {
|
---|
159 | size_t lp[12*sizeof(size_t)];
|
---|
160 | size_t i, size = width * nel;
|
---|
161 | unsigned char *head, *high;
|
---|
162 | size_t p[2] = {1, 0};
|
---|
163 | int pshift = 1;
|
---|
164 | int trail;
|
---|
165 |
|
---|
166 | if (!size) return;
|
---|
167 |
|
---|
168 | head = base;
|
---|
169 | high = head + size - width;
|
---|
170 |
|
---|
171 | /* Precompute Leonardo numbers, scaled by element width */
|
---|
172 | for(lp[0]=lp[1]=width, i=2; (lp[i]=lp[i-2]+lp[i-1]+width) < size; i++);
|
---|
173 |
|
---|
174 | while(head < high) {
|
---|
175 | if((p[0] & 3) == 3) {
|
---|
176 | sift(head, width, cmp, pshift, lp);
|
---|
177 | shr(p, 2);
|
---|
178 | pshift += 2;
|
---|
179 | } else {
|
---|
180 | if(lp[pshift - 1] >= high - head) {
|
---|
181 | trinkle(head, width, cmp, p, pshift, 0, lp);
|
---|
182 | } else {
|
---|
183 | sift(head, width, cmp, pshift, lp);
|
---|
184 | }
|
---|
185 |
|
---|
186 | if(pshift == 1) {
|
---|
187 | shl(p, 1);
|
---|
188 | pshift = 0;
|
---|
189 | } else {
|
---|
190 | shl(p, pshift - 1);
|
---|
191 | pshift = 1;
|
---|
192 | }
|
---|
193 | }
|
---|
194 |
|
---|
195 | p[0] |= 1;
|
---|
196 | head += width;
|
---|
197 | }
|
---|
198 |
|
---|
199 | trinkle(head, width, cmp, p, pshift, 0, lp);
|
---|
200 |
|
---|
201 | while(pshift != 1 || p[0] != 1 || p[1] != 0) {
|
---|
202 | if(pshift <= 1) {
|
---|
203 | trail = pntz(p);
|
---|
204 | shr(p, trail);
|
---|
205 | pshift += trail;
|
---|
206 | } else {
|
---|
207 | shl(p, 2);
|
---|
208 | pshift -= 2;
|
---|
209 | p[0] ^= 7;
|
---|
210 | shr(p, 1);
|
---|
211 | trinkle(head - lp[pshift] - width, width, cmp, p, pshift + 1, 1, lp);
|
---|
212 | shl(p, 1);
|
---|
213 | p[0] |= 1;
|
---|
214 | trinkle(head - width, width, cmp, p, pshift, 1, lp);
|
---|
215 | }
|
---|
216 | head -= width;
|
---|
217 | }
|
---|
218 | }
|
---|