NFFT  3.3.0
sort.c
1 /*
2  * Copyright (c) 2002, 2015 Jens Keiner, Stefan Kunis, Daniel Potts
3  *
4  * This program is free software; you can redistribute it and/or modify it under
5  * the terms of the GNU General Public License as published by the Free Software
6  * Foundation; either version 2 of the License, or (at your option) any later
7  * version.
8  *
9  * This program is distributed in the hope that it will be useful, but WITHOUT
10  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
11  * FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
12  * details.
13  *
14  * You should have received a copy of the GNU General Public License along with
15  * this program; if not, write to the Free Software Foundation, Inc., 51
16  * Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
17  */
18 
19 /* $Id: util.c 3483 2010-04-23 19:02:34Z keiner $ */
20 
21 
22 #ifdef _OPENMP
23 #include <omp.h>
24 #endif
25 
26 #include "infft.h"
27 
28 #define z_swap(_a_, _b_, _t_) do { (_t_) = (_a_); (_a_) = (_b_); (_b_) = (_t_); } while (0)
29 
35 static inline void sort_node_indices_sort_bubble(INT n, INT *keys)
36 {
37  INT i, j, ti;
38 
39  for (i = 0; i < n; ++i)
40  {
41  j = i;
42  while (j > 0 && keys[2 * j + 0] < keys[2 * (j - 1) + 0])
43  {
44  z_swap(keys[2 * j + 0], keys[2 * (j - 1) + 0], ti);
45  z_swap(keys[2 * j + 1], keys[2 * (j - 1) + 1], ti);
46  --j;
47  }
48  }
49 }
50 
56 static inline void sort_node_indices_radix_count(INT n, INT *keys, INT shift, INT mask, INT *counts)
57 {
58  INT i, k;
59 
60  for (i = 0; i < n; ++i)
61  {
62  k = (keys[2 * i + 0] >> shift) & mask;
63  ++counts[k];
64  }
65 }
66 
72 static inline void sort_node_indices_radix_rearrange(INT n, INT *keys_in, INT *keys_out, INT shift, INT mask, INT *displs)
73 {
74  INT i, k;
75 
76  for (i = 0; i < n; ++i)
77  {
78  k = (keys_in[2 * i + 0] >> shift) & mask;
79  keys_out[2 * displs[k] + 0] = keys_in[2 * i + 0];
80  keys_out[2 * displs[k] + 1] = keys_in[2 * i + 1];
81  ++displs[k];
82  }
83 }
84 
85 #define rwidth 9
86 #define radix_n (1 << rwidth)
87 
93 void Y(sort_node_indices_radix_lsdf)(INT n, INT *keys0, INT *keys1, INT rhigh)
94 {
95  const INT radix_mask = radix_n - 1;
96  const INT rhigh_in = rhigh;
97 
98  const INT tmax =
99 #ifdef _OPENMP
100  omp_get_max_threads();
101 #else
102  1;
103 #endif
104 
105  INT *from, *to, *tmp;
106 
107  INT i, k, l, h;
108  INT *lcounts;
109 
110  INT tid = 0, tnum = 1;
111 
112  STACK_MALLOC(INT*, lcounts, (size_t)(tmax * radix_n) * sizeof(INT));
113 
114  from = keys0;
115  to = keys1;
116 
117  while (rhigh >= 0)
118  {
119 #ifdef _OPENMP
120  #pragma omp parallel private(tid, tnum, i, l, h)
121  {
122  tid = omp_get_thread_num();
123  tnum = omp_get_num_threads();
124 #endif
125 
126  for (i = 0; i < radix_n; ++i) lcounts[tid * radix_n + i] = 0;
127 
128  l = (tid * n) / tnum;
129  h = ((tid + 1) * n) / tnum;
130 
131  sort_node_indices_radix_count(h - l, from + (2 * l), rhigh_in - rhigh, radix_mask, &lcounts[tid * radix_n]);
132 #ifdef _OPENMP
133  }
134 #endif
135 
136  k = 0;
137  for (i = 0; i < radix_n; ++i)
138  {
139  for (l = 0; l < tmax; ++l) lcounts[l * radix_n + i] = (k += lcounts[l * radix_n + i]) - lcounts[l * radix_n + i];
140  }
141 
142 #ifdef _OPENMP
143  #pragma omp parallel private(tid, tnum, i, l, h)
144  {
145  tid = omp_get_thread_num();
146  tnum = omp_get_num_threads();
147 #endif
148 
149  l = (tid * n) / tnum;
150  h = ((tid + 1) * n) / tnum;
151 
152  sort_node_indices_radix_rearrange(h - l, from + (2 * l), to, rhigh_in - rhigh, radix_mask, &lcounts[tid * radix_n]);
153 #ifdef _OPENMP
154  }
155 #endif
156 
157 /* print_keys(n, to);*/
158 
159  tmp = from;
160  from = to;
161  to = tmp;
162 
163  rhigh -= rwidth;
164  }
165 
166  if (to == keys0) memcpy(to, from, (size_t)(n) * 2 * sizeof(INT));
167 
168  STACK_FREE(lcounts);
169 }
170 
176 void Y(sort_node_indices_radix_msdf)(INT n, INT *keys0, INT *keys1, INT rhigh)
177 {
178  const INT radix_mask = radix_n - 1;
179 
180  const INT tmax =
181 #ifdef _OPENMP
182  omp_get_max_threads();
183 #else
184  1;
185 #endif
186 
187  INT i, k, l, h;
188  INT *lcounts;
189 
190  INT counts[radix_n], displs[radix_n];
191 
192  INT tid = 0, tnum = 1;
193 
194  STACK_MALLOC(INT*, lcounts, (size_t)(tmax * radix_n) * sizeof(INT));
195 
196  rhigh -= rwidth;
197 
198 #ifdef _OPENMP
199  #pragma omp parallel private(tid, tnum, i, l, h)
200  {
201  tid = omp_get_thread_num();
202  tnum = omp_get_num_threads();
203 #endif
204 
205  for (i = 0; i < radix_n; ++i) lcounts[tid * radix_n + i] = 0;
206 
207  l = (tid * n) / tnum;
208  h = ((tid + 1) * n) / tnum;
209 
210  sort_node_indices_radix_count(h - l, keys0 + (2 * l), rhigh + 1, radix_mask, &lcounts[tid * radix_n]);
211 #ifdef _OPENMP
212  }
213 #endif
214 
215  k = 0;
216  for (i = 0; i < radix_n; ++i)
217  {
218  for (l = 0; l < tmax; ++l) lcounts[l * radix_n + i] = (k += lcounts[l * radix_n + i]) - lcounts[l * radix_n + i];
219 
220  displs[i] = lcounts[0 * radix_n + i];
221  if (i > 0) counts[i - 1] = displs[i] - displs[i - 1];
222  }
223  counts[radix_n - 1] = n - displs[radix_n - 1];
224 
225 #ifdef _OPENMP
226  #pragma omp parallel private(tid, tnum, i, l, h)
227  {
228  tid = omp_get_thread_num();
229  tnum = omp_get_num_threads();
230 #endif
231 
232  l = (tid * n) / tnum;
233  h = ((tid + 1) * n) / tnum;
234 
235  sort_node_indices_radix_rearrange(h - l, keys0 + (2 * l), keys1, rhigh + 1, radix_mask, &lcounts[tid * radix_n]);
236 #ifdef _OPENMP
237  }
238 #endif
239 
240  memcpy(keys0, keys1, (size_t)(n) * 2 * sizeof(INT));
241 
242  if (rhigh >= 0)
243  {
244  for (i = 0; i < radix_n; ++i)
245  {
246  if (counts[i] > 1)
247  {
248  if (counts[i] > 256)
249  Y(sort_node_indices_radix_msdf)(counts[i], keys0 + 2 * displs[i], keys1 + 2 * displs[i], rhigh);
250  else
251  sort_node_indices_sort_bubble(counts[i], keys0 + 2 * displs[i]);
252  }
253  }
254  }
255 
256  STACK_FREE(lcounts);
257 }