libSplash
DCHelper.hpp
1 
23 #ifndef DCHELPER_H
24 #define DCHELPER_H
25 
26 #include <map>
27 #include <sstream>
28 #include <iostream>
29 #include <hdf5.h>
30 
31 namespace splash
32 {
33 
38  class DCHelper
39  {
40  private:
41  DCHelper();
42  public:
43 
44  static void printhsizet(const char *name, const hsize_t *data, hsize_t rank)
45  {
46  std::cerr << name << " = (";
47  for (hsize_t i = 0; i < rank - 1; i++)
48  std::cerr << data[i] << ", ";
49  std::cerr << data[rank - 1];
50  std::cerr << ")" << std::endl;
51 
52  }
53 
59  static void swapHSize(hsize_t *hs, uint32_t rank)
60  {
61  if (hs == NULL)
62  return;
63 
64  hsize_t tmp1;
65  hsize_t tmp3[DSP_DIM_MAX];
66 
67  switch (rank)
68  {
69  case 2:
70  tmp1 = hs[0];
71  hs[0] = hs[1];
72  hs[1] = tmp1;
73  break;
74  case 3:
75  tmp3[0] = hs[2];
76  tmp3[1] = hs[1];
77  tmp3[2] = hs[0];
78 
79  for (uint32_t i = 0; i < 3; i++)
80  hs[i] = tmp3[i];
81  break;
82  default:
83  return;
84  }
85  }
86 
102  static void getOptimalChunkDims(const hsize_t *dims, uint32_t ndims,
103  size_t typeSize, hsize_t *chunkDims)
104  {
105  const size_t NUM_CHUNK_SIZES = 7;
106  // chunk sizes in KByte
107  const size_t CHUNK_SIZES_KB[] = {4096, 2048, 1024, 512, 256, 128, 64};
108 
109  size_t total_data_size = typeSize;
110  size_t max_chunk_size = typeSize;
111  size_t target_chunk_size = 0;
112 
113  // compute the order of dimensions (descending)
114  // large dataset dimensions should have larger chunk sizes
115  std::multimap<hsize_t, uint32_t> dims_order;
116  for (uint32_t i = 0; i < ndims; ++i)
117  dims_order.insert(std::make_pair(dims[i], i));
118 
119  for (uint32_t i = 0; i < ndims; ++i)
120  {
121  // initial number of chunks per dimension
122  chunkDims[i] = 1;
123 
124  // try to make at least two chunks for each dimension
125  size_t half_dim = dims[i] / 2;
126 
127  // compute sizes
128  max_chunk_size *= (half_dim > 0) ? half_dim : 1;
129  total_data_size *= dims[i];
130  }
131 
132  // compute the target chunk size
133  for (uint32_t i = 0; i < NUM_CHUNK_SIZES; ++i)
134  {
135  target_chunk_size = CHUNK_SIZES_KB[i] * 1024;
136  if (target_chunk_size <= max_chunk_size)
137  break;
138  }
139 
140  size_t current_chunk_size = typeSize;
141  size_t last_chunk_diff = target_chunk_size;
142  std::multimap<hsize_t, uint32_t>::const_iterator current_index =
143  dims_order.begin();
144 
145  while (current_chunk_size < target_chunk_size)
146  {
147  // test if increasing chunk size optimizes towards target chunk size
148  size_t chunk_diff = target_chunk_size - (current_chunk_size * 2u);
149  if (chunk_diff >= last_chunk_diff)
150  break;
151 
152  // find next dimension to increase chunk size for
153  int can_increase_dim = 0;
154  for (uint32_t d = 0; d < ndims; ++d)
155  {
156  int current_dim = current_index->second;
157 
158  // increasing chunk size possible
159  if (chunkDims[current_dim] * 2 <= dims[current_dim])
160  {
161  chunkDims[current_dim] *= 2;
162  current_chunk_size *= 2;
163  can_increase_dim = 1;
164  }
165 
166  current_index++;
167  if (current_index == dims_order.end())
168  current_index = dims_order.begin();
169 
170  if (can_increase_dim)
171  break;
172  }
173 
174  // can not increase chunk size in any dimension
175  // we must use the current chunk sizes
176  if (!can_increase_dim)
177  break;
178 
179  last_chunk_diff = chunk_diff;
180  }
181  }
182 
/**
 * Checks that a file name carries the ".h5" extension exactly once, at
 * its very end.
 *
 * A warning is written to stderr if the name ends in ".h5.h5", if ".h5"
 * is missing, or if the first occurrence of ".h5" is not the end of the
 * name.
 *
 * @param filename file name to check
 * @return true if the name passes the check, false if a warning was issued
 */
static bool testFilename(const std::string& filename)
{
    const std::string extension(".h5");
    const std::string doubled_extension(".h5.h5");
    const std::string::size_type length = filename.length();

    // Compare the tail only if the name is long enough. Computing
    // "length - 6" for shorter names wraps around and may equal npos,
    // which rejected valid 5 character names such as "ab.h5".
    if (length >= doubled_extension.length() &&
        filename.compare(length - doubled_extension.length(),
                         doubled_extension.length(), doubled_extension) == 0)
    {
        std::cerr << std::endl << "\tWarning: DCHelper: Do you really want to access "
                << filename.c_str() << "?" << std::endl;
        return false;
    }

    // The first ".h5" has to be the end of the name. A missing extension is
    // tested explicitly: for 2 character names "length - 3" wraps around to
    // npos and would match the "not found" result of find().
    const std::string::size_type first_pos = filename.find(extension);
    if (first_pos == std::string::npos ||
        first_pos != length - extension.length())
    {
        std::cerr << std::endl << "\tWarning: DCHelper: "
                << "Duplicate or missing file name extension. "
                << "Do you really want to access "
                << filename.c_str() << "?" << std::endl;
        return false;
    }

    return true;
}
208 
209  };
} // namespace splash
215 
216 #endif /* DCHELPER_H */