Fix undefined behavior flagged by UBSAN
[gromacs.git] / src / gromacs / selection / sm_same.cpp
blobabcd1745cdd29eceaa1c86a89ba7321cec6d753d
1 /*
2 * This file is part of the GROMACS molecular simulation package.
4 * Copyright (c) 2009,2010,2011,2012,2013 by the GROMACS development team.
5 * Copyright (c) 2014,2015,2016,2017,2018 by the GROMACS development team.
6 * Copyright (c) 2019,2020, by the GROMACS development team, led by
7 * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
8 * and including many others, as listed in the AUTHORS file in the
9 * top-level source directory and at http://www.gromacs.org.
11 * GROMACS is free software; you can redistribute it and/or
12 * modify it under the terms of the GNU Lesser General Public License
13 * as published by the Free Software Foundation; either version 2.1
14 * of the License, or (at your option) any later version.
16 * GROMACS is distributed in the hope that it will be useful,
17 * but WITHOUT ANY WARRANTY; without even the implied warranty of
18 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
19 * Lesser General Public License for more details.
21 * You should have received a copy of the GNU Lesser General Public
22 * License along with GROMACS; if not, see
23 * http://www.gnu.org/licenses, or write to the Free Software Foundation,
24 * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
26 * If you want to redistribute modifications to GROMACS, please
27 * consider that scientific software is very special. Version
28 * control is crucial - bugs must be traceable. We will be happy to
29 * consider code for inclusion in the official distribution, but
30 * derived work must not be called official GROMACS. Details are found
31 * in the README & COPYING files - if they are missing, get the
32 * official version at http://www.gromacs.org.
34 * To help us fund GROMACS development, we humbly ask that you cite
35 * the research papers on the package. Check out http://www.gromacs.org.
37 /*! \internal \file
38 * \brief
39 * Implements the \p same selection method.
41 * \author Teemu Murtola <teemu.murtola@gmail.com>
42 * \ingroup module_selection
44 #include "gmxpre.h"
46 #include <cstdlib>
47 #include <cstring>
49 #include "gromacs/utility/arraysize.h"
50 #include "gromacs/utility/exceptions.h"
51 #include "gromacs/utility/smalloc.h"
53 #include "keywords.h"
54 #include "parsetree.h"
55 #include "selelem.h"
56 #include "selmethod.h"
57 #include "selmethod_impl.h"
59 /*! \internal
60 * \brief
61 * Data structure for the \p same selection method.
63 * To avoid duplicate initialization code, the same data structure is used
64 * for matching both integer and string keywords; hence the unions.
66 * \ingroup module_selection
68 typedef struct
70 /** Value for each atom to match. */
71 union {
72 int* i;
73 char** s;
74 void* ptr;
75 } val;
76 /*! \brief
77 * Number of values in the \p as array.
79 * For string values, this is actually the number of values in the
80 * \p as_s_sorted array.
82 int nas;
83 /** Values to match against. */
84 union {
85 int* i;
86 char** s;
87 void* ptr;
88 } as;
89 /*! \brief
90 * Separate array for sorted \p as.s array.
92 * The array of strings returned as the output value of a parameter should
93 * not be messed with to avoid memory corruption (the pointers in the array
94 * may be reused for several evaluations), so we keep our own copy for
95 * modifications.
97 char** as_s_sorted;
98 /** Whether simple matching can be used. */
99 bool bSorted;
100 } t_methoddata_same;
102 /*! \brief
103 * Allocates data for the \p same selection method.
105 * \param[in] npar Not used (should be 2).
106 * \param[in,out] param Method parameters (should point to a copy of
107 * ::smparams_same_int or ::smparams_same_str).
108 * \returns Pointer to the allocated data (\ref t_methoddata_same).
110 static void* init_data_same(int npar, gmx_ana_selparam_t* param);
111 /*! \brief
112 * Initializes the \p same selection method.
114 * \param top Not used.
115 * \param npar Not used (should be 2).
116 * \param param Initialized method parameters (should point to a copy of
117 * ::smparams_same_int or ::smparams_same_str).
118 * \param data Pointer to \ref t_methoddata_same to initialize.
119 * \returns 0 on success, -1 on failure.
121 static void init_same(const gmx_mtop_t* top, int npar, gmx_ana_selparam_t* param, void* data);
122 /** Frees the data allocated for the \p same selection method. */
123 static void free_data_same(void* data);
124 /*! \brief
125 * Initializes the evaluation of the \p same selection method for a frame.
127 * \param[in] context Not used.
128 * \param data Should point to a \ref t_methoddata_same.
130 * Sorts the \c data->as.i array and removes identical values for faster and
131 * simpler lookup.
133 static void init_frame_same_int(const gmx::SelMethodEvalContext& context, void* data);
134 /** Evaluates the \p same selection method. */
135 static void evaluate_same_int(const gmx::SelMethodEvalContext& context,
136 gmx_ana_index_t* g,
137 gmx_ana_selvalue_t* out,
138 void* data);
139 /*! \brief
140 * Initializes the evaluation of the \p same selection method for a frame.
142 * \param[in] context Not used.
143 * \param data Should point to a \ref t_methoddata_same.
145 * Sorts the \c data->as.s array and removes identical values for faster and
146 * simpler lookup.
148 static void init_frame_same_str(const gmx::SelMethodEvalContext& context, void* data);
149 /** Evaluates the \p same selection method. */
150 static void evaluate_same_str(const gmx::SelMethodEvalContext& context,
151 gmx_ana_index_t* g,
152 gmx_ana_selvalue_t* out,
153 void* data);
155 /** Parameters for the \p same selection method. */
156 static gmx_ana_selparam_t smparams_same_int[] = {
157 { nullptr, { INT_VALUE, -1, { nullptr } }, nullptr, SPAR_DYNAMIC | SPAR_ATOMVAL },
158 { "as", { INT_VALUE, -1, { nullptr } }, nullptr, SPAR_DYNAMIC | SPAR_VARNUM },
161 /** Parameters for the \p same selection method. */
162 static gmx_ana_selparam_t smparams_same_str[] = {
163 { nullptr, { STR_VALUE, -1, { nullptr } }, nullptr, SPAR_DYNAMIC | SPAR_ATOMVAL },
164 { "as", { STR_VALUE, -1, { nullptr } }, nullptr, SPAR_DYNAMIC | SPAR_VARNUM },
167 /** Help text for the \p same selection method. */
168 static const char* const help_same[] = {
169 "::",
171 " same KEYWORD as ATOM_EXPR",
174 "The keyword [TT]same[tt] can be used to select all atoms for which",
175 "the given [TT]KEYWORD[tt] matches any of the atoms in [TT]ATOM_EXPR[tt].",
176 "Keywords that evaluate to integer or string values are supported.",
179 /*! \internal \brief Selection method data for the \p same method. */
180 gmx_ana_selmethod_t sm_same = {
181 "same",
182 GROUP_VALUE,
184 asize(smparams_same_int),
185 smparams_same_int,
186 &init_data_same,
187 nullptr,
188 &init_same,
189 nullptr,
190 &free_data_same,
191 &init_frame_same_int,
192 &evaluate_same_int,
193 nullptr,
194 { "same KEYWORD as ATOM_EXPR", "Extending selections", asize(help_same), help_same },
197 /*! \brief
198 * Selection method data for the \p same method.
200 * This selection method is used for matching string keywords. The parser
201 * never sees this method; _gmx_selelem_custom_init_same() replaces sm_same
202 * with this method in cases where it is required.
204 static gmx_ana_selmethod_t sm_same_str = {
205 "same",
206 GROUP_VALUE,
207 SMETH_SINGLEVAL,
208 asize(smparams_same_str),
209 smparams_same_str,
210 &init_data_same,
211 nullptr,
212 &init_same,
213 nullptr,
214 &free_data_same,
215 &init_frame_same_str,
216 &evaluate_same_str,
217 nullptr,
218 { nullptr, nullptr, 0, nullptr },
221 static void* init_data_same(int /* npar */, gmx_ana_selparam_t* param)
223 t_methoddata_same* data;
225 snew(data, 1);
226 data->as_s_sorted = nullptr;
227 param[1].nvalptr = &data->nas;
228 return data;
232 * \param[in,out] method The method to initialize.
233 * \param[in,out] params Pointer to the first parameter.
234 * \param[in] scanner Scanner data structure.
236 * If \p *method is not a \c same method, this function returns
237 * immediately.
239 void _gmx_selelem_custom_init_same(gmx_ana_selmethod_t** method,
240 const gmx::SelectionParserParameterListPointer& params,
241 void* scanner)
244 /* Do nothing if this is not a same method. */
245 if (!*method || (*method)->name != sm_same.name || params->empty())
247 return;
250 const gmx::SelectionParserValueList& kwvalues = params->front().values();
251 if (kwvalues.size() != 1 || !kwvalues.front().hasExpressionValue()
252 || kwvalues.front().expr->type != SEL_EXPRESSION)
254 GMX_THROW(gmx::InvalidInputError("'same' should be followed by a single keyword"));
256 gmx_ana_selmethod_t* kwmethod = kwvalues.front().expr->u.expr.method;
257 if (kwmethod->type == STR_VALUE)
259 *method = &sm_same_str;
262 /* We do custom processing for the "as" parameter. */
263 gmx::SelectionParserParameterList::iterator asparam = ++params->begin();
264 if (asparam != params->end() && asparam->name() == sm_same.param[1].name)
266 const gmx::SelectionParserValueList& asvalues = asparam->values();
267 if (asvalues.size() != 1 || !asvalues.front().hasExpressionValue())
269 // TODO: Think about providing more informative context.
270 GMX_THROW(gmx::InvalidInputError(
271 "'same ... as' should be followed by a single expression"));
273 const gmx::SelectionTreeElementPointer& child = asvalues.front().expr;
274 /* Create a second keyword evaluation element for the keyword given as
275 * the first parameter, evaluating the keyword in the group given by the
276 * second parameter. */
277 gmx::SelectionTreeElementPointer kwelem =
278 _gmx_sel_init_keyword_evaluator(kwmethod, child, scanner);
279 /* Replace the second parameter with one with a value from \p kwelem. */
280 std::string pname = asparam->name();
281 *asparam = gmx::SelectionParserParameter::createFromExpression(pname, kwelem);
285 static void init_same(const gmx_mtop_t* /* top */, int /* npar */, gmx_ana_selparam_t* param, void* data)
287 t_methoddata_same* d = static_cast<t_methoddata_same*>(data);
289 d->val.ptr = param[0].val.u.ptr;
290 d->as.ptr = param[1].val.u.ptr;
291 if (param[1].val.type == STR_VALUE)
293 snew(d->as_s_sorted, d->nas);
295 if (!(param[0].flags & SPAR_ATOMVAL))
297 GMX_THROW(
298 gmx::InvalidInputError("The 'same' selection keyword combined with a "
299 "non-keyword does not make sense"));
304 * \param data Data to free (should point to a \ref t_methoddata_same).
306 static void free_data_same(void* data)
308 t_methoddata_same* d = static_cast<t_methoddata_same*>(data);
310 sfree(d->as_s_sorted);
311 sfree(d);
314 /*! \brief
315 * Helper function for comparison of two integers.
317 static int cmp_int(const void* a, const void* b)
319 if (*reinterpret_cast<const int*>(a) < *reinterpret_cast<const int*>(b))
321 return -1;
323 if (*reinterpret_cast<const int*>(a) > *reinterpret_cast<const int*>(b))
325 return 1;
327 return 0;
330 static void init_frame_same_int(const gmx::SelMethodEvalContext& /*context*/, void* data)
332 t_methoddata_same* d = static_cast<t_methoddata_same*>(data);
333 int i, j;
335 /* Collapse adjacent values, and check whether the array is sorted. */
336 d->bSorted = true;
337 if (d->nas == 0)
339 return;
341 for (i = 1, j = 0; i < d->nas; ++i)
343 if (d->as.i[i] != d->as.i[j])
345 if (d->as.i[i] < d->as.i[j])
347 d->bSorted = false;
349 ++j;
350 d->as.i[j] = d->as.i[i];
353 d->nas = j + 1;
355 if (!d->bSorted)
357 qsort(d->as.i, d->nas, sizeof(d->as.i[0]), &cmp_int);
358 /* More identical values may become adjacent after sorting. */
359 for (i = 1, j = 0; i < d->nas; ++i)
361 if (d->as.i[i] != d->as.i[j])
363 ++j;
364 d->as.i[j] = d->as.i[i];
367 d->nas = j + 1;
372 * See sel_updatefunc() for description of the parameters.
373 * \p data should point to a \c t_methoddata_same.
375 * Calculates which values in \c data->val.i can be found in \c data->as.i
376 * (assumed sorted), and writes the corresponding atoms to output.
377 * If \c data->val is sorted, uses a linear scan of both arrays, otherwise a
378 * binary search of \c data->as is performed for each block of values in
379 * \c data->val.
381 static void evaluate_same_int(const gmx::SelMethodEvalContext& /*context*/,
382 gmx_ana_index_t* g,
383 gmx_ana_selvalue_t* out,
384 void* data)
386 t_methoddata_same* d = static_cast<t_methoddata_same*>(data);
387 int i, j;
389 out->u.g->isize = 0;
390 i = j = 0;
391 while (j < g->isize)
393 if (d->bSorted)
395 /* If we are sorted, we can do a simple linear scan. */
396 while (i < d->nas && d->as.i[i] < d->val.i[j])
398 ++i;
401 else
403 /* If not, we must do a binary search of all the values. */
404 int i1, i2;
406 i1 = 0;
407 i2 = d->nas;
408 while (i2 - i1 > 1)
410 int itry = (i1 + i2) / 2;
411 if (d->as.i[itry] <= d->val.i[j])
413 i1 = itry;
415 else
417 i2 = itry;
420 i = (d->as.i[i1] == d->val.i[j] ? i1 : d->nas);
422 /* Check whether the value was found in the as list. */
423 if (i == d->nas || d->as.i[i] != d->val.i[j])
425 /* If not, skip all atoms with the same value. */
426 int tmpval = d->val.i[j];
427 ++j;
428 while (j < g->isize && d->val.i[j] == tmpval)
430 ++j;
433 else
435 /* Copy all the atoms with this value to the output. */
436 while (j < g->isize && d->val.i[j] == d->as.i[i])
438 out->u.g->index[out->u.g->isize++] = g->index[j];
439 ++j;
442 if (j < g->isize && d->val.i[j] < d->val.i[j - 1])
444 d->bSorted = false;
449 /*! \brief
450 * Helper function for comparison of two strings.
452 static int cmp_str(const void* a, const void* b)
454 return strcmp(*static_cast<char* const*>(a), *static_cast<char* const*>(b));
457 static void init_frame_same_str(const gmx::SelMethodEvalContext& /*context*/, void* data)
459 t_methoddata_same* d = static_cast<t_methoddata_same*>(data);
460 int i, j;
462 /* Collapse adjacent values.
463 * For strings, it's unlikely that the values would be sorted originally,
464 * so set bSorted always to false. */
465 d->bSorted = false;
466 if (d->nas == 0)
468 return;
470 d->as_s_sorted[0] = d->as.s[0];
471 for (i = 1, j = 0; i < d->nas; ++i)
473 if (strcmp(d->as.s[i], d->as_s_sorted[j]) != 0)
475 ++j;
476 d->as_s_sorted[j] = d->as.s[i];
479 d->nas = j + 1;
481 qsort(d->as_s_sorted, d->nas, sizeof(d->as_s_sorted[0]), &cmp_str);
482 /* More identical values may become adjacent after sorting. */
483 for (i = 1, j = 0; i < d->nas; ++i)
485 if (strcmp(d->as_s_sorted[i], d->as_s_sorted[j]) != 0)
487 ++j;
488 d->as_s_sorted[j] = d->as_s_sorted[i];
491 d->nas = j + 1;
495 * See sel_updatefunc() for description of the parameters.
496 * \p data should point to a \c t_methoddata_same.
498 * Calculates which strings in \c data->val.s can be found in \c data->as.s
499 * (assumed sorted), and writes the corresponding atoms to output.
500 * A binary search of \c data->as is performed for each block of values in
501 * \c data->val.
503 static void evaluate_same_str(const gmx::SelMethodEvalContext& /*context*/,
504 gmx_ana_index_t* g,
505 gmx_ana_selvalue_t* out,
506 void* data)
508 t_methoddata_same* d = static_cast<t_methoddata_same*>(data);
509 int j;
511 out->u.g->isize = 0;
512 j = 0;
513 while (j < g->isize)
515 /* Do a binary search of the strings. */
516 void* ptr = nullptr;
517 if (d->nas > 0)
519 ptr = bsearch(&d->val.s[j], d->as_s_sorted, d->nas, sizeof(d->as_s_sorted[0]), &cmp_str);
521 /* Check whether the value was found in the as list. */
522 if (ptr == nullptr)
524 /* If not, skip all atoms with the same value. */
525 const char* tmpval = d->val.s[j];
526 ++j;
527 while (j < g->isize && strcmp(d->val.s[j], tmpval) == 0)
529 ++j;
532 else
534 const char* tmpval = d->val.s[j];
535 /* Copy all the atoms with this value to the output. */
536 while (j < g->isize && strcmp(d->val.s[j], tmpval) == 0)
538 out->u.g->index[out->u.g->isize++] = g->index[j];
539 ++j;