diff --git a/sc/workben/dpcache/perf-test.cpp b/sc/workben/dpcache/perf-test.cpp new file mode 100644 index 000000000000..ab9e80bfd587 --- /dev/null +++ b/sc/workben/dpcache/perf-test.cpp @@ -0,0 +1,434 @@ +/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */ +/* + * Version: MPL 1.1 / GPLv3+ / LGPLv3+ + * + * The contents of this file are subject to the Mozilla Public License Version + * 1.1 (the "License"); you may not use this file except in compliance with + * the License or as specified alternatively below. You may obtain a copy of + * the License at http://www.mozilla.org/MPL/ + * + * Software distributed under the License is distributed on an "AS IS" basis, + * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License + * for the specific language governing rights and limitations under the + * License. + * + * Major Contributor(s): + * Copyright (C) 2012 Kohei Yoshida + * + * All Rights Reserved. + * + * For minor contributions see the git repository. + * + * Alternatively, the contents of this file may be used under the terms of + * either the GNU General Public License Version 3 or later (the "GPLv3+"), or + * the GNU Lesser General Public License Version 3 or later (the "LGPLv3+"), + * in which case the provisions of the GPLv3+ or the LGPLv3+ are applicable + * instead of those above. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +using namespace std; + +namespace { + +class stack_printer +{ +public: + explicit stack_printer(const char* msg) : + msMsg(msg) + { + fprintf(stdout, "%s: --begin\n", msMsg.c_str()); + mfStartTime = getTime(); + } + + ~stack_printer() + { + double fEndTime = getTime(); + fprintf(stdout, "%s: --end (duration: %g sec)\n", msMsg.c_str(), (fEndTime-mfStartTime)); + } + + void printTime(int line) const + { + double fEndTime = getTime(); + fprintf(stdout, "%s: --(%d) (duration: %g sec)\n", msMsg.c_str(), line, (fEndTime-mfStartTime)); + } + +private: + double getTime() const + { + timeval tv; + gettimeofday(&tv, NULL); + return tv.tv_sec + tv.tv_usec / 1000000.0; + } + + ::std::string msMsg; + double mfStartTime; +}; + +typedef std::vector values_type; +typedef std::vector indices_type; + +#if 1 +size_t val_count = 6000000; +double multiplier = 300000.0; +bool dump_values = false; +#else +size_t val_count = 20; +double multiplier = 10.0; +bool dump_values = true; +#endif + +struct field : boost::noncopyable +{ + values_type items; /// unique values + indices_type data; /// original value series as indices into unique values. + indices_type order; /// ascending order of the values as indices. +}; + +long compare(int left, int right) +{ + if (left == right) + return 0; + if (left < right) + return -1; + return 1; +} + +bool has_item(const values_type& items, const indices_type& order, int val, long& index) +{ + index = items.size(); + bool found = false; + long low = 0; + long high = items.size() - 1; + long comp_res; + while (low <= high) + { + long this_index = (low + high) / 2; + comp_res = compare(items[order[this_index]], val); + if (comp_res < 0) + low = this_index + 1; + else + { + high = this_index - 1; + if (comp_res == 0) + { + found = true; + low = this_index; + } + } + } + index = low; + return found; +} + +bool check_items(const values_type& items) +{ + if (items.empty()) + return false; + + // Items are supposed to be all unique values. + values_type copied(items); + sort(copied.begin(), copied.end()); + copied.erase(unique(copied.begin(), copied.end()), copied.end()); + return copied.size() == items.size(); +} + +bool check_order(const values_type& items, const indices_type& order) +{ + // Ensure that the order is truly in ascending order. + if (items.size() != order.size()) + return false; + + if (items.empty()) + return false; + + indices_type::const_iterator it = order.begin(); + values_type::value_type prev = items[*it]; + for (++it; it != order.end(); ++it) + { + values_type::value_type val = items[*it]; + if (prev >= val) + return false; + + prev = val; + } + + return true; +} + +bool check_data(const values_type& items, const indices_type& data, const values_type& original) +{ + if (items.empty() || data.empty() || original.empty()) + return false; + + if (data.size() != original.size()) + return false; + + size_t n = data.size(); + for (size_t i = 0; i < n; ++i) + { + if (items[data[i]] != original[i]) + return false; + } + return true; +} + +bool dump_and_check(const field& fld, const values_type& original, bool dump_values) +{ + cout << "unique item count: " << fld.items.size() << endl; + cout << "original data count: " << fld.data.size() << endl; + + if (dump_values) + { + cout << "--- items" << endl; + copy(fld.items.begin(), fld.items.end(), ostream_iterator(cout, "\n")); + cout << "--- sorted items" << endl; + { + indices_type::const_iterator it = fld.order.begin(), it_end = fld.order.end(); + for (; it != it_end; ++it) + { + cout << fld.items[*it] << endl; + } + } + } + + if (!check_items(fld.items)) + { + cout << "item check failed" << endl; + return false; + } + + if (!check_order(fld.items, fld.order)) + { + cout << "order check failed" << endl; + return false; + } + + if (!check_data(fld.items, fld.data, original)) + { + cout << "data check failed" << endl; + return false; + } + + return true; +} + +void run1(const values_type& vals, bool dump_values) +{ + field fld; + { + stack_printer __stack_printer__("::run1 (existing algorithm)"); + values_type::const_iterator it = vals.begin(), it_end = vals.end(); + for (; it != it_end; ++it) + { + long index = 0; + if (!has_item(fld.items, fld.order, *it, index)) + { + // This item doesn't exist in the dimension array yet. + fld.items.push_back(*it); + fld.order.insert( + fld.order.begin()+index, fld.items.size()-1); + fld.data.push_back(fld.items.size()-1); + } + else + fld.data.push_back(fld.order[index]); + } + } + + bool res = dump_and_check(fld, vals, dump_values); + cout << "check: " << (res ? "success" : "failure") << endl; +} + +struct bucket +{ + int value; + size_t order_index; + size_t data_index; + + bucket(int _value, size_t _order_index, size_t _data_index) : + value(_value), order_index(_order_index), data_index(_data_index) {} + + bucket(const bucket& r) : + value(r.value), order_index(r.order_index), data_index(r.data_index) {} +}; + +void print_buckets(const vector& buckets, const char* msg) +{ + cout << "--- buckets content (" << msg << ")" << endl; + vector::const_iterator it = buckets.begin(), it_end = buckets.end(); + for (; it != it_end; ++it) + { + cout << "value: " << it->value << " order index: " << it->order_index + << " data index: " << it->data_index << endl; + } + cout << "---" << endl; +} + +struct less_by_value : std::binary_function +{ + bool operator() (const bucket& left, const bucket& right) const + { + return left.value < right.value; + } +}; + +struct less_by_data_index : std::binary_function +{ + bool operator() (const bucket& left, const bucket& right) const + { + return left.data_index < right.data_index; + } +}; + +struct equal_by_value : std::binary_function +{ + bool operator() (const bucket& left, const bucket& right) const + { + return left.value == right.value; + } +}; + +class push_back_value : std::unary_function +{ + values_type& items; +public: + push_back_value(values_type& _items) : items(_items) {} + void operator() (const bucket& v) + { + items.push_back(v.value); + } +}; + +class push_back_order_index : std::unary_function +{ + indices_type& data_indices; +public: + push_back_order_index(indices_type& _items) : data_indices(_items) {} + void operator() (const bucket& v) + { + data_indices.push_back(v.order_index); + } +}; + +void run2(const values_type& vals, bool dump_values) +{ + field fld; + { + stack_printer __stack_printer__("::run2 (alternative algorithm)"); + vector buckets; + buckets.reserve(vals.size()); + { + // Push back all original values. + values_type::const_iterator it = vals.begin(), it_end = vals.end(); + for (size_t i = 0; it != it_end; ++it, ++i) + buckets.push_back(bucket(*it, 0, i)); + } + + if (buckets.empty()) + { + cout << "error: empty buckets" << endl; + return; + } + +// print_buckets(buckets, "original"); + + // Sort by the value. + sort(buckets.begin(), buckets.end(), less_by_value()); + +// print_buckets(buckets, "sorted"); + + { + // Set order index such that unique values have identical index value. + size_t cur_index = 0; + vector::iterator it = buckets.begin(), it_end = buckets.end(); + int prev = it->value; + it->order_index = cur_index; + for (++it; it != it_end; ++it) + { + if (prev != it->value) + ++cur_index; + + it->order_index = cur_index; + prev = it->value; + } + } + +// print_buckets(buckets, "sorted and indexed"); + + // Re-sort the bucket this time by the data index. + sort(buckets.begin(), buckets.end(), less_by_data_index()); +// print_buckets(buckets, "re-sort by data index"); + + // Copy the order index series into the field object. + fld.data.reserve(buckets.size()); + for_each(buckets.begin(), buckets.end(), push_back_order_index(fld.data)); + + // Sort by the value again. + sort(buckets.begin(), buckets.end(), less_by_value()); + + // Unique by value. + vector::iterator it_unique_end = + unique(buckets.begin(), buckets.end(), equal_by_value()); + +// print_buckets(buckets, "uniqued"); + + // Copy the unique values into items. + vector::iterator it_beg = buckets.begin(); + size_t len = distance(it_beg, it_unique_end); + fld.items.reserve(len); + for_each(it_beg, it_unique_end, push_back_value(fld.items)); + + // The items are actually already sorted. So, just insert a sequence + // of integers from 0 and up. + fld.order.reserve(len); + for (size_t i = 0; i < len; ++i) + fld.order.push_back(i); + } + + bool res = dump_and_check(fld, vals, dump_values); + cout << "check: " << (res ? "success" : "failure") << endl; +} + +} + +int main() +{ + values_type vals; + vals.reserve(val_count); + + if (dump_values) + cout << "--- original" << endl; + + for (size_t i = 0; i < val_count; ++i) + { + double v = rand(); + v /= RAND_MAX; + v *= multiplier; + values_type::value_type v2 = v; + vals.push_back(v2); + + if (dump_values) + cout << i << ": " << v2 << endl; + } + + if (dump_values) + cout << "---" << endl; + + run1(vals, dump_values); + run2(vals, dump_values); + + return EXIT_SUCCESS; +} + +/* vim:set shiftwidth=4 softtabstop=4 expandtab: */