Some proof-of-concept code for dpcache performance.

2012-03-14 22:46:41 -04:00 · 2012-03-14 22:46:41 -04:00 · e4fb449706
commit e4fb449706
parent f697d7aa5c
1 changed files with 434 additions and 0 deletions
--- a/sc/workben/dpcache/perf-test.cpp
+++ b/sc/workben/dpcache/perf-test.cpp
@ -0,0 +1,434 @@
+/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
+/*
+ * Version: MPL 1.1 / GPLv3+ / LGPLv3+
+ *
+ * The contents of this file are subject to the Mozilla Public License Version
+ * 1.1 (the "License"); you may not use this file except in compliance with
+ * the License or as specified alternatively below. You may obtain a copy of
+ * the License at http://www.mozilla.org/MPL/
+ *
+ * Software distributed under the License is distributed on an "AS IS" basis,
+ * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
+ * for the specific language governing rights and limitations under the
+ * License.
+ *
+ * Major Contributor(s):
+ *   Copyright (C) 2012 Kohei Yoshida <kohei.yoshida@suse.com>
+ *
+ * All Rights Reserved.
+ *
+ * For minor contributions see the git repository.
+ *
+ * Alternatively, the contents of this file may be used under the terms of
+ * either the GNU General Public License Version 3 or later (the "GPLv3+"), or
+ * the GNU Lesser General Public License Version 3 or later (the "LGPLv3+"),
+ * in which case the provisions of the GPLv3+ or the LGPLv3+ are applicable
+ * instead of those above.
+ */
+
+#include <cstdlib>
+#include <iostream>
+#include <stdio.h>
+#include <string>
+#include <sys/time.h>
+#include <vector>
+#include <iterator>
+#include <algorithm>
+#include <functional>
+
+#include <boost/noncopyable.hpp>
+
+using namespace std;
+
+namespace {
+
+class stack_printer
+{
+public:
+    explicit stack_printer(const char* msg) :
+        msMsg(msg)
+    {
+        fprintf(stdout, "%s: --begin\n", msMsg.c_str());
+        mfStartTime = getTime();
+    }
+
+    ~stack_printer()
+    {
+        double fEndTime = getTime();
+        fprintf(stdout, "%s: --end (duration: %g sec)\n", msMsg.c_str(), (fEndTime-mfStartTime));
+    }
+
+    void printTime(int line) const
+    {
+        double fEndTime = getTime();
+        fprintf(stdout, "%s: --(%d) (duration: %g sec)\n", msMsg.c_str(), line, (fEndTime-mfStartTime));
+    }
+
+private:
+    double getTime() const
+    {
+        timeval tv;
+        gettimeofday(&tv, NULL);
+        return tv.tv_sec + tv.tv_usec / 1000000.0;
+    }
+
+    ::std::string msMsg;
+    double mfStartTime;
+};
+
+typedef std::vector<int> values_type;
+typedef std::vector<size_t> indices_type;
+
+#if 1
+size_t val_count = 6000000;
+double multiplier = 300000.0;
+bool dump_values = false;
+#else
+size_t val_count = 20;
+double multiplier = 10.0;
+bool dump_values = true;
+#endif
+
+struct field : boost::noncopyable
+{
+    values_type items;   /// unique values
+    indices_type data;   /// original value series as indices into unique values.
+    indices_type order;  /// ascending order of the values as indices.
+};
+
+long compare(int left, int right)
+{
+    if (left == right)
+        return 0;
+    if (left < right)
+        return -1;
+    return 1;
+}
+
+bool has_item(const values_type& items, const indices_type& order, int val, long& index)
+{
+    index = items.size();
+    bool found = false;
+    long low = 0;
+    long high = items.size() - 1;
+    long comp_res;
+    while (low <= high)
+    {
+        long this_index = (low + high) / 2;
+        comp_res = compare(items[order[this_index]], val);
+        if (comp_res < 0)
+            low = this_index + 1;
+        else
+        {
+            high = this_index - 1;
+            if (comp_res == 0)
+            {
+                found = true;
+                low = this_index;
+            }
+        }
+    }
+    index = low;
+    return found;
+}
+
+bool check_items(const values_type& items)
+{
+    if (items.empty())
+        return false;
+
+    // Items are supposed to be all unique values.
+    values_type copied(items);
+    sort(copied.begin(), copied.end());
+    copied.erase(unique(copied.begin(), copied.end()), copied.end());
+    return copied.size() == items.size();
+}
+
+bool check_order(const values_type& items, const indices_type& order)
+{
+    // Ensure that the order is truly in ascending order.
+    if (items.size() != order.size())
+        return false;
+
+    if (items.empty())
+        return false;
+
+    indices_type::const_iterator it = order.begin();
+    values_type::value_type prev = items[*it];
+    for (++it; it != order.end(); ++it)
+    {
+        values_type::value_type val = items[*it];
+        if (prev >= val)
+            return false;
+
+        prev = val;
+    }
+
+    return true;
+}
+
+bool check_data(const values_type& items, const indices_type& data, const values_type& original)
+{
+    if (items.empty() || data.empty() || original.empty())
+        return false;
+
+    if (data.size() != original.size())
+        return false;
+
+    size_t n = data.size();
+    for (size_t i = 0; i < n; ++i)
+    {
+        if (items[data[i]] != original[i])
+            return false;
+    }
+    return true;
+}
+
+bool dump_and_check(const field& fld, const values_type& original, bool dump_values)
+{
+    cout << "unique item count:   " << fld.items.size() << endl;
+    cout << "original data count: " << fld.data.size() << endl;
+
+    if (dump_values)
+    {
+        cout << "--- items" << endl;
+        copy(fld.items.begin(), fld.items.end(), ostream_iterator<int>(cout, "\n"));
+        cout << "--- sorted items" << endl;
+        {
+            indices_type::const_iterator it = fld.order.begin(), it_end = fld.order.end();
+            for (; it != it_end; ++it)
+            {
+                cout << fld.items[*it] << endl;
+            }
+        }
+    }
+
+    if (!check_items(fld.items))
+    {
+        cout << "item check failed" << endl;
+        return false;
+    }
+
+    if (!check_order(fld.items, fld.order))
+    {
+        cout << "order check failed" << endl;
+        return false;
+    }
+
+    if (!check_data(fld.items, fld.data, original))
+    {
+        cout << "data check failed" << endl;
+        return false;
+    }
+
+    return true;
+}
+
+void run1(const values_type& vals, bool dump_values)
+{
+    field fld;
+    {
+        stack_printer __stack_printer__("::run1 (existing algorithm)");
+        values_type::const_iterator it = vals.begin(), it_end = vals.end();
+        for (; it != it_end; ++it)
+        {
+            long index = 0;
+            if (!has_item(fld.items, fld.order, *it, index))
+            {
+                // This item doesn't exist in the dimension array yet.
+                fld.items.push_back(*it);
+                fld.order.insert(
+                    fld.order.begin()+index, fld.items.size()-1);
+                fld.data.push_back(fld.items.size()-1);
+            }
+            else
+                fld.data.push_back(fld.order[index]);
+        }
+    }
+
+    bool res = dump_and_check(fld, vals, dump_values);
+    cout << "check: " << (res ? "success" : "failure") << endl;
+}
+
+struct bucket
+{
+    int value;
+    size_t order_index;
+    size_t data_index;
+
+    bucket(int _value, size_t _order_index, size_t _data_index) :
+        value(_value), order_index(_order_index), data_index(_data_index) {}
+
+    bucket(const bucket& r) :
+        value(r.value), order_index(r.order_index), data_index(r.data_index) {}
+};
+
+void print_buckets(const vector<bucket>& buckets, const char* msg)
+{
+    cout << "--- buckets content (" << msg << ")" << endl;
+    vector<bucket>::const_iterator it = buckets.begin(), it_end = buckets.end();
+    for (; it != it_end; ++it)
+    {
+        cout << "value: " << it->value << "  order index: " << it->order_index
+             << "  data index: " << it->data_index << endl;
+    }
+    cout << "---" << endl;
+}
+
+struct less_by_value : std::binary_function<bucket, bucket, bool>
+{
+    bool operator() (const bucket& left, const bucket& right) const
+    {
+        return left.value < right.value;
+    }
+};
+
+struct less_by_data_index : std::binary_function<bucket, bucket, bool>
+{
+    bool operator() (const bucket& left, const bucket& right) const
+    {
+        return left.data_index < right.data_index;
+    }
+};
+
+struct equal_by_value : std::binary_function<bucket, bucket, bool>
+{
+    bool operator() (const bucket& left, const bucket& right) const
+    {
+        return left.value == right.value;
+    }
+};
+
+class push_back_value : std::unary_function<bucket, void>
+{
+    values_type& items;
+public:
+    push_back_value(values_type& _items) : items(_items) {}
+    void operator() (const bucket& v)
+    {
+        items.push_back(v.value);
+    }
+};
+
+class push_back_order_index : std::unary_function<bucket, void>
+{
+    indices_type& data_indices;
+public:
+    push_back_order_index(indices_type& _items) : data_indices(_items) {}
+    void operator() (const bucket& v)
+    {
+        data_indices.push_back(v.order_index);
+    }
+};
+
+void run2(const values_type& vals, bool dump_values)
+{
+    field fld;
+    {
+        stack_printer __stack_printer__("::run2 (alternative algorithm)");
+        vector<bucket> buckets;
+        buckets.reserve(vals.size());
+        {
+            // Push back all original values.
+            values_type::const_iterator it = vals.begin(), it_end = vals.end();
+            for (size_t i = 0; it != it_end; ++it, ++i)
+                buckets.push_back(bucket(*it, 0, i));
+        }
+
+        if (buckets.empty())
+        {
+            cout << "error: empty buckets" << endl;
+            return;
+        }
+
+//      print_buckets(buckets, "original");
+
+        // Sort by the value.
+        sort(buckets.begin(), buckets.end(), less_by_value());
+
+//      print_buckets(buckets, "sorted");
+
+        {
+            // Set order index such that unique values have identical index value.
+            size_t cur_index = 0;
+            vector<bucket>::iterator it = buckets.begin(), it_end = buckets.end();
+            int prev = it->value;
+            it->order_index = cur_index;
+            for (++it; it != it_end; ++it)
+            {
+                if (prev != it->value)
+                    ++cur_index;
+
+                it->order_index = cur_index;
+                prev = it->value;
+            }
+        }
+
+//      print_buckets(buckets, "sorted and indexed");
+
+        // Re-sort the bucket this time by the data index.
+        sort(buckets.begin(), buckets.end(), less_by_data_index());
+//      print_buckets(buckets, "re-sort by data index");
+
+        // Copy the order index series into the field object.
+        fld.data.reserve(buckets.size());
+        for_each(buckets.begin(), buckets.end(), push_back_order_index(fld.data));
+
+        // Sort by the value again.
+        sort(buckets.begin(), buckets.end(), less_by_value());
+
+        // Unique by value.
+        vector<bucket>::iterator it_unique_end =
+            unique(buckets.begin(), buckets.end(), equal_by_value());
+
+//      print_buckets(buckets, "uniqued");
+
+        // Copy the unique values into items.
+        vector<bucket>::iterator it_beg = buckets.begin();
+        size_t len = distance(it_beg, it_unique_end);
+        fld.items.reserve(len);
+        for_each(it_beg, it_unique_end, push_back_value(fld.items));
+
+        // The items are actually already sorted.  So, just insert a sequence
+        // of integers from 0 and up.
+        fld.order.reserve(len);
+        for (size_t i = 0; i < len; ++i)
+            fld.order.push_back(i);
+    }
+
+    bool res = dump_and_check(fld, vals, dump_values);
+    cout << "check: " << (res ? "success" : "failure") << endl;
+}
+
+}
+
+int main()
+{
+    values_type vals;
+    vals.reserve(val_count);
+
+    if (dump_values)
+        cout << "--- original" << endl;
+
+    for (size_t i = 0; i < val_count; ++i)
+    {
+        double v = rand();
+        v /= RAND_MAX;
+        v *= multiplier;
+        values_type::value_type v2 = v;
+        vals.push_back(v2);
+
+        if (dump_values)
+            cout << i << ": " << v2 << endl;
+    }
+
+    if (dump_values)
+        cout << "---" << endl;
+
+    run1(vals, dump_values);
+    run2(vals, dump_values);
+
+    return EXIT_SUCCESS;
+}
+
+/* vim:set shiftwidth=4 softtabstop=4 expandtab: */