contrib/native/client/src/clientlib/decimalUtils.cpp


1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166

/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
#include <vector>
#include "drill/recordBatch.hpp"
#include "drill/decimalUtils.hpp"

#ifdef _WIN32
#define bswap_16                    _byteswap_ushort
#define bswap_32                    _byteswap_ulong
#define bswap_64                    _byteswap_uint64
#elif defined(__linux__)
#include <byteswap.h>   //for bswap_16,32,64
#elif defined(__APPLE__)
#include <libkern/OSByteOrder.h>
#define bswap_16 OSSwapInt16
#define bswap_32 OSSwapInt32
#define bswap_64 OSSwapInt64
#endif

using namespace boost::multiprecision;

#define MAX_DIGITS 9
#define INTEGER_SIZE sizeof(uint32_t)
#define DIGITS_BASE 1000000000

namespace Drill
{

// Code is ported from Java DecimalUtility.java implementation.
// The big differences are that we expose a DecimalValue struct,
// We store the unscaled value in a boost cpp_int
// And that when we retrieve integer data off the wire, we need to do an endianness swap on.
// We only do the endianness swap on dense types.
// on little-endian platforms.

DecimalValue getDecimalValueFromByteBuf(SlicedByteBuf& data, size_t startIndex, int nDecimalDigits, int scale, bool truncateScale)
{

    // For sparse decimal type we have padded zeroes at the end, strip them while converting to BigDecimal.
    int32_t actualDigits;

    // Indicate if we need an endianness swap (truncateScale = true for sparse types, which don't need an endianness swap).
    bool needsEndiannessSwap = !truncateScale;

    // Initialize the BigDecimal, first digit in the ByteBuf has the sign so mask it out
    cpp_int decimalDigits = (needsEndiannessSwap ?
            bswap_32(data.getUint32(startIndex)) & 0x7FFFFFFF :
            (data.getUint32(startIndex) & 0x7FFFFFFF));

    cpp_int base(DIGITS_BASE);

    for (int i = 1; i < nDecimalDigits; i++) {

        // Note: we need to byteswap when we retrieve integers off the wire since they are
        // stored in big-endian format.
        cpp_int temp = (needsEndiannessSwap ?
                bswap_32(data.getUint32(startIndex + (i * INTEGER_SIZE))) :
                (data.getUint32(startIndex + (i * INTEGER_SIZE))));

        decimalDigits *= base;
        decimalDigits += temp;
    }

    // Truncate any additional padding we might have added
    if (truncateScale && scale > 0 && (actualDigits = scale % MAX_DIGITS) != 0) {
        cpp_int truncate = (int32_t) std::pow(10.0, (MAX_DIGITS - actualDigits));
        decimalDigits /= truncate;
    }

    DecimalValue val;
    val.m_unscaledValue = decimalDigits;

    // set the sign
    if ((data.getUint32(startIndex) & 0x80000000) != 0)
    {
        val.m_unscaledValue *= -1;
    }

    val.m_scale = scale;
    return val;
}

DecimalValue getDecimalValueFromDense(SlicedByteBuf& data, size_t startIndex, int nDecimalDigits, int scale, int maxPrecision, int width)
{
    /* This method converts the dense representation to
     * an intermediate representation. The intermediate
     * representation has one more integer than the dense
     * representation.
     */
    std::vector<uint8_t> intermediateBytes((nDecimalDigits + 1) * INTEGER_SIZE, 0);

    int32_t intermediateIndex = 3;

    int32_t mask[] = {0x03, 0x0F, 0x3F, 0xFF};
    int32_t reverseMask[] = {0xFC, 0xF0, 0xC0, 0x00};

    int32_t maskIndex;
    int32_t shiftOrder;
    uint8_t shiftBits;

    // TODO: Some of the logic here is common with casting from Dense to Sparse types, factor out common code
    if (maxPrecision == 38) {
        maskIndex = 0;
        shiftOrder = 6;
        shiftBits = 0x00;
        intermediateBytes[intermediateIndex++] = (uint8_t) (data.getByte(startIndex) & 0x7F);
    } else if (maxPrecision == 28) {
        maskIndex = 1;
        shiftOrder = 4;
        shiftBits = (uint8_t) ((data.getByte(startIndex) & 0x03) << shiftOrder);
        intermediateBytes[intermediateIndex++] = (uint8_t) (((data.getByte(startIndex) & 0x3C) & 0xFF) >> 2);
    } else {
        assert("Dense types with max precision 38 and 28 are only supported");
    }

    int32_t inputIndex = 1;
    bool sign = false;

    if ((data.getByte(startIndex) & 0x80) != 0) {
        sign = true;
    }

    while (inputIndex < width) {

        intermediateBytes[intermediateIndex] = (uint8_t) ((shiftBits) | (((data.getByte(startIndex + inputIndex) & reverseMask[maskIndex]) & 0xFF) >> (8 - shiftOrder)));

        shiftBits = (uint8_t) ((data.getByte(startIndex + inputIndex) & mask[maskIndex]) << shiftOrder);

        inputIndex++;
        intermediateIndex++;

        if (((inputIndex - 1) % INTEGER_SIZE) == 0) {
            shiftBits = (uint8_t) ((shiftBits & 0xFF) >> 2);
            maskIndex++;
            shiftOrder -= 2;
        }

    }
    /* copy the last byte */
    intermediateBytes[intermediateIndex] = shiftBits;

    if (sign) {
        intermediateBytes[0] = (uint8_t) (intermediateBytes[0] | 0x80);
    }

    SlicedByteBuf intermediateData(&intermediateBytes[0], 0, intermediateBytes.size());
    return getDecimalValueFromIntermediate(intermediateData, 0, nDecimalDigits + 1, scale);
}

}