124 lines
4.6 KiB
C++
124 lines
4.6 KiB
C++
/*
 * Copyright (C) 2017 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
|
|
|
|
#ifndef LIBTEXTCLASSIFIER_COMMON_LITTLE_ENDIAN_DATA_H_
|
|
#define LIBTEXTCLASSIFIER_COMMON_LITTLE_ENDIAN_DATA_H_
|
|
|
|
#include <algorithm>
|
|
#include <string>
|
|
#include <vector>
|
|
|
|
#include "base.h"
|
|
#include "util/base/logging.h"
|
|
|
|
namespace libtextclassifier {
|
|
namespace nlp_core {
|
|
|
|
// Reverses, in place, the order of the sizeof(T) bytes that start at addr.
// E.g., if sizeof(T) == 2, then (addr[0], addr[1]) -> (addr[1], addr[0]).
// Useful for little endian <-> big endian conversions.
//
// addr must point to a valid object of type T.  For sizeof(T) == 1 the call
// leaves the object unchanged.
template <class T>
void SwapBytes(T *addr) {
  // View the object as raw bytes so that the reversal works for any T.
  char *const first_byte = reinterpret_cast<char *>(addr);
  char *const past_last_byte = first_byte + sizeof(T);
  std::reverse(first_byte, past_last_byte);
}
|
|
|
|
// Assuming addr points to a piece of data of type T, with its bytes in the
|
|
// little/big endian order specific to the machine this code runs on, this
|
|
// method will re-arrange the bytes (in place) in little-endian order.
|
|
template <class T>
|
|
void HostToLittleEndian(T *addr) {
|
|
if (LittleEndian::IsLittleEndian()) {
|
|
// Do nothing: current machine is little-endian.
|
|
} else {
|
|
SwapBytes(addr);
|
|
}
|
|
}
|
|
|
|
// Reverse of HostToLittleEndian: assuming addr points to a piece of data of
// type T whose bytes are in little-endian order, re-arranges the bytes (in
// place) in the order used by the machine this code runs on.
template <class T>
void LittleEndianToHost(T *addr) {
  // The conversion is its own inverse: on little-endian machines nothing has
  // to change (source and target formats are identical); otherwise the bytes
  // are swapped.  Hence, delegate to HostToLittleEndian.
  HostToLittleEndian(addr);
}
|
|
|
|
// Returns string obtained by concatenating the bytes of the elements from a
// vector (in order: v[0], v[1], etc).  If the type T requires more than one
// byte, the bytes of each element are first converted to little-endian format.
//
// The returned string has exactly v.size() * sizeof(T) bytes.
template <typename T>
std::string GetDataBytesInLittleEndianOrder(const std::vector<T> &v) {
  std::string data_bytes;
  // The final size is known upfront: allocate once instead of growing the
  // string repeatedly while appending.
  data_bytes.reserve(v.size() * sizeof(T));
  // Iterate by value: each iteration gets its own private copy of the
  // element, which can be converted in place without touching the input.
  for (T element : v) {
    HostToLittleEndian(&element);
    data_bytes.append(reinterpret_cast<const char *>(&element), sizeof(T));
  }
  return data_bytes;
}
|
|
|
|
// Performs reverse of GetDataBytesInLittleEndianOrder.
|
|
//
|
|
// I.e., decodes the data bytes from parameter bytes into num_elements Ts, and
|
|
// places them in the vector v (previous content of that vector is erased).
|
|
//
|
|
// We expect bytes to contain the concatenation of the bytes for exactly
|
|
// num_elements elements of type T. If the type T requires more than one byte,
|
|
// those bytes should be arranged in little-endian form.
|
|
//
|
|
// Returns true on success and false otherwise (e.g., bytes has the wrong size).
|
|
// Note: we do not want to crash on corrupted data (some clients, e..g, GMSCore,
|
|
// have asked us not to do so). Instead, we report the error and let the client
|
|
// decide what to do. On error, we also fill the vector with zeros, such that
|
|
// at least the dimension of v matches expectations.
|
|
template<typename T>
|
|
bool FillVectorFromDataBytesInLittleEndian(
|
|
const std::string &bytes, int num_elements, std::vector<T> *v) {
|
|
if (bytes.size() != num_elements * sizeof(T)) {
|
|
TC_LOG(ERROR) << "Wrong number of bytes: actual " << bytes.size()
|
|
<< " vs expected " << num_elements
|
|
<< " elements of sizeof(element) = " << sizeof(T)
|
|
<< " bytes each ; will fill vector with zeros";
|
|
v->assign(num_elements, static_cast<T>(0));
|
|
return false;
|
|
}
|
|
v->clear();
|
|
v->reserve(num_elements);
|
|
const T *start = reinterpret_cast<const T *>(bytes.data());
|
|
if (LittleEndian::IsLittleEndian() || (sizeof(T) == 1)) {
|
|
// Fast in the common case ([almost] all hardware today is little-endian):
|
|
// if same endianness (or type T requires a single byte and endianness
|
|
// irrelevant), just use the bytes.
|
|
v->assign(start, start + num_elements);
|
|
} else {
|
|
// Slower (but very rare case): this code runs on a big endian machine and
|
|
// the type T requires more than one byte. Hence, some conversion is
|
|
// necessary.
|
|
for (int i = 0; i < num_elements; ++i) {
|
|
T temp = start[i];
|
|
SwapBytes(&temp);
|
|
v->push_back(temp);
|
|
}
|
|
}
|
|
return true;
|
|
}
|
|
|
|
} // namespace nlp_core
|
|
} // namespace libtextclassifier
|
|
|
|
#endif // LIBTEXTCLASSIFIER_COMMON_LITTLE_ENDIAN_DATA_H_
|