upload android base code part6
This commit is contained in:
parent
421e214c7d
commit
4e516ec6ed
35396 changed files with 9188716 additions and 0 deletions
942
android/system/extras/tests/memtest/bandwidth.h
Normal file
942
android/system/extras/tests/memtest/bandwidth.h
Normal file
|
@ -0,0 +1,942 @@
|
|||
/*
|
||||
* Copyright (C) 2013 The Android Open Source Project
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#ifndef __BANDWIDTH_H__
|
||||
#define __BANDWIDTH_H__
|
||||
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
|
||||
#include "utils/Compat.h"
|
||||
#include "memtest.h"
|
||||
|
||||
// Bandwidth Class definitions.
|
||||
class BandwidthBenchmark {
|
||||
public:
|
||||
BandwidthBenchmark()
|
||||
: _size(0),
|
||||
_num_warm_loops(DEFAULT_NUM_WARM_LOOPS),
|
||||
_num_loops(DEFAULT_NUM_LOOPS) {}
|
||||
virtual ~BandwidthBenchmark() {}
|
||||
|
||||
bool run() {
|
||||
if (_size == 0) {
|
||||
return false;
|
||||
}
|
||||
if (!canRun()) {
|
||||
return false;
|
||||
}
|
||||
|
||||
bench(_num_warm_loops);
|
||||
|
||||
nsecs_t t = system_time();
|
||||
bench(_num_loops);
|
||||
t = system_time() - t;
|
||||
|
||||
_mb_per_sec = (_size*(_num_loops/_BYTES_PER_MB))/(t/_NUM_NS_PER_SEC);
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
bool canRun() { return !usesNeon() || isNeonSupported(); }
|
||||
|
||||
virtual bool setSize(size_t size) = 0;
|
||||
|
||||
virtual const char *getName() = 0;
|
||||
|
||||
virtual bool verify() = 0;
|
||||
|
||||
virtual bool usesNeon() { return false; }
|
||||
|
||||
bool isNeonSupported() {
|
||||
#if defined(__ARM_NEON__)
|
||||
return true;
|
||||
#else
|
||||
return false;
|
||||
#endif
|
||||
}
|
||||
|
||||
// Accessors/mutators.
|
||||
double mb_per_sec() { return _mb_per_sec; }
|
||||
size_t num_warm_loops() { return _num_warm_loops; }
|
||||
size_t num_loops() { return _num_loops; }
|
||||
size_t size() { return _size; }
|
||||
|
||||
void set_num_warm_loops(size_t num_warm_loops) {
|
||||
_num_warm_loops = num_warm_loops;
|
||||
}
|
||||
void set_num_loops(size_t num_loops) { _num_loops = num_loops; }
|
||||
|
||||
// Static constants
|
||||
static const unsigned int DEFAULT_NUM_WARM_LOOPS = 1000000;
|
||||
static const unsigned int DEFAULT_NUM_LOOPS = 20000000;
|
||||
|
||||
protected:
|
||||
virtual void bench(size_t num_loops) = 0;
|
||||
|
||||
double _mb_per_sec;
|
||||
size_t _size;
|
||||
size_t _num_warm_loops;
|
||||
size_t _num_loops;
|
||||
|
||||
private:
|
||||
// Static constants
|
||||
static const CONSTEXPR double _NUM_NS_PER_SEC = 1000000000.0;
|
||||
static const CONSTEXPR double _BYTES_PER_MB = 1024.0* 1024.0;
|
||||
};
|
||||
|
||||
class CopyBandwidthBenchmark : public BandwidthBenchmark {
|
||||
public:
|
||||
CopyBandwidthBenchmark() : BandwidthBenchmark(), _src(NULL), _dst(NULL) { }
|
||||
|
||||
bool setSize(size_t size) {
|
||||
if (_src) {
|
||||
free(_src);
|
||||
_src = NULL;
|
||||
}
|
||||
if (_dst) {
|
||||
free(_dst);
|
||||
_dst = NULL;
|
||||
}
|
||||
|
||||
if (size == 0) {
|
||||
_size = DEFAULT_COPY_SIZE;
|
||||
} else {
|
||||
_size = size;
|
||||
}
|
||||
|
||||
_src = reinterpret_cast<char*>(memalign(64, _size));
|
||||
if (!_src) {
|
||||
perror("Failed to allocate memory for test.");
|
||||
return false;
|
||||
}
|
||||
_dst = reinterpret_cast<char*>(memalign(64, _size));
|
||||
if (!_dst) {
|
||||
perror("Failed to allocate memory for test.");
|
||||
return false;
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
virtual ~CopyBandwidthBenchmark() {
|
||||
if (_src) {
|
||||
free(_src);
|
||||
_src = NULL;
|
||||
}
|
||||
if (_dst) {
|
||||
free(_dst);
|
||||
_dst = NULL;
|
||||
}
|
||||
}
|
||||
|
||||
bool verify() {
|
||||
memset(_src, 0x23, _size);
|
||||
memset(_dst, 0, _size);
|
||||
bench(1);
|
||||
if (memcmp(_src, _dst, _size) != 0) {
|
||||
printf("Buffers failed to compare after one loop.\n");
|
||||
return false;
|
||||
}
|
||||
|
||||
memset(_src, 0x23, _size);
|
||||
memset(_dst, 0, _size);
|
||||
_num_loops = 2;
|
||||
bench(2);
|
||||
if (memcmp(_src, _dst, _size) != 0) {
|
||||
printf("Buffers failed to compare after two loops.\n");
|
||||
return false;
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
protected:
|
||||
char *_src;
|
||||
char *_dst;
|
||||
|
||||
static const unsigned int DEFAULT_COPY_SIZE = 8000;
|
||||
};
|
||||
|
||||
class CopyLdrdStrdBenchmark : public CopyBandwidthBenchmark {
|
||||
public:
|
||||
CopyLdrdStrdBenchmark() : CopyBandwidthBenchmark() { }
|
||||
virtual ~CopyLdrdStrdBenchmark() {}
|
||||
|
||||
const char *getName() { return "ldrd/strd"; }
|
||||
|
||||
protected:
|
||||
// Copy using ldrd/strd instructions.
|
||||
void bench(size_t num_loops) {
|
||||
asm volatile(
|
||||
"stmfd sp!, {r0,r1,r2,r3,r4,r6,r7}\n"
|
||||
|
||||
"mov r0, %0\n"
|
||||
"mov r1, %1\n"
|
||||
"mov r2, %2\n"
|
||||
"mov r3, %3\n"
|
||||
|
||||
"0:\n"
|
||||
"mov r4, r2, lsr #6\n"
|
||||
|
||||
"1:\n"
|
||||
"ldrd r6, r7, [r0]\n"
|
||||
"strd r6, r7, [r1]\n"
|
||||
"ldrd r6, r7, [r0, #8]\n"
|
||||
"strd r6, r7, [r1, #8]\n"
|
||||
"ldrd r6, r7, [r0, #16]\n"
|
||||
"strd r6, r7, [r1, #16]\n"
|
||||
"ldrd r6, r7, [r0, #24]\n"
|
||||
"strd r6, r7, [r1, #24]\n"
|
||||
"ldrd r6, r7, [r0, #32]\n"
|
||||
"strd r6, r7, [r1, #32]\n"
|
||||
"ldrd r6, r7, [r0, #40]\n"
|
||||
"strd r6, r7, [r1, #40]\n"
|
||||
"ldrd r6, r7, [r0, #48]\n"
|
||||
"strd r6, r7, [r1, #48]\n"
|
||||
"ldrd r6, r7, [r0, #56]\n"
|
||||
"strd r6, r7, [r1, #56]\n"
|
||||
|
||||
"add r0, r0, #64\n"
|
||||
"add r1, r1, #64\n"
|
||||
"subs r4, r4, #1\n"
|
||||
"bgt 1b\n"
|
||||
|
||||
"sub r0, r0, r2\n"
|
||||
"sub r1, r1, r2\n"
|
||||
"subs r3, r3, #1\n"
|
||||
"bgt 0b\n"
|
||||
|
||||
"ldmfd sp!, {r0,r1,r2,r3,r4,r6,r7}\n"
|
||||
:: "r" (_src), "r" (_dst), "r" (_size), "r" (num_loops) : "r0", "r1", "r2", "r3");
|
||||
}
|
||||
};
|
||||
|
||||
class CopyLdmiaStmiaBenchmark : public CopyBandwidthBenchmark {
|
||||
public:
|
||||
CopyLdmiaStmiaBenchmark() : CopyBandwidthBenchmark() { }
|
||||
virtual ~CopyLdmiaStmiaBenchmark() {}
|
||||
|
||||
const char *getName() { return "ldmia/stmia"; }
|
||||
|
||||
protected:
|
||||
// Copy using ldmia/stmia instructions.
|
||||
void bench(size_t num_loops) {
|
||||
asm volatile(
|
||||
"stmfd sp!, {r0,r1,r2,r3,r4,r5,r6,r7,r8,r9,r10,r11,r12}\n"
|
||||
|
||||
"mov r0, %0\n"
|
||||
"mov r1, %1\n"
|
||||
"mov r2, %2\n"
|
||||
"mov r3, %3\n"
|
||||
|
||||
"0:\n"
|
||||
"mov r4, r2, lsr #6\n"
|
||||
|
||||
"1:\n"
|
||||
"ldmia r0!, {r5, r6, r7, r8, r9, r10, r11, r12}\n"
|
||||
"stmia r1!, {r5, r6, r7, r8, r9, r10, r11, r12}\n"
|
||||
"subs r4, r4, #1\n"
|
||||
"ldmia r0!, {r5, r6, r7, r8, r9, r10, r11, r12}\n"
|
||||
"stmia r1!, {r5, r6, r7, r8, r9, r10, r11, r12}\n"
|
||||
"bgt 1b\n"
|
||||
|
||||
"sub r0, r0, r2\n"
|
||||
"sub r1, r1, r2\n"
|
||||
"subs r3, r3, #1\n"
|
||||
"bgt 0b\n"
|
||||
|
||||
"ldmfd sp!, {r0,r1,r2,r3,r4,r5,r6,r7,r8,r9,r10,r11,r12}\n"
|
||||
:: "r" (_src), "r" (_dst), "r" (_size), "r" (num_loops) : "r0", "r1", "r2", "r3");
|
||||
}
|
||||
};
|
||||
|
||||
class CopyVld1Vst1Benchmark : public CopyBandwidthBenchmark {
|
||||
public:
|
||||
CopyVld1Vst1Benchmark() : CopyBandwidthBenchmark() { }
|
||||
virtual ~CopyVld1Vst1Benchmark() {}
|
||||
|
||||
const char *getName() { return "vld1/vst1"; }
|
||||
|
||||
bool usesNeon() { return true; }
|
||||
|
||||
protected:
|
||||
// Copy using vld1/vst1 instructions.
|
||||
#if defined(__ARM_NEON__)
|
||||
void bench(size_t num_loops) {
|
||||
asm volatile(
|
||||
"stmfd sp!, {r0,r1,r2,r3,r4}\n"
|
||||
|
||||
"mov r0, %0\n"
|
||||
"mov r1, %1\n"
|
||||
"mov r2, %2\n"
|
||||
"mov r3, %3\n"
|
||||
|
||||
"0:\n"
|
||||
"mov r4, r2, lsr #6\n"
|
||||
|
||||
"1:\n"
|
||||
"vld1.8 {d0-d3}, [r0]!\n"
|
||||
"vld1.8 {d4-d7}, [r0]!\n"
|
||||
"subs r4, r4, #1\n"
|
||||
"vst1.8 {d0-d3}, [r1:128]!\n"
|
||||
"vst1.8 {d4-d7}, [r1:128]!\n"
|
||||
"bgt 1b\n"
|
||||
|
||||
"sub r0, r0, r2\n"
|
||||
"sub r1, r1, r2\n"
|
||||
"subs r3, r3, #1\n"
|
||||
"bgt 0b\n"
|
||||
|
||||
"ldmfd sp!, {r0,r1,r2,r3,r4}\n"
|
||||
:: "r" (_src), "r" (_dst), "r" (_size), "r" (num_loops) : "r0", "r1", "r2", "r3");
|
||||
#else
|
||||
void bench(size_t) {
|
||||
#endif
|
||||
}
|
||||
};
|
||||
|
||||
class CopyVldrVstrBenchmark : public CopyBandwidthBenchmark {
|
||||
public:
|
||||
CopyVldrVstrBenchmark() : CopyBandwidthBenchmark() { }
|
||||
virtual ~CopyVldrVstrBenchmark() {}
|
||||
|
||||
const char *getName() { return "vldr/vstr"; }
|
||||
|
||||
bool usesNeon() { return true; }
|
||||
|
||||
protected:
|
||||
// Copy using vldr/vstr instructions.
|
||||
#if defined(__ARM_NEON__)
|
||||
void bench(size_t num_loops) {
|
||||
asm volatile(
|
||||
"stmfd sp!, {r0,r1,r2,r3,r4}\n"
|
||||
|
||||
"mov r0, %0\n"
|
||||
"mov r1, %1\n"
|
||||
"mov r2, %2\n"
|
||||
"mov r3, %3\n"
|
||||
|
||||
"0:\n"
|
||||
"mov r4, r2, lsr #6\n"
|
||||
|
||||
"1:\n"
|
||||
"vldr d0, [r0, #0]\n"
|
||||
"subs r4, r4, #1\n"
|
||||
"vldr d1, [r0, #8]\n"
|
||||
"vstr d0, [r1, #0]\n"
|
||||
"vldr d0, [r0, #16]\n"
|
||||
"vstr d1, [r1, #8]\n"
|
||||
"vldr d1, [r0, #24]\n"
|
||||
"vstr d0, [r1, #16]\n"
|
||||
"vldr d0, [r0, #32]\n"
|
||||
"vstr d1, [r1, #24]\n"
|
||||
"vldr d1, [r0, #40]\n"
|
||||
"vstr d0, [r1, #32]\n"
|
||||
"vldr d0, [r0, #48]\n"
|
||||
"vstr d1, [r1, #40]\n"
|
||||
"vldr d1, [r0, #56]\n"
|
||||
"vstr d0, [r1, #48]\n"
|
||||
"add r0, r0, #64\n"
|
||||
"vstr d1, [r1, #56]\n"
|
||||
"add r1, r1, #64\n"
|
||||
"bgt 1b\n"
|
||||
|
||||
"sub r0, r0, r2\n"
|
||||
"sub r1, r1, r2\n"
|
||||
"subs r3, r3, #1\n"
|
||||
"bgt 0b\n"
|
||||
|
||||
"ldmfd sp!, {r0,r1,r2,r3,r4}\n"
|
||||
:: "r" (_src), "r" (_dst), "r" (_size), "r" (num_loops) : "r0", "r1", "r2", "r3");
|
||||
#else
|
||||
void bench(size_t) {
|
||||
#endif
|
||||
}
|
||||
};
|
||||
|
||||
class CopyVldmiaVstmiaBenchmark : public CopyBandwidthBenchmark {
|
||||
public:
|
||||
CopyVldmiaVstmiaBenchmark() : CopyBandwidthBenchmark() { }
|
||||
virtual ~CopyVldmiaVstmiaBenchmark() {}
|
||||
|
||||
const char *getName() { return "vldmia/vstmia"; }
|
||||
|
||||
bool usesNeon() { return true; }
|
||||
|
||||
protected:
|
||||
// Copy using vldmia/vstmia instructions.
|
||||
#if defined(__ARM_NEON__)
|
||||
void bench(size_t num_loops) {
|
||||
asm volatile(
|
||||
"stmfd sp!, {r0,r1,r2,r3,r4}\n"
|
||||
|
||||
"mov r0, %0\n"
|
||||
"mov r1, %1\n"
|
||||
"mov r2, %2\n"
|
||||
"mov r3, %3\n"
|
||||
|
||||
"0:\n"
|
||||
"mov r4, r2, lsr #6\n"
|
||||
|
||||
"1:\n"
|
||||
"vldmia r0!, {d0-d7}\n"
|
||||
"subs r4, r4, #1\n"
|
||||
"vstmia r1!, {d0-d7}\n"
|
||||
"bgt 1b\n"
|
||||
|
||||
"sub r0, r0, r2\n"
|
||||
"sub r1, r1, r2\n"
|
||||
"subs r3, r3, #1\n"
|
||||
"bgt 0b\n"
|
||||
|
||||
"ldmfd sp!, {r0,r1,r2,r3,r4}\n"
|
||||
:: "r" (_src), "r" (_dst), "r" (_size), "r" (num_loops) : "r0", "r1", "r2", "r3");
|
||||
#else
|
||||
void bench(size_t) {
|
||||
#endif
|
||||
}
|
||||
};
|
||||
|
||||
class MemcpyBenchmark : public CopyBandwidthBenchmark {
|
||||
public:
|
||||
MemcpyBenchmark() : CopyBandwidthBenchmark() { }
|
||||
virtual ~MemcpyBenchmark() {}
|
||||
|
||||
const char *getName() { return "memcpy"; }
|
||||
|
||||
protected:
|
||||
void bench(size_t num_loops) {
|
||||
for (size_t i = 0; i < num_loops; i++) {
|
||||
memcpy(_dst, _src, _size);
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
class SingleBufferBandwidthBenchmark : public BandwidthBenchmark {
|
||||
public:
|
||||
SingleBufferBandwidthBenchmark() : BandwidthBenchmark(), _buffer(NULL) { }
|
||||
virtual ~SingleBufferBandwidthBenchmark() {
|
||||
if (_buffer) {
|
||||
free(_buffer);
|
||||
_buffer = NULL;
|
||||
}
|
||||
}
|
||||
|
||||
bool setSize(size_t size) {
|
||||
if (_buffer) {
|
||||
free(_buffer);
|
||||
_buffer = NULL;
|
||||
}
|
||||
|
||||
if (size == 0) {
|
||||
_size = DEFAULT_SINGLE_BUFFER_SIZE;
|
||||
} else {
|
||||
_size = size;
|
||||
}
|
||||
|
||||
_buffer = reinterpret_cast<char*>(memalign(64, _size));
|
||||
if (!_buffer) {
|
||||
perror("Failed to allocate memory for test.");
|
||||
return false;
|
||||
}
|
||||
memset(_buffer, 0, _size);
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
bool verify() { return true; }
|
||||
|
||||
protected:
|
||||
char *_buffer;
|
||||
|
||||
static const unsigned int DEFAULT_SINGLE_BUFFER_SIZE = 16000;
|
||||
};
|
||||
|
||||
class WriteBandwidthBenchmark : public SingleBufferBandwidthBenchmark {
|
||||
public:
|
||||
WriteBandwidthBenchmark() : SingleBufferBandwidthBenchmark() { }
|
||||
virtual ~WriteBandwidthBenchmark() { }
|
||||
|
||||
bool verify() {
|
||||
memset(_buffer, 0, _size);
|
||||
bench(1);
|
||||
for (size_t i = 0; i < _size; i++) {
|
||||
if (_buffer[i] != 1) {
|
||||
printf("Buffer failed to compare after one loop.\n");
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
memset(_buffer, 0, _size);
|
||||
bench(2);
|
||||
for (size_t i = 0; i < _size; i++) {
|
||||
if (_buffer[i] != 2) {
|
||||
printf("Buffer failed to compare after two loops.\n");
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
};
|
||||
|
||||
class WriteStrdBenchmark : public WriteBandwidthBenchmark {
|
||||
public:
|
||||
WriteStrdBenchmark() : WriteBandwidthBenchmark() { }
|
||||
virtual ~WriteStrdBenchmark() {}
|
||||
|
||||
const char *getName() { return "strd"; }
|
||||
|
||||
protected:
|
||||
// Write a given value using strd.
|
||||
void bench(size_t num_loops) {
|
||||
asm volatile(
|
||||
"stmfd sp!, {r0,r1,r2,r3,r4,r5}\n"
|
||||
|
||||
"mov r0, %0\n"
|
||||
"mov r1, %1\n"
|
||||
"mov r2, %2\n"
|
||||
|
||||
"mov r4, #0\n"
|
||||
"mov r5, #0\n"
|
||||
|
||||
"0:\n"
|
||||
"mov r3, r1, lsr #5\n"
|
||||
|
||||
"add r4, r4, #0x01010101\n"
|
||||
"mov r5, r4\n"
|
||||
|
||||
"1:\n"
|
||||
"subs r3, r3, #1\n"
|
||||
"strd r4, r5, [r0]\n"
|
||||
"strd r4, r5, [r0, #8]\n"
|
||||
"strd r4, r5, [r0, #16]\n"
|
||||
"strd r4, r5, [r0, #24]\n"
|
||||
"add r0, r0, #32\n"
|
||||
"bgt 1b\n"
|
||||
|
||||
"sub r0, r0, r1\n"
|
||||
"subs r2, r2, #1\n"
|
||||
"bgt 0b\n"
|
||||
|
||||
"ldmfd sp!, {r0,r1,r2,r3,r4,r5}\n"
|
||||
:: "r" (_buffer), "r" (_size), "r" (num_loops) : "r0", "r1", "r2");
|
||||
}
|
||||
};
|
||||
|
||||
class WriteStmiaBenchmark : public WriteBandwidthBenchmark {
|
||||
public:
|
||||
WriteStmiaBenchmark() : WriteBandwidthBenchmark() { }
|
||||
virtual ~WriteStmiaBenchmark() {}
|
||||
|
||||
const char *getName() { return "stmia"; }
|
||||
|
||||
protected:
|
||||
// Write a given value using stmia.
|
||||
void bench(size_t num_loops) {
|
||||
asm volatile(
|
||||
"stmfd sp!, {r0,r1,r2,r3,r4,r5,r6,r7,r8,r9,r10,r11}\n"
|
||||
|
||||
"mov r0, %0\n"
|
||||
"mov r1, %1\n"
|
||||
"mov r2, %2\n"
|
||||
|
||||
"mov r4, #0\n"
|
||||
|
||||
"0:\n"
|
||||
"mov r3, r1, lsr #5\n"
|
||||
|
||||
"add r4, r4, #0x01010101\n"
|
||||
"mov r5, r4\n"
|
||||
"mov r6, r4\n"
|
||||
"mov r7, r4\n"
|
||||
"mov r8, r4\n"
|
||||
"mov r9, r4\n"
|
||||
"mov r10, r4\n"
|
||||
"mov r11, r4\n"
|
||||
|
||||
"1:\n"
|
||||
"subs r3, r3, #1\n"
|
||||
"stmia r0!, {r4, r5, r6, r7, r8, r9, r10, r11}\n"
|
||||
"bgt 1b\n"
|
||||
|
||||
"sub r0, r0, r1\n"
|
||||
"subs r2, r2, #1\n"
|
||||
"bgt 0b\n"
|
||||
|
||||
"ldmfd sp!, {r0,r1,r2,r3,r4,r5,r6,r7,r8,r9,r10,r11}\n"
|
||||
:: "r" (_buffer), "r" (_size), "r" (num_loops) : "r0", "r1", "r2");
|
||||
}
|
||||
};
|
||||
|
||||
class WriteVst1Benchmark : public WriteBandwidthBenchmark {
|
||||
public:
|
||||
WriteVst1Benchmark() : WriteBandwidthBenchmark() { }
|
||||
virtual ~WriteVst1Benchmark() {}
|
||||
|
||||
const char *getName() { return "vst1"; }
|
||||
|
||||
bool usesNeon() { return true; }
|
||||
|
||||
protected:
|
||||
// Write a given value using vst.
|
||||
#if defined(__ARM_NEON__)
|
||||
void bench(size_t num_loops) {
|
||||
asm volatile(
|
||||
"stmfd sp!, {r0,r1,r2,r3,r4}\n"
|
||||
|
||||
"mov r0, %0\n"
|
||||
"mov r1, %1\n"
|
||||
"mov r2, %2\n"
|
||||
"mov r4, #0\n"
|
||||
|
||||
"0:\n"
|
||||
"mov r3, r1, lsr #5\n"
|
||||
|
||||
"add r4, r4, #1\n"
|
||||
"vdup.8 d0, r4\n"
|
||||
"vmov d1, d0\n"
|
||||
"vmov d2, d0\n"
|
||||
"vmov d3, d0\n"
|
||||
|
||||
"1:\n"
|
||||
"subs r3, r3, #1\n"
|
||||
"vst1.8 {d0-d3}, [r0:128]!\n"
|
||||
"bgt 1b\n"
|
||||
|
||||
"sub r0, r0, r1\n"
|
||||
"subs r2, r2, #1\n"
|
||||
"bgt 0b\n"
|
||||
|
||||
"ldmfd sp!, {r0,r1,r2,r3,r4}\n"
|
||||
:: "r" (_buffer), "r" (_size), "r" (num_loops) : "r0", "r1", "r2");
|
||||
#else
|
||||
void bench(size_t) {
|
||||
#endif
|
||||
}
|
||||
};
|
||||
|
||||
class WriteVstrBenchmark : public WriteBandwidthBenchmark {
|
||||
public:
|
||||
WriteVstrBenchmark() : WriteBandwidthBenchmark() { }
|
||||
virtual ~WriteVstrBenchmark() {}
|
||||
|
||||
const char *getName() { return "vstr"; }
|
||||
|
||||
bool usesNeon() { return true; }
|
||||
|
||||
protected:
|
||||
// Write a given value using vst.
|
||||
#if defined(__ARM_NEON__)
|
||||
void bench(size_t num_loops) {
|
||||
asm volatile(
|
||||
"stmfd sp!, {r0,r1,r2,r3,r4}\n"
|
||||
|
||||
"mov r0, %0\n"
|
||||
"mov r1, %1\n"
|
||||
"mov r2, %2\n"
|
||||
"mov r4, #0\n"
|
||||
|
||||
"0:\n"
|
||||
"mov r3, r1, lsr #5\n"
|
||||
|
||||
"add r4, r4, #1\n"
|
||||
"vdup.8 d0, r4\n"
|
||||
"vmov d1, d0\n"
|
||||
"vmov d2, d0\n"
|
||||
"vmov d3, d0\n"
|
||||
|
||||
"1:\n"
|
||||
"vstr d0, [r0, #0]\n"
|
||||
"subs r3, r3, #1\n"
|
||||
"vstr d1, [r0, #8]\n"
|
||||
"vstr d0, [r0, #16]\n"
|
||||
"vstr d1, [r0, #24]\n"
|
||||
"add r0, r0, #32\n"
|
||||
"bgt 1b\n"
|
||||
|
||||
"sub r0, r0, r1\n"
|
||||
"subs r2, r2, #1\n"
|
||||
"bgt 0b\n"
|
||||
|
||||
"ldmfd sp!, {r0,r1,r2,r3,r4}\n"
|
||||
:: "r" (_buffer), "r" (_size), "r" (num_loops) : "r0", "r1", "r2");
|
||||
#else
|
||||
void bench(size_t) {
|
||||
#endif
|
||||
}
|
||||
};
|
||||
|
||||
class WriteVstmiaBenchmark : public WriteBandwidthBenchmark {
|
||||
public:
|
||||
WriteVstmiaBenchmark() : WriteBandwidthBenchmark() { }
|
||||
virtual ~WriteVstmiaBenchmark() {}
|
||||
|
||||
const char *getName() { return "vstmia"; }
|
||||
|
||||
bool usesNeon() { return true; }
|
||||
|
||||
protected:
|
||||
// Write a given value using vstmia.
|
||||
#if defined(__ARM_NEON__)
|
||||
void bench(size_t num_loops) {
|
||||
asm volatile(
|
||||
"stmfd sp!, {r0,r1,r2,r3,r4}\n"
|
||||
|
||||
"mov r0, %0\n"
|
||||
"mov r1, %1\n"
|
||||
"mov r2, %2\n"
|
||||
"mov r4, #0\n"
|
||||
|
||||
"0:\n"
|
||||
"mov r3, r1, lsr #5\n"
|
||||
|
||||
"add r4, r4, #1\n"
|
||||
"vdup.8 d0, r4\n"
|
||||
"vmov d1, d0\n"
|
||||
"vmov d2, d0\n"
|
||||
"vmov d3, d0\n"
|
||||
|
||||
"1:\n"
|
||||
"subs r3, r3, #1\n"
|
||||
"vstmia r0!, {d0-d3}\n"
|
||||
"bgt 1b\n"
|
||||
|
||||
"sub r0, r0, r1\n"
|
||||
"subs r2, r2, #1\n"
|
||||
"bgt 0b\n"
|
||||
|
||||
"ldmfd sp!, {r0,r1,r2,r3,r4}\n"
|
||||
:: "r" (_buffer), "r" (_size), "r" (num_loops) : "r0", "r1", "r2");
|
||||
#else
|
||||
void bench(size_t) {
|
||||
#endif
|
||||
}
|
||||
};
|
||||
|
||||
class MemsetBenchmark : public WriteBandwidthBenchmark {
|
||||
public:
|
||||
MemsetBenchmark() : WriteBandwidthBenchmark() { }
|
||||
virtual ~MemsetBenchmark() {}
|
||||
|
||||
const char *getName() { return "memset"; }
|
||||
|
||||
protected:
|
||||
void bench(size_t num_loops) {
|
||||
for (size_t i = 0; i < num_loops; i++) {
|
||||
memset(_buffer, (i % 255) + 1, _size);
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
class ReadLdrdBenchmark : public SingleBufferBandwidthBenchmark {
|
||||
public:
|
||||
ReadLdrdBenchmark() : SingleBufferBandwidthBenchmark() { }
|
||||
virtual ~ReadLdrdBenchmark() {}
|
||||
|
||||
const char *getName() { return "ldrd"; }
|
||||
|
||||
protected:
|
||||
// Write a given value using strd.
|
||||
void bench(size_t num_loops) {
|
||||
asm volatile(
|
||||
"stmfd sp!, {r0,r1,r2,r3,r4,r5}\n"
|
||||
|
||||
"mov r0, %0\n"
|
||||
"mov r1, %1\n"
|
||||
"mov r2, %2\n"
|
||||
|
||||
"0:\n"
|
||||
"mov r3, r1, lsr #5\n"
|
||||
|
||||
"1:\n"
|
||||
"subs r3, r3, #1\n"
|
||||
"ldrd r4, r5, [r0]\n"
|
||||
"ldrd r4, r5, [r0, #8]\n"
|
||||
"ldrd r4, r5, [r0, #16]\n"
|
||||
"ldrd r4, r5, [r0, #24]\n"
|
||||
"add r0, r0, #32\n"
|
||||
"bgt 1b\n"
|
||||
|
||||
"sub r0, r0, r1\n"
|
||||
"subs r2, r2, #1\n"
|
||||
"bgt 0b\n"
|
||||
|
||||
"ldmfd sp!, {r0,r1,r2,r3,r4,r5}\n"
|
||||
:: "r" (_buffer), "r" (_size), "r" (num_loops) : "r0", "r1", "r2");
|
||||
}
|
||||
};
|
||||
|
||||
class ReadLdmiaBenchmark : public SingleBufferBandwidthBenchmark {
|
||||
public:
|
||||
ReadLdmiaBenchmark() : SingleBufferBandwidthBenchmark() { }
|
||||
virtual ~ReadLdmiaBenchmark() {}
|
||||
|
||||
const char *getName() { return "ldmia"; }
|
||||
|
||||
protected:
|
||||
// Write a given value using stmia.
|
||||
void bench(size_t num_loops) {
|
||||
asm volatile(
|
||||
"stmfd sp!, {r0,r1,r2,r3,r4,r5,r6,r7,r8,r9,r10,r11}\n"
|
||||
|
||||
"mov r0, %0\n"
|
||||
"mov r1, %1\n"
|
||||
"mov r2, %2\n"
|
||||
|
||||
"0:\n"
|
||||
"mov r3, r1, lsr #5\n"
|
||||
|
||||
"1:\n"
|
||||
"subs r3, r3, #1\n"
|
||||
"ldmia r0!, {r4, r5, r6, r7, r8, r9, r10, r11}\n"
|
||||
"bgt 1b\n"
|
||||
|
||||
"sub r0, r0, r1\n"
|
||||
"subs r2, r2, #1\n"
|
||||
"bgt 0b\n"
|
||||
|
||||
"ldmfd sp!, {r0,r1,r2,r3,r4,r5,r6,r7,r8,r9,r10,r11}\n"
|
||||
:: "r" (_buffer), "r" (_size), "r" (num_loops) : "r0", "r1", "r2");
|
||||
}
|
||||
};
|
||||
|
||||
class ReadVld1Benchmark : public SingleBufferBandwidthBenchmark {
|
||||
public:
|
||||
ReadVld1Benchmark() : SingleBufferBandwidthBenchmark() { }
|
||||
virtual ~ReadVld1Benchmark() {}
|
||||
|
||||
const char *getName() { return "vld1"; }
|
||||
|
||||
bool usesNeon() { return true; }
|
||||
|
||||
protected:
|
||||
// Write a given value using vst.
|
||||
#if defined(__ARM_NEON__)
|
||||
void bench(size_t num_loops) {
|
||||
asm volatile(
|
||||
"stmfd sp!, {r0,r1,r2,r3}\n"
|
||||
|
||||
"mov r0, %0\n"
|
||||
"mov r1, %1\n"
|
||||
"mov r2, %2\n"
|
||||
|
||||
"0:\n"
|
||||
"mov r3, r1, lsr #5\n"
|
||||
|
||||
"1:\n"
|
||||
"subs r3, r3, #1\n"
|
||||
"vld1.8 {d0-d3}, [r0:128]!\n"
|
||||
"bgt 1b\n"
|
||||
|
||||
"sub r0, r0, r1\n"
|
||||
"subs r2, r2, #1\n"
|
||||
"bgt 0b\n"
|
||||
|
||||
"ldmfd sp!, {r0,r1,r2,r3}\n"
|
||||
:: "r" (_buffer), "r" (_size), "r" (num_loops) : "r0", "r1", "r2");
|
||||
#else
|
||||
void bench(size_t) {
|
||||
#endif
|
||||
}
|
||||
};
|
||||
|
||||
class ReadVldrBenchmark : public SingleBufferBandwidthBenchmark {
|
||||
public:
|
||||
ReadVldrBenchmark() : SingleBufferBandwidthBenchmark() { }
|
||||
virtual ~ReadVldrBenchmark() {}
|
||||
|
||||
const char *getName() { return "vldr"; }
|
||||
|
||||
bool usesNeon() { return true; }
|
||||
|
||||
protected:
|
||||
// Write a given value using vst.
|
||||
#if defined(__ARM_NEON__)
|
||||
void bench(size_t num_loops) {
|
||||
asm volatile(
|
||||
"stmfd sp!, {r0,r1,r2,r3}\n"
|
||||
|
||||
"mov r0, %0\n"
|
||||
"mov r1, %1\n"
|
||||
"mov r2, %2\n"
|
||||
|
||||
"0:\n"
|
||||
"mov r3, r1, lsr #5\n"
|
||||
|
||||
"1:\n"
|
||||
"vldr d0, [r0, #0]\n"
|
||||
"subs r3, r3, #1\n"
|
||||
"vldr d1, [r0, #8]\n"
|
||||
"vldr d0, [r0, #16]\n"
|
||||
"vldr d1, [r0, #24]\n"
|
||||
"add r0, r0, #32\n"
|
||||
"bgt 1b\n"
|
||||
|
||||
"sub r0, r0, r1\n"
|
||||
"subs r2, r2, #1\n"
|
||||
"bgt 0b\n"
|
||||
|
||||
"ldmfd sp!, {r0,r1,r2,r3}\n"
|
||||
:: "r" (_buffer), "r" (_size), "r" (num_loops) : "r0", "r1", "r2");
|
||||
#else
|
||||
void bench(size_t) {
|
||||
#endif
|
||||
}
|
||||
};
|
||||
|
||||
|
||||
class ReadVldmiaBenchmark : public SingleBufferBandwidthBenchmark {
|
||||
public:
|
||||
ReadVldmiaBenchmark() : SingleBufferBandwidthBenchmark() { }
|
||||
virtual ~ReadVldmiaBenchmark() {}
|
||||
|
||||
const char *getName() { return "vldmia"; }
|
||||
|
||||
bool usesNeon() { return true; }
|
||||
|
||||
protected:
|
||||
// Write a given value using vstmia.
|
||||
#if defined(__ARM_NEON__)
|
||||
void bench(size_t num_loops) {
|
||||
asm volatile(
|
||||
"stmfd sp!, {r0,r1,r2,r3}\n"
|
||||
|
||||
"mov r0, %0\n"
|
||||
"mov r1, %1\n"
|
||||
"mov r2, %2\n"
|
||||
|
||||
"0:\n"
|
||||
"mov r3, r1, lsr #5\n"
|
||||
|
||||
"1:\n"
|
||||
"subs r3, r3, #1\n"
|
||||
"vldmia r0!, {d0-d3}\n"
|
||||
"bgt 1b\n"
|
||||
|
||||
"sub r0, r0, r1\n"
|
||||
"subs r2, r2, #1\n"
|
||||
"bgt 0b\n"
|
||||
|
||||
"ldmfd sp!, {r0,r1,r2,r3}\n"
|
||||
:: "r" (_buffer), "r" (_size), "r" (num_loops) : "r0", "r1", "r2");
|
||||
#else
|
||||
void bench(size_t) {
|
||||
#endif
|
||||
}
|
||||
};
|
||||
|
||||
#endif // __BANDWIDTH_H__
|
Loading…
Add table
Add a link
Reference in a new issue