Ого, столько решений — и ни одного бенчмарка. Вам всем должно быть стыдно :-)
Моя машина — Intel i530 (2,9 ГГц) под управлением 64-битной Windows 7. Я компилировал с помощью 32-разрядной версии MinGW.
$ gcc --version
gcc.exe (GCC) 4.7.2
$ gcc bench.c -o bench.exe -std=c99 -Wall -O2
$ bench
Naive loop. Time = 2.91 (Original questioner)
De Bruijn multiply. Time = 1.16 (Tykhyy)
Lookup table. Time = 0.36 (Andrew Grant)
FFS instruction. Time = 0.90 (ephemient)
Branch free mask. Time = 3.48 (Dan / Jim Balter)
Double hack. Time = 3.41 (DocMax)
$ gcc bench.c -o bench.exe -std=c99 -Wall -O2 -march=native
$ bench
Naive loop. Time = 2.92
De Bruijn multiply. Time = 0.47
Lookup table. Time = 0.35
FFS instruction. Time = 0.68
Branch free mask. Time = 3.49
Double hack. Time = 0.92
Мой код:
#include <stdio.h>
#include <stdlib.h>
#include <time.h>
#define ARRAY_SIZE 65536
#define NUM_ITERS 5000 // Number of times to process array
// Baseline implementation: find the lowest set bit of every element by
// shifting the value right one bit at a time.
//
// The whole array is processed NUM_ITERS times. Zero elements contribute
// nothing; every other element contributes the 1-based position of its
// lowest set bit. The accumulated sum is returned.
int find_first_bits_naive_loop(unsigned nums[ARRAY_SIZE])
{
    int total = 0; // Returned so the compiler cannot discard the loops

    for (int pass = 0; pass < NUM_ITERS; ++pass) {
        for (int idx = 0; idx < ARRAY_SIZE; ++idx) {
            unsigned bits = nums[idx];

            if (bits != 0) {
                // Start at 1 so the count is already 1-based when the
                // lowest set bit reaches bit 0.
                unsigned position = 1;

                for (; (bits & 1u) == 0; bits >>= 1)
                    ++position;
                total += position;
            }
        }
    }
    return total;
}
// Find the lowest set bit of every element with a de Bruijn multiply.
//
// (c & -c) isolates the lowest set bit; multiplying it by the de Bruijn
// constant 0x077CB531 and keeping the top five bits of the product gives an
// index into a 32-entry table holding the 1-based bit position.
//
// The whole array is processed NUM_ITERS times. Zero elements contribute
// nothing, matching find_first_bits_naive_loop and __builtin_ffs.
// Returns the accumulated sum of positions.
//
// NOTE(review): the ">> 27" assumes unsigned is exactly 32 bits wide.
int find_first_bits_de_bruijn(unsigned nums[ARRAY_SIZE])
{
    static const int MultiplyDeBruijnBitPosition[32] =
    {
        1, 2, 29, 3, 30, 15, 25, 4, 31, 23, 21, 16, 26, 18, 5, 9,
        32, 28, 14, 24, 22, 20, 17, 8, 27, 13, 19, 7, 12, 6, 11, 10
    };
    int total = 0; // Prevent compiler from optimizing out the code

    for (int j = 0; j < NUM_ITERS; j++) {
        for (int i = 0; i < ARRAY_SIZE; i++) {
            unsigned int c = nums[i];

            // A zero word isolates to 0, which selects table slot 0 and
            // would be counted as "bit 1". Skip it so that zero contributes
            // 0, as in the other implementations.
            if (c == 0)
                continue;
            total += MultiplyDeBruijnBitPosition[((c & -c) * 0x077CB531U) >> 27];
        }
    }
    return total;
}
// Lookup table indexed by a byte value; main() fills each slot with
// get_lowest_set_bit(index) before the table-driven benchmark runs.
unsigned char lowestBitTable[256];

// Return the 1-based position of the lowest set bit among the low 32 bits
// of num, or 0 when none of those bits is set.
int get_lowest_set_bit(unsigned num) {
    for (int bit = 0; bit < 32; ++bit) {
        if ((num >> bit) & 1u)
            return bit + 1;
    }
    return 0;
}
// Find the lowest set bit of every element with a 256-entry byte table.
//
// The value is examined one byte at a time, least significant byte first.
// The first non-zero byte is looked up in lowestBitTable and the byte's bit
// offset (0, 8, 16 or 24) is added. lowestBitTable must have been filled in
// before this is called (main() does so).
//
// Bytes are extracted with shifts and masks rather than through an
// unsigned char pointer, so the result does not depend on the machine's
// byte order. Zero elements contribute nothing, matching
// find_first_bits_naive_loop and __builtin_ffs.
//
// The whole array is processed NUM_ITERS times; the accumulated sum of
// positions is returned. Only the low 32 bits of each element are examined.
int find_first_bits_lookup_table(unsigned nums[ARRAY_SIZE])
{
    int total = 0; // Prevent compiler from optimizing out the code

    for (int j = 0; j < NUM_ITERS; j++) {
        for (int i = 0; i < ARRAY_SIZE; i++) {
            unsigned int value = nums[i];
            unsigned int byte0 = value & 0xffu;
            unsigned int byte1 = (value >> 8) & 0xffu;
            unsigned int byte2 = (value >> 16) & 0xffu;
            unsigned int byte3 = (value >> 24) & 0xffu;

            if (byte0)
                total += lowestBitTable[byte0];
            else if (byte1)
                total += lowestBitTable[byte1] + 8;
            else if (byte2)
                total += lowestBitTable[byte2] + 16;
            else if (byte3) // explicit test: an all-zero word must add 0, not 24
                total += lowestBitTable[byte3] + 24;
        }
    }
    return total;
}
// Find the lowest set bit of every element with the compiler builtin
// __builtin_ffs, whose result is added to the running sum unchanged.
//
// The whole array is processed NUM_ITERS times; the accumulated sum is
// returned.
int find_first_bits_ffs_instruction(unsigned nums[ARRAY_SIZE])
{
    int total = 0; // Returned so the compiler cannot discard the loops
    int passes_left = NUM_ITERS;

    while (passes_left-- > 0) {
        const unsigned *const end = nums + ARRAY_SIZE;

        for (const unsigned *cursor = nums; cursor != end; ++cursor)
            total += __builtin_ffs(*cursor);
    }
    return total;
}
// Find the lowest set bit of every element with a mask-and-shift binary
// search over the low 32 bits.
//
// Each step tests whether the low 16/8/4/2/1 bits of the remaining value
// are all zero; if so, that many bits are shifted out and the shift amount
// is added to the position. The position starts at 1, so the result is
// 1-based. For a value with no set bit the steps add up to 31, and the
// final -32 correction brings the contribution to 0.
//
// The whole array is processed NUM_ITERS times; the accumulated sum of
// positions is returned.
int find_first_bits_branch_free_mask(unsigned nums[ARRAY_SIZE])
{
    int total = 0; // Returned so the compiler cannot discard the loops

    for (int pass = 0; pass < NUM_ITERS; ++pass) {
        for (int idx = 0; idx < ARRAY_SIZE; ++idx) {
            unsigned v = nums[idx];
            int position = 1;
            int step;

            step = ((v & 0xffff) == 0) << 4;
            v >>= step;
            position += step;

            step = ((v & 0xff) == 0) << 3;
            v >>= step;
            position += step;

            step = ((v & 0xf) == 0) << 2;
            v >>= step;
            position += step;

            step = ((v & 0x3) == 0) << 1;
            v >>= step;
            position += step;

            step = ((v & 0x1) == 0);
            position += step;

            // If the bit selected by the last step is still clear, the
            // value had no set bit at all: cancel everything added so far.
            position += ((v >> step) & 1) ? 0 : -32;

            total += position;
        }
    }
    return total;
}
// Find the lowest set bit of every element by converting to double and
// reading the exponent field.
//
// For a non-zero value, value ^ (value - 1) sets every bit from bit 0 up
// to and including the lowest set bit, i.e. it equals 2^(k+1) - 1 where k
// is the 0-based bit index. Converted to double, that number has biased
// exponent k + 1023, so subtracting 1022 yields the 1-based position.
//
// The bit pattern is read through a union instead of casting the double's
// address to int*: the cast violates the strict-aliasing rule (which GCC
// relies on at -O2) and picks the right half of the double only on
// little-endian machines. Shifting the full 64-bit pattern works for
// either byte order.
//
// Zero elements contribute nothing, matching find_first_bits_naive_loop
// and __builtin_ffs (the unguarded computation would produce -1022).
//
// The whole array is processed NUM_ITERS times; the accumulated sum of
// positions is returned.
//
// NOTE(review): assumes double is IEEE-754 binary64 and unsigned long long
// is 64 bits with the same byte order as double -- confirm for new targets.
int find_first_bits_double_hack(unsigned nums[ARRAY_SIZE])
{
    union {
        double as_double;
        unsigned long long as_bits;
    } pun;
    int total = 0; // Prevent compiler from optimizing out the code

    for (int j = 0; j < NUM_ITERS; j++) {
        for (int i = 0; i < ARRAY_SIZE; i++) {
            unsigned value = nums[i];

            if (value == 0)
                continue;
            pun.as_double = value ^ (value - 1);
            // Bits 52..62 hold the biased exponent; the sign bit (63) is
            // always clear because the converted value is non-negative.
            total += (int)(pun.as_bits >> 52) - 1022;
        }
    }
    return total;
}
int main() {
unsigned nums[ARRAY_SIZE];
for (int i = 0; i < ARRAY_SIZE; i++) {
nums[i] = rand() + (rand() << 15);
}
for (int i = 0; i < 256; i++) {
lowestBitTable[i] = get_lowest_set_bit(i);
}
clock_t start_time, end_time;
int result;
start_time = clock();
result = find_first_bits_naive_loop(nums);
end_time = clock();
printf("Naive loop. Time = %.2f, result = %d\n",
(end_time - start_time) / (double)(CLOCKS_PER_SEC), result);
start_time = clock();
result = find_first_bits_de_bruijn(nums);
end_time = clock();
printf("De Bruijn multiply. Time = %.2f, result = %d\n",
(end_time - start_time) / (double)(CLOCKS_PER_SEC), result);
start_time = clock();
result = find_first_bits_lookup_table(nums);
end_time = clock();
printf("Lookup table. Time = %.2f, result = %d\n",
(end_time - start_time) / (double)(CLOCKS_PER_SEC), result);
start_time = clock();
result = find_first_bits_ffs_instruction(nums);
end_time = clock();
printf("FFS instruction. Time = %.2f, result = %d\n",
(end_time - start_time) / (double)(CLOCKS_PER_SEC), result);
start_time = clock();
result = find_first_bits_branch_free_mask(nums);
end_time = clock();
printf("Branch free mask. Time = %.2f, result = %d\n",
(end_time - start_time) / (double)(CLOCKS_PER_SEC), result);
start_time = clock();
result = find_first_bits_double_hack(nums);
end_time = clock();
printf("Double hack. Time = %.2f, result = %d\n",
(end_time - start_time) / (double)(CLOCKS_PER_SEC), result);
}
Более быстрое решение:
from timeit import Timer
import os
import binascii


def testSpeed(statement, setup='pass'):
    """Print *statement*, then the time ``timeit`` reports for running it.

    ``setup`` is executed once before the timed runs; ``Timer.timeit()`` is
    called with its default repetition count.
    """
    print('%s' % statement)
    print('%s' % Timer(statement, setup).timeit())


if __name__ == '__main__':
    setup = """import os
value = os.urandom(32)"""

    # winner
    statement = """import binascii
binascii.hexlify(value)"""
    testSpeed(statement, setup)

    # loser
    # NOTE: calling .encode('hex_codec') on the byte string is a Python 2
    # idiom; the timed statement is kept as written.
    statement = """import binascii
value.encode('hex_codec')"""
    testSpeed(statement, setup)
Результаты:
import binascii
binascii.hexlify(value)
2.18547999816
value.encode('hex_codec')
2.91231595077
Вы можете использовать ord и hex следующим образом:
>>> s = 'some string'
>>> hex_chars = map(hex, map(ord, s))
>>> print hex_chars
['0x73', '0x6f', '0x6d', '0x65', '0x20', '0x73', '0x74', '0x72', '0x69', '0x6e', '0x67']
>>> hex_string = "".join(c[2:4] for c in hex_chars)
>>> print hex_string
736f6d6520737472696e67
>>>
Или используйте встроенную кодировку:
>>> s = 'some string'
>>> print s.encode('hex_codec')
736f6d6520737472696e67
>>>
Связка hex и ord работает ненадёжно: для символов с кодом меньше 0x10 hex возвращает всего одну цифру (например, '0xa'). Используйте "%2.2x".__mod__ вместо hex — тогда можно обойтись и без c[2:4]. В результате получится: "".join(map("%2.2x".__mod__, map(ord, s))). Вариант с encode, конечно, лучше. :-)
– Helmut Grohne
21 February 2011 в 14:19
>>> import binascii
>>> s = '2F'
>>> hex_str = binascii.b2a_hex(s)
>>> hex_str
'3246'
ИЛИ
>>> import binascii
>>> hex_str = binascii.hexlify(s)
>>> hex_str
'3246'
>>>