6fd92b63d0
The versions with inline assembly are in fact slower on the machines I
tested them on (in userspace) (Athlon XP 2800+, p4-like Xeon 2.8GHz, AMD
Opteron 270). The i386-version needed a fix similar to 06024f21 to avoid
crashing the benchmark.

Benchmark using: gcc -fomit-frame-pointer -Os. For each bitmap size
1...512, for each possible bitmap with one bit set, for each possible
offset: find the position of the first bit starting at offset. If you
follow ;). Times include setup of the bitmap and checking of the results.

                Athlon      Xeon        Opteron 32/64bit
x86-specific:   0m3.692s    0m2.820s    0m3.196s / 0m2.480s
generic:        0m2.622s    0m1.662s    0m2.100s / 0m1.572s

If the bitmap size is not a multiple of BITS_PER_LONG, and no set
(cleared) bit is found, find_next_bit (find_next_zero_bit) returns a
value outside of the range [0, size]. The generic version always returns
exactly size. The generic version also uses unsigned long everywhere,
while the x86 versions use a mishmash of int, unsigned (int), long and
unsigned long.

Using the generic version does give a slightly bigger kernel, though.

defconfig:      text     data    bss     dec      hex     filename
x86-specific:   4738555  481232  626688  5846475  5935cb  vmlinux (32 bit)
generic:        4738621  481232  626688  5846541  59360d  vmlinux (32 bit)
x86-specific:   5392395  846568  724424  6963387  6a40bb  vmlinux (64 bit)
generic:        5392458  846568  724424  6963450  6a40fa  vmlinux (64 bit)

Signed-off-by: Alexander van Heukelum <heukelum@fastmail.fm>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
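For illustration, a userspace harness along the lines described in the changelog could look like the sketch below. The file name (bench.c), loop structure and result checks are assumptions reconstructed from the description, not the exact benchmark that produced the timings; it is assumed to be linked against a userspace copy of the generic implementation listed further down.

/* bench.c (hypothetical): exercise find_next_bit() roughly as described above.
 * Build e.g.: gcc -fomit-frame-pointer -Os bench.c find_next_bit.c -o bench
 */
#include <assert.h>
#include <string.h>

#define BITS_PER_LONG	(8 * sizeof(unsigned long))
#define MAX_BITS	512
#define WORDS		((MAX_BITS + BITS_PER_LONG - 1) / BITS_PER_LONG)

/* Provided by the implementation under test. */
unsigned long find_next_bit(const unsigned long *addr, unsigned long size,
			    unsigned long offset);

int main(void)
{
	unsigned long bitmap[WORDS];
	unsigned long size, bit, offset, pos;

	for (size = 1; size <= MAX_BITS; size++) {
		for (bit = 0; bit < size; bit++) {
			/* set up: a bitmap with exactly one bit set */
			memset(bitmap, 0, sizeof(bitmap));
			bitmap[bit / BITS_PER_LONG] |= 1UL << (bit % BITS_PER_LONG);

			for (offset = 0; offset <= size; offset++) {
				pos = find_next_bit(bitmap, size, offset);
				/* check: the bit is found iff the search started at or
				 * before it; the generic version returns exactly 'size'
				 * otherwise. */
				assert(offset <= bit ? pos == bit : pos >= size);
			}
		}
	}
	return 0;
}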
227 lines · 5.3 KiB · C
/* find_next_bit.c: fallback find next bit implementation
 *
 * Copyright (C) 2004 Red Hat, Inc. All Rights Reserved.
 * Written by David Howells (dhowells@redhat.com)
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version
 * 2 of the License, or (at your option) any later version.
 */

#include <linux/bitops.h>
#include <linux/module.h>
#include <asm/types.h>
#include <asm/byteorder.h>

#define BITOP_WORD(nr)		((nr) / BITS_PER_LONG)
#undef find_next_bit
#undef find_next_zero_bit

/**
 * find_next_bit - find the next set bit in a memory region
 * @addr: The address to base the search on
 * @offset: The bitnumber to start searching at
 * @size: The maximum size to search
 */
unsigned long find_next_bit(const unsigned long *addr, unsigned long size,
			    unsigned long offset)
{
	const unsigned long *p = addr + BITOP_WORD(offset);
	unsigned long result = offset & ~(BITS_PER_LONG-1);
	unsigned long tmp;

	if (offset >= size)
		return size;
	size -= result;
	offset %= BITS_PER_LONG;
	if (offset) {
		/* Unaligned start: mask off the bits below offset in the first word. */
		tmp = *(p++);
		tmp &= (~0UL << offset);
		if (size < BITS_PER_LONG)
			goto found_first;
		if (tmp)
			goto found_middle;
		size -= BITS_PER_LONG;
		result += BITS_PER_LONG;
	}
	/* Scan the remaining whole words. */
	while (size & ~(BITS_PER_LONG-1)) {
		if ((tmp = *(p++)))
			goto found_middle;
		result += BITS_PER_LONG;
		size -= BITS_PER_LONG;
	}
	if (!size)
		return result;
	/* Last, partial word. */
	tmp = *p;

found_first:
	tmp &= (~0UL >> (BITS_PER_LONG - size));
	if (tmp == 0UL)		/* Are any bits set? */
		return result + size;	/* Nope. */
found_middle:
	return result + __ffs(tmp);
}

EXPORT_SYMBOL(find_next_bit);

/*
 * This implementation of find_{first,next}_zero_bit was stolen from
 * Linus' asm-alpha/bitops.h.
 */
unsigned long find_next_zero_bit(const unsigned long *addr, unsigned long size,
				 unsigned long offset)
{
	const unsigned long *p = addr + BITOP_WORD(offset);
	unsigned long result = offset & ~(BITS_PER_LONG-1);
	unsigned long tmp;

	if (offset >= size)
		return size;
	size -= result;
	offset %= BITS_PER_LONG;
	if (offset) {
		/* Unaligned start: force the bits below offset to 1 so they are skipped. */
		tmp = *(p++);
		tmp |= ~0UL >> (BITS_PER_LONG - offset);
		if (size < BITS_PER_LONG)
			goto found_first;
		if (~tmp)
			goto found_middle;
		size -= BITS_PER_LONG;
		result += BITS_PER_LONG;
	}
	/* Scan the remaining whole words. */
	while (size & ~(BITS_PER_LONG-1)) {
		if (~(tmp = *(p++)))
			goto found_middle;
		result += BITS_PER_LONG;
		size -= BITS_PER_LONG;
	}
	if (!size)
		return result;
	/* Last, partial word. */
	tmp = *p;

found_first:
	tmp |= ~0UL << size;
	if (tmp == ~0UL)	/* Are any bits zero? */
		return result + size;	/* Nope. */
found_middle:
	return result + ffz(tmp);
}

EXPORT_SYMBOL(find_next_zero_bit);

#ifdef __BIG_ENDIAN

/* include/linux/byteorder does not support "unsigned long" type */
static inline unsigned long ext2_swabp(const unsigned long * x)
{
#if BITS_PER_LONG == 64
	return (unsigned long) __swab64p((u64 *) x);
#elif BITS_PER_LONG == 32
	return (unsigned long) __swab32p((u32 *) x);
#else
#error BITS_PER_LONG not defined
#endif
}

/* include/linux/byteorder doesn't support "unsigned long" type */
static inline unsigned long ext2_swab(const unsigned long y)
{
#if BITS_PER_LONG == 64
	return (unsigned long) __swab64((u64) y);
#elif BITS_PER_LONG == 32
	return (unsigned long) __swab32((u32) y);
#else
#error BITS_PER_LONG not defined
#endif
}

/* Like find_next_zero_bit(), but for bitmaps stored in little-endian word order. */
unsigned long generic_find_next_zero_le_bit(const unsigned long *addr, unsigned
		long size, unsigned long offset)
{
	const unsigned long *p = addr + BITOP_WORD(offset);
	unsigned long result = offset & ~(BITS_PER_LONG - 1);
	unsigned long tmp;

	if (offset >= size)
		return size;
	size -= result;
	offset &= (BITS_PER_LONG - 1UL);
	if (offset) {
		tmp = ext2_swabp(p++);
		tmp |= (~0UL >> (BITS_PER_LONG - offset));
		if (size < BITS_PER_LONG)
			goto found_first;
		if (~tmp)
			goto found_middle;
		size -= BITS_PER_LONG;
		result += BITS_PER_LONG;
	}

	while (size & ~(BITS_PER_LONG - 1)) {
		if (~(tmp = *(p++)))
			goto found_middle_swap;
		result += BITS_PER_LONG;
		size -= BITS_PER_LONG;
	}
	if (!size)
		return result;
	tmp = ext2_swabp(p);
found_first:
	tmp |= ~0UL << size;
	if (tmp == ~0UL)	/* Are any bits zero? */
		return result + size; /* Nope. Skip ffz */
found_middle:
	return result + ffz(tmp);

found_middle_swap:
	return result + ffz(ext2_swab(tmp));
}

EXPORT_SYMBOL(generic_find_next_zero_le_bit);

/* Like find_next_bit(), but for bitmaps stored in little-endian word order. */
unsigned long generic_find_next_le_bit(const unsigned long *addr, unsigned
		long size, unsigned long offset)
{
	const unsigned long *p = addr + BITOP_WORD(offset);
	unsigned long result = offset & ~(BITS_PER_LONG - 1);
	unsigned long tmp;

	if (offset >= size)
		return size;
	size -= result;
	offset &= (BITS_PER_LONG - 1UL);
	if (offset) {
		tmp = ext2_swabp(p++);
		tmp &= (~0UL << offset);
		if (size < BITS_PER_LONG)
			goto found_first;
		if (tmp)
			goto found_middle;
		size -= BITS_PER_LONG;
		result += BITS_PER_LONG;
	}

	while (size & ~(BITS_PER_LONG - 1)) {
		tmp = *(p++);
		if (tmp)
			goto found_middle_swap;
		result += BITS_PER_LONG;
		size -= BITS_PER_LONG;
	}
	if (!size)
		return result;
	tmp = ext2_swabp(p);
found_first:
	tmp &= (~0UL >> (BITS_PER_LONG - size));
	if (tmp == 0UL)		/* Are any bits set? */
		return result + size; /* Nope. */
found_middle:
	return result + __ffs(tmp);

found_middle_swap:
	return result + __ffs(ext2_swab(tmp));
}
EXPORT_SYMBOL(generic_find_next_le_bit);
#endif /* __BIG_ENDIAN */
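One practical consequence of the "always returns exactly size" behaviour noted in the changelog: the usual bit-iteration idiom terminates cleanly even when size is not a multiple of BITS_PER_LONG. A minimal sketch follows; the helper name and callback are hypothetical and not part of the file above.

/* Visit every set bit in a bitmap of 'nbits' bits.  This is the open-coded
 * form of the kernel's for_each_set_bit() idiom; it relies on find_next_bit()
 * returning exactly 'nbits' once no further set bit exists.
 */
static void visit_set_bits(const unsigned long *bitmap, unsigned long nbits,
			   void (*visit)(unsigned long bit))
{
	unsigned long bit;

	for (bit = find_next_bit(bitmap, nbits, 0);
	     bit < nbits;
	     bit = find_next_bit(bitmap, nbits, bit + 1))
		visit(bit);
}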