diff options
author | Bob Peterson <rpeterso@redhat.com> | 2008-03-11 02:17:47 +0300 |
---|---|---|
committer | Steven Whitehouse <swhiteho@redhat.com> | 2008-03-31 13:41:39 +0400 |
commit | 1f466a47e8a3a3e3b527b3285c7b9c8a837fb7ec (patch) | |
tree | 355f6084118d4ee9a986e07e5154eaa0e25b834a /fs/gfs2/rgrp.c | |
parent | d82661d96993ac4efc1d54259ea85ffcd9b8bec6 (diff) | |
download | linux-1f466a47e8a3a3e3b527b3285c7b9c8a837fb7ec.tar.xz |
[GFS2] Faster gfs2_bitfit algorithm
This version of the gfs2_bitfit algorithm includes the latest
suggestions from Steve Whitehouse. It is typically eight to
ten times faster than the version we're using today. If there
is a lot of metadata mixed in (lots of small files) the
algorithm is often 15 times faster, and given the right
conditions, I've seen peaks of 20 times faster.
Signed-off-by: Bob Peterson <rpeterso@redhat.com>
Signed-off-by: Steven Whitehouse <swhiteho@redhat.com>
Diffstat (limited to 'fs/gfs2/rgrp.c')
-rw-r--r-- | fs/gfs2/rgrp.c | 93 |
1 files changed, 61 insertions, 32 deletions
diff --git a/fs/gfs2/rgrp.c b/fs/gfs2/rgrp.c index 4291375cecc6..7e8f0b1d6c6e 100644 --- a/fs/gfs2/rgrp.c +++ b/fs/gfs2/rgrp.c @@ -14,6 +14,7 @@ #include <linux/fs.h> #include <linux/gfs2_ondisk.h> #include <linux/lm_interface.h> +#include <linux/prefetch.h> #include "gfs2.h" #include "incore.h" @@ -33,6 +34,16 @@ #define BFITNOENT ((u32)~0) #define NO_BLOCK ((u64)~0) +#if BITS_PER_LONG == 32 +#define LBITMASK (0x55555555UL) +#define LBITSKIP55 (0x55555555UL) +#define LBITSKIP00 (0x00000000UL) +#else +#define LBITMASK (0x5555555555555555UL) +#define LBITSKIP55 (0x5555555555555555UL) +#define LBITSKIP00 (0x0000000000000000UL) +#endif + /* * These routines are used by the resource group routines (rgrp.c) * to keep track of block allocation. Each block is represented by two @@ -138,45 +149,63 @@ static inline unsigned char gfs2_testbit(struct gfs2_rgrpd *rgd, static u32 gfs2_bitfit(const u8 *buffer, unsigned int buflen, u32 goal, u8 old_state) { - const u8 *byte; - u32 blk = goal; - unsigned int bit, bitlong; - const unsigned long *plong; -#if BITS_PER_LONG == 32 - const unsigned long plong55 = 0x55555555; -#else - const unsigned long plong55 = 0x5555555555555555; -#endif - - byte = buffer + (goal / GFS2_NBBY); - plong = (const unsigned long *)(buffer + (goal / GFS2_NBBY)); - bit = (goal % GFS2_NBBY) * GFS2_BIT_SIZE; - bitlong = bit; - - while (byte < buffer + buflen) { - - if (bitlong == 0 && old_state == 0 && *plong == plong55) { - plong++; - byte += sizeof(unsigned long); - blk += sizeof(unsigned long) * GFS2_NBBY; - continue; + const u8 *byte, *start, *end; + int bit, startbit; + u32 g1, g2, misaligned; + unsigned long *plong; + unsigned long lskipval; + + lskipval = (old_state & GFS2_BLKST_USED) ? LBITSKIP00 : LBITSKIP55; + g1 = (goal / GFS2_NBBY); + start = buffer + g1; + byte = start; + end = buffer + buflen; + g2 = ALIGN(g1, sizeof(unsigned long)); + plong = (unsigned long *)(buffer + g2); + startbit = bit = (goal % GFS2_NBBY) * GFS2_BIT_SIZE; + misaligned = g2 - g1; + if (!misaligned) + goto ulong_aligned; +/* parse the bitmap a byte at a time */ +misaligned: + while (byte < end) { + if (((*byte >> bit) & GFS2_BIT_MASK) == old_state) { + return goal + + (((byte - start) * GFS2_NBBY) + + ((bit - startbit) >> 1)); } - if (((*byte >> bit) & GFS2_BIT_MASK) == old_state) - return blk; bit += GFS2_BIT_SIZE; - if (bit >= 8) { + if (bit >= GFS2_NBBY * GFS2_BIT_SIZE) { bit = 0; byte++; + misaligned--; + if (!misaligned) { + plong = (unsigned long *)byte; + goto ulong_aligned; + } } - bitlong += GFS2_BIT_SIZE; - if (bitlong >= sizeof(unsigned long) * 8) { - bitlong = 0; - plong++; - } - - blk++; } + return BFITNOENT; +/* parse the bitmap a unsigned long at a time */ +ulong_aligned: + /* Stop at "end - 1" or else prefetch can go past the end and segfault. + We could "if" it but we'd lose some of the performance gained. + This way will only slow down searching the very last 4/8 bytes + depending on architecture. I've experimented with several ways + of writing this section such as using an else before the goto + but this one seems to be the fastest. */ + while ((unsigned char *)plong < end - 1) { + prefetch(plong + 1); + if (((*plong) & LBITMASK) != lskipval) + break; + plong++; + } + if ((unsigned char *)plong < end) { + byte = (const u8 *)plong; + misaligned += sizeof(unsigned long) - 1; + goto misaligned; + } return BFITNOENT; } |