#ifndef _ASM_WORD_AT_A_TIME_H
#define _ASM_WORD_AT_A_TIME_H

/*
 * Word-at-a-time interfaces for PowerPC.
 */

#include <linux/kernel.h>
#include <asm/asm-compat.h>

#ifdef __BIG_ENDIAN__

/*
 * Big-endian constants for the zero-byte search.  high_bits is
 * REPEAT_BYTE(0xfe) + 1 (i.e. 0xfefe..feff) and low_bits is
 * REPEAT_BYTE(0x7f); both feed the carry-propagation trick used by
 * has_zero()/prep_zero_mask() below.
 */
struct word_at_a_time {
	const unsigned long high_bits, low_bits;
};

#define WORD_AT_A_TIME_CONSTANTS { REPEAT_BYTE(0xfe) + 1, REPEAT_BYTE(0x7f) }

19 | /* Bit set in the bytes that have a zero */ | |
20 | static inline long prep_zero_mask(unsigned long val, unsigned long rhs, const struct word_at_a_time *c) | |
21 | { | |
22 | unsigned long mask = (val & c->low_bits) + c->low_bits; | |
23 | return ~(mask | rhs); | |
24 | } | |
25 | ||
26 | #define create_zero_mask(mask) (mask) | |
27 | ||
/*
 * On big-endian the first zero byte is the most significant marked byte
 * of the mask, so the byte index is (leading zero bit count) / 8.
 */
static inline long find_zero(unsigned long mask)
{
	long leading_zero_bits;

	/*
	 * PPC_CNTLZL: count-leading-zeros for the native word size
	 * (presumably cntlzw/cntlzd via asm/asm-compat.h -- confirm there).
	 */
	asm (PPC_CNTLZL "%0,%1" : "=r" (leading_zero_bits) : "r" (mask));
	return leading_zero_bits >> 3;
}

36 | static inline bool has_zero(unsigned long val, unsigned long *data, const struct word_at_a_time *c) | |
37 | { | |
38 | unsigned long rhs = val | c->low_bits; | |
39 | *data = rhs; | |
40 | return (val + c->high_bits) & ~rhs; | |
41 | } | |
42 | ||
#else

/*
 * Little-endian constants: one_bits = REPEAT_BYTE(0x01) and high_bits =
 * REPEAT_BYTE(0x80), the operands of the classic
 * "(x - 0x01..01) & ~x & 0x80..80" zero-byte test in has_zero() below.
 */
struct word_at_a_time {
	const unsigned long one_bits, high_bits;
};

#define WORD_AT_A_TIME_CONSTANTS { REPEAT_BYTE(0x01), REPEAT_BYTE(0x80) }

#ifdef CONFIG_64BIT

/* Alan Modra's little-endian strlen tail for 64-bit */
#define create_zero_mask(mask) (mask)

static inline unsigned long find_zero(unsigned long mask)
{
	/* NOTE(review): despite the names, %0 receives a *trailing*-zero
	 * count and %1 the below-lowest-set-bit mask. */
	unsigned long leading_zero_bits;
	long trailing_zero_bit_mask;

	/*
	 * (mask - 1) & ~mask keeps exactly the bits below the lowest set
	 * bit; popcntd of that is the number of trailing zero bits, and
	 * >> 3 converts bits to bytes (the first zero byte is the least
	 * significant one on little-endian).
	 */
	asm ("addi %1,%2,-1\n\t"
	     "andc %1,%1,%2\n\t"
	     "popcntd %0,%1"
	     : "=r" (leading_zero_bits), "=&r" (trailing_zero_bit_mask)
	     : "r" (mask));
	return leading_zero_bits >> 3;
}

#else /* 32-bit case */

/*
 * This is largely generic for little-endian machines, but the
 * optimal byte mask counting is probably going to be something
 * that is architecture-specific. If you have a reliably fast
 * bit count instruction, that might be better than the multiply
 * and shift, for example.
 */

/* Carl Chatfield / Jan Achrenius G+ version for 32-bit */
static inline long count_masked_bytes(long mask)
{
	/*
	 * Maps the possible byte masks 000000 / 0000ff / 00ffff / ffffff
	 * onto 1 / 1 / 2 / 3 with a single add-and-shift...
	 */
	long index = (0x0ff0001 + mask) >> 23;

	/* ...then clears the bogus 1 produced for the all-zero mask. */
	return index & mask;
}

static inline unsigned long create_zero_mask(unsigned long bits)
{
	/* Keep only the bits strictly below the lowest set 0x80 marker. */
	unsigned long below_lowest = (bits - 1) & ~bits;

	/* Drop the residual 0x7f of the zero byte itself. */
	return below_lowest >> 7;
}

/* Byte index of the first zero byte, given the mask from above. */
static inline unsigned long find_zero(unsigned long mask)
{
	return count_masked_bytes(mask);
}
#endif	/* CONFIG_64BIT */

101 | /* Return nonzero if it has a zero */ | |
102 | static inline unsigned long has_zero(unsigned long a, unsigned long *bits, const struct word_at_a_time *c) | |
103 | { | |
104 | unsigned long mask = ((a - c->one_bits) & ~a) & c->high_bits; | |
105 | *bits = mask; | |
106 | return mask; | |
107 | } | |
108 | ||
/* On LE the value stashed by has_zero() already is the final mask. */
static inline unsigned long prep_zero_mask(unsigned long a, unsigned long bits, const struct word_at_a_time *c)
{
	(void)a;
	(void)c;
	return bits;
}

/* The mask we created is directly usable as a bytemask */
#define zero_bytemask(mask) (mask)
#endif	/* __BIG_ENDIAN__ */

/*
 * We use load_unaligned_zeropad() in a selftest, which builds a userspace
 * program. Some linker scripts seem to discard the .fixup section, so allow
 * the test code to use a different section name.
 */
#ifndef FIXUP_SECTION
#define FIXUP_SECTION ".fixup"
#endif

/*
 * Load a full word starting at @addr even when the word crosses into an
 * unmapped page.  The straight load at 1: is covered by an __ex_table
 * entry (1b,3b): if it faults, the fixup at 3: re-reads from the
 * word-aligned address (clrrdi/clrrwi) and shifts the mapped bytes into
 * position -- left on big-endian, right on little-endian -- so the bytes
 * past the fault read as zero.  %[offset] is presumably the byte offset
 * within the word scaled to a bit count (clrlsldi/clrlslwi); confirm
 * against the Power ISA rotate-mask forms.
 */
static inline unsigned long load_unaligned_zeropad(const void *addr)
{
	unsigned long ret, offset, tmp;

	asm(
	"1:	" PPC_LL "%[ret], 0(%[addr])\n"
	"2:\n"
	".section " FIXUP_SECTION ",\"ax\"\n"
	"3:	"
#ifdef __powerpc64__
	"clrrdi %[tmp], %[addr], 3\n\t"
	"clrlsldi %[offset], %[addr], 61, 3\n\t"
	"ld %[ret], 0(%[tmp])\n\t"
#ifdef __BIG_ENDIAN__
	"sld %[ret], %[ret], %[offset]\n\t"
#else
	"srd %[ret], %[ret], %[offset]\n\t"
#endif
#else
	"clrrwi %[tmp], %[addr], 2\n\t"
	"clrlslwi %[offset], %[addr], 30, 3\n\t"
	"lwz %[ret], 0(%[tmp])\n\t"
#ifdef __BIG_ENDIAN__
	"slw %[ret], %[ret], %[offset]\n\t"
#else
	"srw %[ret], %[ret], %[offset]\n\t"
#endif
#endif
	"b 2b\n"
	".previous\n"
	".section __ex_table,\"a\"\n\t"
	PPC_LONG_ALIGN "\n\t"
	PPC_LONG "1b,3b\n"
	".previous"
	/* "b" constraints keep addr/tmp out of r0, where a base register
	 * would be read as literal zero. */
	: [tmp] "=&b" (tmp), [offset] "=&r" (offset), [ret] "=&r" (ret)
	: [addr] "b" (addr), "m" (*(unsigned long *)addr));

	return ret;
}

#undef FIXUP_SECTION

#endif /* _ASM_WORD_AT_A_TIME_H */