[PATCH] Simplify in*() and out*() functions of AMD64 and i386
Christoph Mallon
christoph.mallon at gmx.de
Wed Apr 8 13:25:05 PDT 2009
Hi amd64@ and i386@,
attached is a patch which simplifies the in*() and out*() functions for
I/O port access on AMD64 and i386. It removes the case distinction in
inb() and outb() that was used to generate better code for constant
ports < 256. The distinction is unnecessary because GCC provides an asm
input constraint for exactly this range ("N", an integer constant in
0..255). The stale comment, which claims that no such constraint exists,
is removed, too. {in,out}{w,l}() now use this constraint as well; they
had no special case before, so better code is now generated for them.
Further, the unnecessary "cld" is removed from {in,out}s{b,w,l}(),
because the ABI guarantees that the direction flag is clear on function
entry. All in all, the code for in/out gets a bit simpler.
Comments are welcome.
Christoph
-------------- next part --------------
Index: sys/i386/include/cpufunc.h
===================================================================
--- sys/i386/include/cpufunc.h (Revision 190841)
+++ sys/i386/include/cpufunc.h (Arbeitskopie)
@@ -170,177 +170,97 @@
__asm __volatile("hlt");
}
-#if !defined(__GNUCLIKE_BUILTIN_CONSTANT_P) || __GNUCLIKE_ASM < 3
-
-#define inb(port) inbv(port)
-#define outb(port, data) outbv(port, data)
-
-#else /* __GNUCLIKE_BUILTIN_CONSTANT_P && __GNUCLIKE_ASM >= 3 */
-
-/*
- * The following complications are to get around gcc not having a
- * constraint letter for the range 0..255. We still put "d" in the
- * constraint because "i" isn't a valid constraint when the port
- * isn't constant. This only matters for -O0 because otherwise
- * the non-working version gets optimized away.
- *
- * Use an expression-statement instead of a conditional expression
- * because gcc-2.6.0 would promote the operands of the conditional
- * and produce poor code for "if ((inb(var) & const1) == const2)".
- *
- * The unnecessary test `(port) < 0x10000' is to generate a warning if
- * the `port' has type u_short or smaller. Such types are pessimal.
- * This actually only works for signed types. The range check is
- * careful to avoid generating warnings.
- */
-#define inb(port) __extension__ ({ \
- u_char _data; \
- if (__builtin_constant_p(port) && ((port) & 0xffff) < 0x100 \
- && (port) < 0x10000) \
- _data = inbc(port); \
- else \
- _data = inbv(port); \
- _data; })
-
-#define outb(port, data) ( \
- __builtin_constant_p(port) && ((port) & 0xffff) < 0x100 \
- && (port) < 0x10000 \
- ? outbc(port, data) : outbv(port, data))
-
-static __inline u_char
-inbc(u_int port)
+static inline u_char
+inb(u_short port)
{
- u_char data;
-
- __asm __volatile("inb %1,%0" : "=a" (data) : "id" ((u_short)(port)));
- return (data);
+ u_char data;
+ __asm volatile("inb %1, %0" : "=a" (data) : "Nd" (port));
+ return data;
}
-static __inline void
-outbc(u_int port, u_char data)
+static inline u_int
+inl(u_short port)
{
- __asm __volatile("outb %0,%1" : : "a" (data), "id" ((u_short)(port)));
-}
-
-#endif /* __GNUCLIKE_BUILTIN_CONSTANT_P && __GNUCLIKE_ASM >= 3*/
-
-static __inline u_char
-inbv(u_int port)
-{
- u_char data;
- /*
- * We use %%dx and not %1 here because i/o is done at %dx and not at
- * %edx, while gcc generates inferior code (movw instead of movl)
- * if we tell it to load (u_short) port.
- */
- __asm __volatile("inb %%dx,%0" : "=a" (data) : "d" (port));
+ u_int data;
+ __asm volatile("inl %1, %0" : "=a" (data) : "Nd" (port));
return (data);
}
-static __inline u_int
-inl(u_int port)
+static inline void
+insb(u_short port, void *addr, size_t cnt)
{
- u_int data;
-
- __asm __volatile("inl %%dx,%0" : "=a" (data) : "d" (port));
- return (data);
+ __asm volatile("rep; insb"
+ : "+D" (addr), "+c" (cnt)
+ : "d" (port)
+ : "memory");
}
-static __inline void
-insb(u_int port, void *addr, size_t cnt)
+static inline void
+insw(u_short port, void *addr, size_t cnt)
{
- __asm __volatile("cld; rep; insb"
- : "+D" (addr), "+c" (cnt)
- : "d" (port)
- : "memory");
+ __asm volatile("rep; insw"
+ : "+D" (addr), "+c" (cnt)
+ : "d" (port)
+ : "memory");
}
-static __inline void
-insw(u_int port, void *addr, size_t cnt)
+static inline void
+insl(u_short port, void *addr, size_t cnt)
{
- __asm __volatile("cld; rep; insw"
- : "+D" (addr), "+c" (cnt)
- : "d" (port)
- : "memory");
+ __asm volatile("rep; insl"
+ : "+D" (addr), "+c" (cnt)
+ : "d" (port)
+ : "memory");
}
static __inline void
-insl(u_int port, void *addr, size_t cnt)
-{
- __asm __volatile("cld; rep; insl"
- : "+D" (addr), "+c" (cnt)
- : "d" (port)
- : "memory");
-}
-
-static __inline void
invd(void)
{
__asm __volatile("invd");
}
-static __inline u_short
-inw(u_int port)
+static inline u_short
+inw(u_short port)
{
- u_short data;
-
- __asm __volatile("inw %%dx,%0" : "=a" (data) : "d" (port));
+ u_short data;
+ __asm volatile("inw %1, %0" : "=a" (data) : "Nd" (port));
return (data);
}
-static __inline void
-outbv(u_int port, u_char data)
+static inline void
+outb(u_short port, u_char data)
{
- u_char al;
- /*
- * Use an unnecessary assignment to help gcc's register allocator.
- * This make a large difference for gcc-1.40 and a tiny difference
- * for gcc-2.6.0. For gcc-1.40, al had to be ``asm("ax")'' for
- * best results. gcc-2.6.0 can't handle this.
- */
- al = data;
- __asm __volatile("outb %0,%%dx" : : "a" (al), "d" (port));
+ __asm volatile("outb %0, %1" : : "a" (data), "Nd" (port));
}
-static __inline void
-outl(u_int port, u_int data)
+static inline void
+outl(u_short port, u_int data)
{
- /*
- * outl() and outw() aren't used much so we haven't looked at
- * possible micro-optimizations such as the unnecessary
- * assignment for them.
- */
- __asm __volatile("outl %0,%%dx" : : "a" (data), "d" (port));
+ __asm volatile("outl %0, %1" : : "a" (data), "Nd" (port));
}
-static __inline void
-outsb(u_int port, const void *addr, size_t cnt)
+static inline void
+outsb(u_short port, const void *addr, size_t cnt)
{
- __asm __volatile("cld; rep; outsb"
- : "+S" (addr), "+c" (cnt)
- : "d" (port));
+ __asm volatile("rep; outsb" : "+S" (addr), "+c" (cnt) : "d" (port));
}
-static __inline void
-outsw(u_int port, const void *addr, size_t cnt)
+static inline void
+outsw(u_short port, const void *addr, size_t cnt)
{
- __asm __volatile("cld; rep; outsw"
- : "+S" (addr), "+c" (cnt)
- : "d" (port));
+ __asm volatile("rep; outsw" : "+S" (addr), "+c" (cnt) : "d" (port));
}
-static __inline void
-outsl(u_int port, const void *addr, size_t cnt)
+static inline void
+outsl(u_short port, const void *addr, size_t cnt)
{
- __asm __volatile("cld; rep; outsl"
- : "+S" (addr), "+c" (cnt)
- : "d" (port));
+ __asm volatile("rep; outsl" : "+S" (addr), "+c" (cnt) : "d" (port));
}
-static __inline void
-outw(u_int port, u_short data)
+static inline void
+outw(u_short port, u_short data)
{
- __asm __volatile("outw %0,%%dx" : : "a" (data), "d" (port));
+ __asm volatile("outw %0, %1" : : "a" (data), "Nd" (port));
}
static __inline void
Index: sys/i386/i386/machdep.c
===================================================================
--- sys/i386/i386/machdep.c (Revision 190841)
+++ sys/i386/i386/machdep.c (Arbeitskopie)
@@ -3555,45 +3555,24 @@
#ifdef KDB
/*
- * Provide inb() and outb() as functions. They are normally only
- * available as macros calling inlined functions, thus cannot be
- * called from the debugger.
- *
- * The actual code is stolen from <machine/cpufunc.h>, and de-inlined.
+ * Provide inb_() and outb_() as out-of-line wrappers around inb() and outb(),
+ * which are inline-only and thus cannot be called from the debugger.
*/
-#undef inb
-#undef outb
-
/* silence compiler warnings */
-u_char inb(u_int);
-void outb(u_int, u_char);
+u_char inb_(u_short);
+void outb_(u_short, u_char);
u_char
-inb(u_int port)
+inb_(u_short port)
{
- u_char data;
- /*
- * We use %%dx and not %1 here because i/o is done at %dx and not at
- * %edx, while gcc generates inferior code (movw instead of movl)
- * if we tell it to load (u_short) port.
- */
- __asm __volatile("inb %%dx,%0" : "=a" (data) : "d" (port));
- return (data);
+ return inb(port);
}
void
-outb(u_int port, u_char data)
+outb_(u_short port, u_char data)
{
- u_char al;
- /*
- * Use an unnecessary assignment to help gcc's register allocator.
- * This make a large difference for gcc-1.40 and a tiny difference
- * for gcc-2.6.0. For gcc-1.40, al had to be ``asm("ax")'' for
- * best results. gcc-2.6.0 can't handle this.
- */
- al = data;
- __asm __volatile("outb %0,%%dx" : : "a" (al), "d" (port));
+ outb(port, data);
}
#endif /* KDB */
Index: sys/amd64/include/cpufunc.h
===================================================================
--- sys/amd64/include/cpufunc.h (Revision 190841)
+++ sys/amd64/include/cpufunc.h (Arbeitskopie)
@@ -164,177 +164,97 @@
__asm __volatile("hlt");
}
-#if !defined(__GNUCLIKE_BUILTIN_CONSTANT_P) || __GNUCLIKE_ASM < 3
-
-#define inb(port) inbv(port)
-#define outb(port, data) outbv(port, data)
-
-#else /* __GNUCLIKE_BUILTIN_CONSTANT_P && __GNUCLIKE_ASM >= 3 */
-
-/*
- * The following complications are to get around gcc not having a
- * constraint letter for the range 0..255. We still put "d" in the
- * constraint because "i" isn't a valid constraint when the port
- * isn't constant. This only matters for -O0 because otherwise
- * the non-working version gets optimized away.
- *
- * Use an expression-statement instead of a conditional expression
- * because gcc-2.6.0 would promote the operands of the conditional
- * and produce poor code for "if ((inb(var) & const1) == const2)".
- *
- * The unnecessary test `(port) < 0x10000' is to generate a warning if
- * the `port' has type u_short or smaller. Such types are pessimal.
- * This actually only works for signed types. The range check is
- * careful to avoid generating warnings.
- */
-#define inb(port) __extension__ ({ \
- u_char _data; \
- if (__builtin_constant_p(port) && ((port) & 0xffff) < 0x100 \
- && (port) < 0x10000) \
- _data = inbc(port); \
- else \
- _data = inbv(port); \
- _data; })
-
-#define outb(port, data) ( \
- __builtin_constant_p(port) && ((port) & 0xffff) < 0x100 \
- && (port) < 0x10000 \
- ? outbc(port, data) : outbv(port, data))
-
-static __inline u_char
-inbc(u_int port)
+static inline u_char
+inb(u_short port)
{
- u_char data;
-
- __asm __volatile("inb %1,%0" : "=a" (data) : "id" ((u_short)(port)));
- return (data);
+ u_char data;
+ __asm volatile("inb %1, %0" : "=a" (data) : "Nd" (port));
+ return data;
}
-static __inline void
-outbc(u_int port, u_char data)
+static inline u_int
+inl(u_short port)
{
- __asm __volatile("outb %0,%1" : : "a" (data), "id" ((u_short)(port)));
-}
-
-#endif /* __GNUCLIKE_BUILTIN_CONSTANT_P && __GNUCLIKE_ASM >= 3*/
-
-static __inline u_char
-inbv(u_int port)
-{
- u_char data;
- /*
- * We use %%dx and not %1 here because i/o is done at %dx and not at
- * %edx, while gcc generates inferior code (movw instead of movl)
- * if we tell it to load (u_short) port.
- */
- __asm __volatile("inb %%dx,%0" : "=a" (data) : "d" (port));
+ u_int data;
+ __asm volatile("inl %1, %0" : "=a" (data) : "Nd" (port));
return (data);
}
-static __inline u_int
-inl(u_int port)
+static inline void
+insb(u_short port, void *addr, size_t cnt)
{
- u_int data;
-
- __asm __volatile("inl %%dx,%0" : "=a" (data) : "d" (port));
- return (data);
+ __asm volatile("rep; insb"
+ : "+D" (addr), "+c" (cnt)
+ : "d" (port)
+ : "memory");
}
-static __inline void
-insb(u_int port, void *addr, size_t cnt)
+static inline void
+insw(u_short port, void *addr, size_t cnt)
{
- __asm __volatile("cld; rep; insb"
- : "+D" (addr), "+c" (cnt)
- : "d" (port)
- : "memory");
+ __asm volatile("rep; insw"
+ : "+D" (addr), "+c" (cnt)
+ : "d" (port)
+ : "memory");
}
-static __inline void
-insw(u_int port, void *addr, size_t cnt)
+static inline void
+insl(u_short port, void *addr, size_t cnt)
{
- __asm __volatile("cld; rep; insw"
- : "+D" (addr), "+c" (cnt)
- : "d" (port)
- : "memory");
+ __asm volatile("rep; insl"
+ : "+D" (addr), "+c" (cnt)
+ : "d" (port)
+ : "memory");
}
static __inline void
-insl(u_int port, void *addr, size_t cnt)
-{
- __asm __volatile("cld; rep; insl"
- : "+D" (addr), "+c" (cnt)
- : "d" (port)
- : "memory");
-}
-
-static __inline void
invd(void)
{
__asm __volatile("invd");
}
-static __inline u_short
-inw(u_int port)
+static inline u_short
+inw(u_short port)
{
- u_short data;
-
- __asm __volatile("inw %%dx,%0" : "=a" (data) : "d" (port));
+ u_short data;
+ __asm volatile("inw %1, %0" : "=a" (data) : "Nd" (port));
return (data);
}
-static __inline void
-outbv(u_int port, u_char data)
+static inline void
+outb(u_short port, u_char data)
{
- u_char al;
- /*
- * Use an unnecessary assignment to help gcc's register allocator.
- * This make a large difference for gcc-1.40 and a tiny difference
- * for gcc-2.6.0. For gcc-1.40, al had to be ``asm("ax")'' for
- * best results. gcc-2.6.0 can't handle this.
- */
- al = data;
- __asm __volatile("outb %0,%%dx" : : "a" (al), "d" (port));
+ __asm volatile("outb %0, %1" : : "a" (data), "Nd" (port));
}
-static __inline void
-outl(u_int port, u_int data)
+static inline void
+outl(u_short port, u_int data)
{
- /*
- * outl() and outw() aren't used much so we haven't looked at
- * possible micro-optimizations such as the unnecessary
- * assignment for them.
- */
- __asm __volatile("outl %0,%%dx" : : "a" (data), "d" (port));
+ __asm volatile("outl %0, %1" : : "a" (data), "Nd" (port));
}
-static __inline void
-outsb(u_int port, const void *addr, size_t cnt)
+static inline void
+outsb(u_short port, const void *addr, size_t cnt)
{
- __asm __volatile("cld; rep; outsb"
- : "+S" (addr), "+c" (cnt)
- : "d" (port));
+ __asm volatile("rep; outsb" : "+S" (addr), "+c" (cnt) : "d" (port));
}
-static __inline void
-outsw(u_int port, const void *addr, size_t cnt)
+static inline void
+outsw(u_short port, const void *addr, size_t cnt)
{
- __asm __volatile("cld; rep; outsw"
- : "+S" (addr), "+c" (cnt)
- : "d" (port));
+ __asm volatile("rep; outsw" : "+S" (addr), "+c" (cnt) : "d" (port));
}
-static __inline void
-outsl(u_int port, const void *addr, size_t cnt)
+static inline void
+outsl(u_short port, const void *addr, size_t cnt)
{
- __asm __volatile("cld; rep; outsl"
- : "+S" (addr), "+c" (cnt)
- : "d" (port));
+ __asm volatile("rep; outsl" : "+S" (addr), "+c" (cnt) : "d" (port));
}
-static __inline void
-outw(u_int port, u_short data)
+static inline void
+outw(u_short port, u_short data)
{
- __asm __volatile("outw %0,%%dx" : : "a" (data), "d" (port));
+ __asm volatile("outw %0, %1" : : "a" (data), "Nd" (port));
}
static __inline void
Index: sys/amd64/amd64/machdep.c
===================================================================
--- sys/amd64/amd64/machdep.c (Revision 190841)
+++ sys/amd64/amd64/machdep.c (Arbeitskopie)
@@ -2178,45 +2178,24 @@
#ifdef KDB
/*
- * Provide inb() and outb() as functions. They are normally only
- * available as macros calling inlined functions, thus cannot be
- * called from the debugger.
- *
- * The actual code is stolen from <machine/cpufunc.h>, and de-inlined.
+ * Provide inb_() and outb_() as out-of-line wrappers around inb() and outb(),
+ * which are inline-only and thus cannot be called from the debugger.
*/
-#undef inb
-#undef outb
-
/* silence compiler warnings */
-u_char inb(u_int);
-void outb(u_int, u_char);
+u_char inb_(u_short);
+void outb_(u_short, u_char);
u_char
-inb(u_int port)
+inb_(u_short port)
{
- u_char data;
- /*
- * We use %%dx and not %1 here because i/o is done at %dx and not at
- * %edx, while gcc generates inferior code (movw instead of movl)
- * if we tell it to load (u_short) port.
- */
- __asm __volatile("inb %%dx,%0" : "=a" (data) : "d" (port));
- return (data);
+ return inb(port);
}
void
-outb(u_int port, u_char data)
+outb_(u_short port, u_char data)
{
- u_char al;
- /*
- * Use an unnecessary assignment to help gcc's register allocator.
- * This make a large difference for gcc-1.40 and a tiny difference
- * for gcc-2.6.0. For gcc-1.40, al had to be ``asm("ax")'' for
- * best results. gcc-2.6.0 can't handle this.
- */
- al = data;
- __asm __volatile("outb %0,%%dx" : : "a" (al), "d" (port));
+ outb(port, data);
}
#endif /* KDB */
More information about the freebsd-amd64
mailing list