Extrem schnelle Variante eines strlen() von GNU
Verfasst: Fr Mär 20, 2015 6:05 pm
Moin,
vorhin fand ich folgende Version eines strlen, in der GNU-Lib:
https://sourceware.org/git/?p=glibc.git ... ab;hb=HEAD
Sieht fett aus; Ich fragte mich, ob er(wie in den Kommentaren behauptet) tatsächlich schneller ist, als ein "traditionelles" strlen() wie z.B:
Der Test - GNU Variante
Getestet wie folgt:
Kompletter Quelltext mit Text: http://paste.ubuntu.com/10635948/
... mit g++ kompiliert:
... und dann mit "time" die Zeit gestoppt:
Ergab, wie gesagt, erstaunlicherweise, folgende Zeiten(Nach dem 2. Aufruf):
Die "traditionelle" Variante
Der Code ist im grunde der selbe(vom Text her). Ich habe lediglich die strlen() ausgetauscht, mit der oben genannten Beispielvariante.
Wie gehabt:
... mit g++ kompiliert:
... und dann mit "time" die Zeit gestoppt:
Und folgendes Ergebnis:
Für mich stellt sich die Frage, und ich kann sie auch ehrlich nicht beantworten: Wie um himmelswillen schafft der GNU-Code es, so massiv schneller zu sein(5-7x), bei (scheinbar) mehr Abfragen, die natürlich Zeit kosten?
Liegt das wirklich an dieser "longword" abfrage? Hat einer von euch da eine Idee?
Grüße
vorhin fand ich folgende Version eines strlen, in der GNU-Lib:
https://sourceware.org/git/?p=glibc.git ... ab;hb=HEAD
Code: Alles auswählen
#include <stdlib.h>
#undef strlen
/* Return the length of the null-terminated string STR. Scan for
the null terminator quickly by testing four bytes at a time. */
size_t
strlen (const char *str)
{
const char *char_ptr;
const unsigned long int *longword_ptr;
unsigned long int longword, himagic, lomagic;
/* Handle the first few characters by reading one character at a time.
Do this until CHAR_PTR is aligned on a longword boundary. */
for (char_ptr = str; ((unsigned long int) char_ptr
& (sizeof (longword) - 1)) != 0;
++char_ptr)
if (*char_ptr == '\0')
return char_ptr - str;
/* All these elucidatory comments refer to 4-byte longwords,
but the theory applies equally well to 8-byte longwords. */
longword_ptr = (unsigned long int *) char_ptr;
/* Bits 31, 24, 16, and 8 of this number are zero. Call these bits
the "holes." Note that there is a hole just to the left of
each byte, with an extra at the end:
bits: 01111110 11111110 11111110 11111111
bytes: AAAAAAAA BBBBBBBB CCCCCCCC DDDDDDDD
The 1-bits make sure that carries propagate to the next 0-bit.
The 0-bits provide holes for carries to fall into. */
himagic = 0x80808080L;
lomagic = 0x01010101L;
if (sizeof (longword) > 4)
{
/* 64-bit version of the magic. */
/* Do the shift in two steps to avoid a warning if long has 32 bits. */
himagic = ((himagic << 16) << 16) | himagic;
lomagic = ((lomagic << 16) << 16) | lomagic;
}
if (sizeof (longword) > 8)
abort ();
/* Instead of the traditional loop which tests each character,
we will test a longword at a time. The tricky part is testing
if *any of the four* bytes in the longword in question are zero. */
for (;;)
{
longword = *longword_ptr++;
if (((longword - lomagic) & ~longword & himagic) != 0)
{
/* Which of the bytes was the zero? If none of them were, it was
a misfire; continue the search. */
const char *cp = (const char *) (longword_ptr - 1);
if (cp[0] == 0)
return cp - str;
if (cp[1] == 0)
return cp - str + 1;
if (cp[2] == 0)
return cp - str + 2;
if (cp[3] == 0)
return cp - str + 3;
if (sizeof (longword) > 4)
{
if (cp[4] == 0)
return cp - str + 4;
if (cp[5] == 0)
return cp - str + 5;
if (cp[6] == 0)
return cp - str + 6;
if (cp[7] == 0)
return cp - str + 7;
}
}
}
}
Code: Alles auswählen
unsigned int strlen(char const *str)
{
int x=0;
while (*( str++ ))
{x++;}
return x;
}
Getestet wie folgt:
Code: Alles auswählen
/* Copyright (C) 1991-2015 Free Software Foundation, Inc.*/
#include <stdlib.h>
#undef strlen
/* Return the length of the null-terminated string STR. Scan for
the null terminator quickly by testing four bytes at a time. */
size_t
strlen (const char *str)
{
const char *char_ptr;
const unsigned long int *longword_ptr;
unsigned long int longword, himagic, lomagic;
/* Handle the first few characters by reading one character at a time.
Do this until CHAR_PTR is aligned on a longword boundary. */
for (char_ptr = str; ((unsigned long int) char_ptr
& (sizeof (longword) - 1)) != 0;
++char_ptr)
if (*char_ptr == '\0')
return char_ptr - str;
/* All these elucidatory comments refer to 4-byte longwords,
but the theory applies equally well to 8-byte longwords. */
longword_ptr = (unsigned long int *) char_ptr;
/* Bits 31, 24, 16, and 8 of this number are zero. Call these bits
the "holes." Note that there is a hole just to the left of
each byte, with an extra at the end:
bits: 01111110 11111110 11111110 11111111
bytes: AAAAAAAA BBBBBBBB CCCCCCCC DDDDDDDD
The 1-bits make sure that carries propagate to the next 0-bit.
The 0-bits provide holes for carries to fall into. */
himagic = 0x80808080L;
lomagic = 0x01010101L;
if (sizeof (longword) > 4)
{
/* 64-bit version of the magic. */
/* Do the shift in two steps to avoid a warning if long has 32 bits. */
himagic = ((himagic << 16) << 16) | himagic;
lomagic = ((lomagic << 16) << 16) | lomagic;
}
if (sizeof (longword) > 8)
abort ();
/* Instead of the traditional loop which tests each character,
we will test a longword at a time. The tricky part is testing
if *any of the four* bytes in the longword in question are zero. */
for (;;)
{
longword = *longword_ptr++;
if (((longword - lomagic) & ~longword & himagic) != 0)
{
/* Which of the bytes was the zero? If none of them were, it was
a misfire; continue the search. */
const char *cp = (const char *) (longword_ptr - 1);
if (cp[0] == 0)
return cp - str;
if (cp[1] == 0)
return cp - str + 1;
if (cp[2] == 0)
return cp - str + 2;
if (cp[3] == 0)
return cp - str + 3;
if (sizeof (longword) > 4)
{
if (cp[4] == 0)
return cp - str + 4;
if (cp[5] == 0)
return cp - str + 5;
if (cp[6] == 0)
return cp - str + 6;
if (cp[7] == 0)
return cp - str + 7;
}
}
}
}
int main(int argc, char** argv)
{
const char* example=""; // Siehe Link für den Text
for(int i=0; i < 10000;i++)
strlen(example);
return 0;
}
... mit g++ kompiliert:
Code: Alles auswählen
g++ -Wall -o "strlen_ex" "strlen_ex.cpp"
Code: Alles auswählen
time ./strlen_ex
Code: Alles auswählen
real 0m3.846s
user 0m3.828s
sys 0m0.004s
Der Code ist im grunde der selbe(vom Text her). Ich habe lediglich die strlen() ausgetauscht, mit der oben genannten Beispielvariante.
Wie gehabt:
... mit g++ kompiliert:
Code: Alles auswählen
g++ -Wall -o "strlen_ex" "strlen_ex.cpp"
Code: Alles auswählen
time ./strlen_ex
Code: Alles auswählen
real 0m21.701s
user 0m21.665s
sys 0m0.000s
Liegt das wirklich an dieser "longword" abfrage? Hat einer von euch da eine Idee?
Grüße