#! /usr/bin/env python
"PYSTONE" Benchmark Program
Version: Python/1.1 (corresponds to C/1.1 plus 2 Pystone fixes)
Author: Reinhold P. Weicker, CACM Vol 27, No 10, 10/84 pg. 1013.
Translated from ADA to C by Rick Richardson.
Every method to preserve ADA-likeness has been used,
at the expense of C-ness.
Translated from C to Python by Guido van Rossum.
Version History:
Version 1.1 corrects two bugs in version 1.0:
First, it leaked memory: in Proc1(), NextRecord ends
up having a pointer to itself. I have corrected this
by zapping NextRecord.PtrComp at the end of Proc1().
Second, Proc3() used the operator != to compare a
record to None. This is rather inefficient and not
true to the intention of the original benchmark (where
a pointer comparison to None is intended; the !=
operator attempts to find a method __cmp__ to do value
comparison of the record). Version 1.1 runs 5-10
percent faster than version 1.0, so benchmark figures
of different versions can't be compared directly.
# Default number of benchmark passes.
LOOPS = 50000

# Prefer the high-resolution perf_counter; fall back to time.clock on
# interpreters whose time module does not provide perf_counter.
try:
    from time import perf_counter as clock
except ImportError:
    from time import clock

__version__ = "1.1"

# Enumeration values used throughout the benchmark (1..5).
[Ident1, Ident2, Ident3, Ident4, Ident5] = range(1, 6)
class Record:
    """Plain mutable record with the five fields the benchmark manipulates.

    Attributes:
        PtrComp: reference to another Record (or None).
        Discr: discriminator value.
        EnumComp: enumeration value.
        IntComp: integer component.
        StringComp: string component (defaults to 0 when not supplied).
    """

    def __init__(self, PtrComp = None, Discr = 0, EnumComp = 0,
                       IntComp = 0, StringComp = 0):
        self.PtrComp = PtrComp
        self.Discr = Discr
        self.EnumComp = EnumComp
        self.IntComp = IntComp
        self.StringComp = StringComp

    def copy(self):
        """Return a new Record with the same field values (shallow copy)."""
        return Record(self.PtrComp, self.Discr, self.EnumComp,
                      self.IntComp, self.StringComp)
# Integer truth constants used by the benchmark instead of the bool type.
TRUE = 1
FALSE = 0
def main(loops=LOOPS):
    """Run the benchmark for ``loops`` passes and print the results.

    Prints the elapsed benchmark time and the pystones/second rate returned
    by pystones().
    """
    benchtime, stones = pystones(loops)
    print("Pystone(%s) time for %d passes = %g" % \
          (__version__, loops, benchtime))
    print("This machine benchmarks at %g pystones/second" % stones)
def pystones(loops=LOOPS):
    """Run the benchmark and return whatever Proc0(loops) returns."""
    return Proc0(loops)
# Module-level state shared by the Proc*/Func* routines.
IntGlob = 0
BoolGlob = FALSE
Char1Glob = '\0'
Char2Glob = '\0'
# 51-element vector and a 51x51 matrix; every matrix row is an independent
# copy of the vector, so rows do not alias each other.
Array1Glob = [0]*51
Array2Glob = [row[:] for row in [Array1Glob]*51]
# Record references; populated by Proc0 before the timed loop.
PtrGlb = None
PtrGlbNext = None
def Proc0(loops=LOOPS):
    """Run the benchmark main loop ``loops`` times and time it.

    Returns:
        A ``(benchtime, loops_per_second)`` tuple. ``benchtime`` is the
        elapsed time of the main loop minus the time taken by an empty loop
        of the same length; the rate is 0.0 when ``benchtime`` is exactly 0.
    """
    global IntGlob
    global BoolGlob
    global Char1Glob
    global Char2Glob
    global Array1Glob
    global Array2Glob
    global PtrGlb
    global PtrGlbNext

    # Calibration: time an empty loop so its overhead can be subtracted.
    starttime = clock()
    for i in range(loops):
        pass
    nulltime = clock() - starttime

    # Set up the global records and arrays used by the timed loop.
    PtrGlbNext = Record()
    PtrGlb = Record()
    PtrGlb.PtrComp = PtrGlbNext
    PtrGlb.Discr = Ident1
    PtrGlb.EnumComp = Ident3
    PtrGlb.IntComp = 40
    PtrGlb.StringComp = "DHRYSTONE PROGRAM, SOME STRING"
    String1Loc = "DHRYSTONE PROGRAM, 1'ST STRING"
    Array2Glob[8][7] = 10

    starttime = clock()

    for i in range(loops):
        # Proc5/Proc4 set Char1Glob = 'A' and Char2Glob = 'B', which Proc2
        # and the CharIndex loop below depend on.
        Proc5()
        Proc4()
        IntLoc1 = 2
        IntLoc2 = 3
        String2Loc = "DHRYSTONE PROGRAM, 2'ND STRING"
        EnumLoc = Ident2
        BoolGlob = not Func2(String1Loc, String2Loc)
        while IntLoc1 < IntLoc2:
            IntLoc3 = 5 * IntLoc1 - IntLoc2
            IntLoc3 = Proc7(IntLoc1, IntLoc2)
            IntLoc1 = IntLoc1 + 1
        Proc8(Array1Glob, Array2Glob, IntLoc1, IntLoc3)
        PtrGlb = Proc1(PtrGlb)
        CharIndex = 'A'
        while CharIndex <= Char2Glob:
            if EnumLoc == Func1(CharIndex, 'C'):
                EnumLoc = Proc6(Ident1)
            CharIndex = chr(ord(CharIndex)+1)
        IntLoc3 = IntLoc2 * IntLoc1
        # NOTE: "/" is true division on Python 3, so IntLoc2 becomes a float
        # there; kept as-is so timings stay comparable with published runs.
        IntLoc2 = IntLoc3 / IntLoc1
        IntLoc2 = 7 * (IntLoc3 - IntLoc2) - IntLoc1
        IntLoc1 = Proc2(IntLoc1)

    benchtime = clock() - starttime - nulltime
    if benchtime == 0.0:
        # Avoid ZeroDivisionError when the clock resolution is too coarse.
        loopsPerBenchtime = 0.0
    else:
        loopsPerBenchtime = (loops / benchtime)
    return benchtime, loopsPerBenchtime
def Proc1(PtrParIn):
    """Link a fresh copy of PtrGlb behind ``PtrParIn`` and update its fields.

    Returns ``PtrParIn`` itself when the copied record's Discr is Ident1,
    otherwise a copy of the new record.
    """
    PtrParIn.PtrComp = NextRecord = PtrGlb.copy()
    PtrParIn.IntComp = 5
    NextRecord.IntComp = PtrParIn.IntComp
    NextRecord.PtrComp = PtrParIn.PtrComp
    NextRecord.PtrComp = Proc3(NextRecord.PtrComp)
    if NextRecord.Discr == Ident1:
        NextRecord.IntComp = 6
        NextRecord.EnumComp = Proc6(PtrParIn.EnumComp)
        NextRecord.PtrComp = PtrGlb.PtrComp
        NextRecord.IntComp = Proc7(NextRecord.IntComp, 10)
    else:
        PtrParIn = NextRecord.copy()
    # Version 1.1 fix: clear the reference so NextRecord does not end up
    # pointing to itself (which leaked memory in version 1.0).
    NextRecord.PtrComp = None
    return PtrParIn
def Proc2(IntParIO):
    """Return ``IntParIO + 9 - IntGlob`` when Char1Glob is 'A'.

    The loop only terminates once EnumLoc has been set to Ident1, which
    happens inside the ``Char1Glob == 'A'`` branch.
    """
    IntLoc = IntParIO + 10
    while 1:
        if Char1Glob == 'A':
            IntLoc = IntLoc - 1
            IntParIO = IntLoc - IntGlob
            EnumLoc = Ident1
        if EnumLoc == Ident1:
            break
    return IntParIO
def Proc3(PtrParOut):
    """Return PtrGlb.PtrComp (when PtrGlb is set) and refresh PtrGlb.IntComp.

    Sets IntGlob to 100 only when PtrGlb is None.
    """
    global IntGlob

    # Identity test on purpose: version 1.1 replaced "!=" with "is not" so
    # this is a pointer comparison rather than a value comparison.
    if PtrGlb is not None:
        PtrParOut = PtrGlb.PtrComp
    else:
        IntGlob = 100
    PtrGlb.IntComp = Proc7(10, IntGlob)
    return PtrParOut
def Proc4():
    """Set the global Char2Glob to 'B'.

    BoolLoc is computed but never used; it is kept as part of the benchmark
    workload.
    """
    global Char2Glob

    BoolLoc = Char1Glob == 'A'
    BoolLoc = BoolLoc or BoolGlob
    Char2Glob = 'B'
def Proc5():
    """Reset the globals Char1Glob to 'A' and BoolGlob to FALSE."""
    global Char1Glob
    global BoolGlob

    Char1Glob = 'A'
    BoolGlob = FALSE
def Proc6(EnumParIn):
    """Map an enumeration value to another enumeration value.

    Ident1 -> Ident1; Ident2 -> Ident1 if IntGlob > 100 else Ident4;
    Ident3 -> Ident2; Ident4 -> Ident4; Ident5 -> Ident3.
    """
    EnumParOut = EnumParIn
    if not Func3(EnumParIn):
        EnumParOut = Ident4
    if EnumParIn == Ident1:
        EnumParOut = Ident1
    elif EnumParIn == Ident2:
        if IntGlob > 100:
            EnumParOut = Ident1
        else:
            EnumParOut = Ident4
    elif EnumParIn == Ident3:
        EnumParOut = Ident2
    elif EnumParIn == Ident4:
        # Nothing to do: EnumParOut was already set to Ident4 above.
        pass
    elif EnumParIn == Ident5:
        EnumParOut = Ident3
    return EnumParOut
def Proc7(IntParI1, IntParI2):
    """Return ``IntParI1 + IntParI2 + 2``."""
    IntLoc = IntParI1 + 2
    IntParOut = IntParI2 + IntLoc
    return IntParOut
def Proc8(Array1Par, Array2Par, IntParI1, IntParI2):
    """Update a few cells of the vector and matrix in place; set IntGlob to 5.

    All indices are derived from ``IntLoc = IntParI1 + 5``. The highest index
    touched in ``Array1Par`` is ``IntLoc + 30`` and the highest row touched in
    ``Array2Par`` is ``IntLoc + 20``, so both must be large enough for that.
    """
    global IntGlob

    IntLoc = IntParI1 + 5
    Array1Par[IntLoc] = IntParI2
    Array1Par[IntLoc+1] = Array1Par[IntLoc]
    Array1Par[IntLoc+30] = IntLoc
    for IntIndex in range(IntLoc, IntLoc+2):
        Array2Par[IntLoc][IntIndex] = IntLoc
    Array2Par[IntLoc][IntLoc-1] = Array2Par[IntLoc][IntLoc-1] + 1
    Array2Par[IntLoc+20][IntLoc] = Array1Par[IntLoc]
    IntGlob = 5
def Func1(CharPar1, CharPar2):
    """Return Ident1 when the two characters differ, Ident2 when equal."""
    CharLoc1 = CharPar1
    CharLoc2 = CharLoc1
    if CharLoc2 != CharPar2:
        return Ident1
    return Ident2
def Func2(StrParI1, StrParI2):
    """Compare two strings the Dhrystone way and return TRUE or FALSE.

    The loop compares ``StrParI1[1]`` with ``StrParI2[2]`` and only advances
    when they differ (Func1 returns Ident1), so both strings need at least
    3 characters and those two characters must differ for it to terminate.
    The result is then TRUE when ``StrParI1 > StrParI2``, otherwise FALSE.
    """
    IntLoc = 1
    while IntLoc <= 1:
        if Func1(StrParI1[IntLoc], StrParI2[IntLoc+1]) == Ident1:
            CharLoc = 'A'
            IntLoc = IntLoc + 1
    if CharLoc >= 'W' and CharLoc <= 'Z':
        IntLoc = 7
    if CharLoc == 'X':
        return TRUE
    else:
        if StrParI1 > StrParI2:
            IntLoc = IntLoc + 7
            return TRUE
        else:
            return FALSE
def Func3(EnumParIn):
    """Return TRUE when ``EnumParIn`` equals Ident3, otherwise FALSE."""
    EnumLoc = EnumParIn
    if EnumLoc == Ident3: return TRUE
    return FALSE
if __name__ == '__main__':
    import sys

    def error(msg):
        """Write ``msg`` and a usage line to stderr, then exit with status 100."""
        # sys.stderr.write works on both Python 2 and Python 3, unlike the
        # Python 2-only "print >>sys.stderr" statement.
        sys.stderr.write("%s usage: %s [number_of_loops]\n" % (msg, sys.argv[0]))
        sys.exit(100)

    # Optional single argument: the number of benchmark passes.
    nargs = len(sys.argv) - 1
    if nargs > 1:
        error("%d arguments are too many;" % nargs)
    elif nargs == 1:
        try:
            loops = int(sys.argv[1])
        except ValueError:
            error("Invalid argument %r;" % sys.argv[1])
    else:
        loops = LOOPS
    main(loops)
Python 2.7
Pystone(1.1) time for 50000 passes = 0.178948
This machine benchmarks at 279411 pystones/second
Python 3.7
Pystone(1.1) time for 50000 passes = 0.201795
This machine benchmarks at 247777 pystones/second
Python 3.8
Pystone(1.1) time for 50000 passes = 0.222014
This machine benchmarks at 225211 pystones/second
Python 3.9
Pystone(1.1) time for 50000 passes = 0.223407
This machine benchmarks at 223807 pystones/second
Python 3.10
Pystone(1.1) time for 50000 passes = 0.265725
This machine benchmarks at 188164 pystones/second
Python 3.11
Pystone(1.1) time for 50000 passes = 0.104691
This machine benchmarks at 477596 pystones/second
可以看到,Python 3.11版本有了明显的性能提升,这个与官方的宣传也是一致的。
$ pip install cython
$ cython -3 --embed pystone.py
$ gcc -pthread -fPIC -fwrapv -O2 -Wall -fno-strict-aliasing -I/usr/include/python3.7 -l:libpython3.7m.so -o pystone pystone.c
$ ls -l pystone
-rwxrwxrwx 1 drunkdream drunkdream 178928 Sep 6 15:42 pystone
$ readelf -d pystone
Dynamic section at offset 0x1fd08 contains 26 entries:
Tag Type Name/Value
0x0000000000000001 (NEEDED) Shared library: [libpython3.7m.so.1.0]
0x0000000000000001 (NEEDED) Shared library: [libpthread.so.0]
0x0000000000000001 (NEEDED) Shared library: [libc.so.6]
0x000000000000000c (INIT) 0x403000
0x000000000000000d (FINI) 0x41b514
0x0000000000000019 (INIT_ARRAY) 0x420cf8
0x000000000000001b (INIT_ARRAYSZ) 8 (bytes)
0x000000000000001a (FINI_ARRAY) 0x420d00
0x000000000000001c (FINI_ARRAYSZ) 8 (bytes)
0x000000006ffffef5 (GNU_HASH) 0x400308
0x0000000000000005 (STRTAB) 0x401078
0x0000000000000006 (SYMTAB) 0x400328
0x000000000000000a (STRSZ) 2404 (bytes)
0x000000000000000b (SYMENT) 24 (bytes)
0x0000000000000015 (DEBUG) 0x0
0x0000000000000003 (PLTGOT) 0x421000
0x0000000000000002 (PLTRELSZ) 2592 (bytes)
0x0000000000000014 (PLTREL) RELA
0x0000000000000017 (JMPREL) 0x401e30
0x0000000000000007 (RELA) 0x401b18
0x0000000000000008 (RELASZ) 792 (bytes)
0x0000000000000009 (RELAENT) 24 (bytes)
0x000000006ffffffe (VERNEED) 0x401af8
0x000000006fffffff (VERNEEDNUM) 1
0x000000006ffffff0 (VERSYM) 0x4019dc
0x0000000000000000 (NULL) 0x0
$ ./pystone
Pystone(1.1) time for 50000 passes = 0.171947
This machine benchmarks at 290787 pystones/second
/* "pystone.py":73
* return Proc0(loops)
* IntGlob = 0 # <<<<<<<<<<<<<<
* BoolGlob = FALSE
* Char1Glob = '\0'
if (PyDict_SetItem(__pyx_d, __pyx_n_s_IntGlob, __pyx_int_0) < 0) __PYX_ERR(0, 73, __pyx_L1_error)
/* "pystone.py":74
* IntGlob = 0
* BoolGlob = FALSE # <<<<<<<<<<<<<<
* Char1Glob = '\0'
* Char2Glob = '\0'
__Pyx_GetModuleGlobalName(__pyx_t_7, __pyx_n_s_FALSE); if (unlikely(!__pyx_t_7)) __PYX_ERR(0, 74, __pyx_L1_error)
if (PyDict_SetItem(__pyx_d, __pyx_n_s_BoolGlob, __pyx_t_7) < 0) __PYX_ERR(0, 74, __pyx_L1_error)
__Pyx_DECREF(__pyx_t_7); __pyx_t_7 = 0;
/* "pystone.py":75
* IntGlob = 0
* BoolGlob = FALSE
* Char1Glob = '\0' # <<<<<<<<<<<<<<
* Char2Glob = '\0'
* Array1Glob = [0]*51
if (PyDict_SetItem(__pyx_d, __pyx_n_s_Char1Glob, __pyx_kp_u__12) < 0) __PYX_ERR(0, 75, __pyx_L1_error)
/* "pystone.py":76
* BoolGlob = FALSE
* Char1Glob = '\0'
* Char2Glob = '\0' # <<<<<<<<<<<<<<
* Array1Glob = [0]*51
* Array2Glob = list(map(lambda x: x[:], [Array1Glob]*51))
if (PyDict_SetItem(__pyx_d, __pyx_n_s_Char2Glob, __pyx_kp_u__12) < 0) __PYX_ERR(0, 76, __pyx_L1_error)
/* "pystone.py":77
* Char1Glob = '\0'
* Char2Glob = '\0'
* Array1Glob = [0]*51 # <<<<<<<<<<<<<<
* Array2Glob = list(map(lambda x: x[:], [Array1Glob]*51))
* PtrGlb = None
__pyx_t_7 = PyList_New(1 * 51); if (unlikely(!__pyx_t_7)) __PYX_ERR(0, 77, __pyx_L1_error)
{ Py_ssize_t __pyx_temp;
for (__pyx_temp=0; __pyx_temp < 51; __pyx_temp++) {
PyList_SET_ITEM(__pyx_t_7, __pyx_temp, __pyx_int_0);
if (PyDict_SetItem(__pyx_d, __pyx_n_s_Array1Glob, __pyx_t_7) < 0) __PYX_ERR(0, 77, __pyx_L1_error)
__Pyx_DECREF(__pyx_t_7); __pyx_t_7 = 0;
$ pip install nuitka
$ nuitka pystone.py
Nuitka-Options:INFO: Used command line options: pystone.py
Nuitka-Options:WARNING: You did not specify to follow or include anything but main program. Check options
Nuitka-Options:WARNING: and make sure that is intended.
Nuitka:WARNING: Using very slow fallback for ordered sets, please install 'orderedset' PyPI package for best
Nuitka:WARNING: Python compile time performance.
Nuitka:INFO: Starting Python compilation with Nuitka '1.1.8' on Python '3.7' commercial grade 'not installed'.
Nuitka:INFO: Completed Python level compilation and optimization.
Nuitka:INFO: Generating source code for C backend compiler.
Nuitka:INFO: Running data composer tool for optimal constant value handling.
Nuitka:INFO: Running C compilation via Scons.
Nuitka-Scons:INFO: Backend C compiler: gcc (gcc).
Nuitka-Scons:INFO: Backend linking program with 9 files (no progress information available).
Nuitka-Scons:WARNING: You are not using ccache.
Nuitka:INFO: Keeping build directory 'pystone.build'.
Nuitka:INFO: Successfully created 'pystone.bin'.
$ ls -l pystone.bin
-rwxrwxrwx 1 drunkdream drunkdream 268440 Sep 6 20:57 pystone.bin
$ ./pystone.bin
Pystone(1.1) time for 50000 passes = 0.12965
This machine benchmarks at 385654 pystones/second
本来想在Python 3.11下测试下性能,不过发现目前最新版本的nuitka还没适配Python 3.11,编译会有报错。
-o: 指定要生成的文件名
--standalone: 将依赖库都编译到一个文件中,不过对于依赖的动态链接库,还是会以多个文件的形式存在
--onefile: 这个参数可以解决--standalone产生多个文件的问题
--nofollow-imports: 不编译import进来的第三方库
--clang: 强制使用clang作为编译后端
--static-libpython=yes: 静态链接libpython
--show-scons: 显示编译C代码过程中的详细日志

通过观察可以发现,nuitka也是通过将python代码转换成C代码,然后编译成最终的可执行文件。使用--static-libpython=yes参数时,最终的链接命令如下:
$ gcc -o pystone.bin -fuse-linker-plugin -flto=8 -fpartial-inlining -freorder-functions -O2 -s -z noexecstack -Wl,-R,'/usr/lib' -Wl,--disable-new-dtags -Wl,-b -Wl,binary -Wl,./__constants.bin -Wl,-b -Wl,elf64-x86-64 -Wl,-defsym -Wl,constant_bin_data=_binary_____constants_bin_start @"./@link_input.txt" -L/usr/lib -ldl -lm /usr/lib/libpython3.7m.a
不过在实际执行时会有报错,原因是命令行中没有包含-lz -lpthread -lexpat -lutil