TVP

# 单精度浮点数误差与消除方法

DechinPhy

2360

## 累积误差

# Accumulate the same random batches into a float64 total and a float32
# total, then print both so the float32 rounding drift can be compared.
import numpy as np

np.random.seed(1)  # fixed seed so repeated runs draw the same batches
sum_1 = np.array([0.], np.float64)  # float64 reference accumulator
sum_2 = np.array([0.], np.float32)  # naive float32 accumulator
for _ in range(100000):
    x = np.random.random(1000)
    sum_1 += x.sum()
    # The batch is cast to float32 before summing, and the in-place add
    # keeps the running total in float32 as well.
    sum_2 += x.astype(np.float32).sum()
print(sum_1)
print(sum_2)

[50003352.04503618]
[50003708.]

## 大数吃小数

# Add a small float32 value to a much larger float32 value and print the
# result, to show whether the small addend survives the addition.
import numpy as np

x = np.array([1000000.], dtype=np.float32)
y = np.array([0.01], dtype=np.float32)
total = x + y
print(total)

[1000000.]

# Same addition as the float32 case, but carried out in float64, and the
# result is printed for comparison.
import numpy as np

x = np.array([1000000.], dtype=np.float64)
y = np.array([0.01], dtype=np.float64)
total = x + y
print(total)

[1000000.01]

## Kahan求和公式

# Compare three ways of accumulating the same random batches:
#   sum_1 - float64 reference
#   sum_2 - naive float32 accumulation
#   sum_3 - float32 accumulation with Kahan (compensated) summation
import numpy as np

np.random.seed(1)  # fixed seed so repeated runs draw the same batches
sum_1 = np.array([0.], np.float64)
sum_2 = np.array([0.], np.float32)
sum_3 = np.array([0.], np.float32)
tmp_1 = np.array([0.], np.float32)  # running compensation term
for _ in range(100000):
    x = np.random.random(1000)
    sum_1 += x.sum()
    # Compute the float32 batch sum once; both float32 accumulators use it.
    x_sum32 = x.astype(np.float32).sum()
    sum_2 += x_sum32
    # Kahan step: subtract the previously captured rounding error from the
    # new term, add it, then measure what the addition actually lost.
    tmp_2 = x_sum32 - tmp_1
    tmp_3 = sum_3 + tmp_2
    tmp_1 = (tmp_3 - sum_3) - tmp_2
    sum_3 = tmp_3
print(sum_1)
print(sum_2)
print(sum_3)

[50003352.04503618]
[50003708.]
[50003352.]

# Start every accumulator at 1000000 and add many tiny batch sums:
#   sum_1 - float64 reference
#   sum_2 - naive float32 accumulation
#   sum_3 - float32 accumulation with Kahan (compensated) summation
import numpy as np

np.random.seed(1)  # fixed seed so repeated runs draw the same batches
sum_1 = np.array([1000000.], np.float64)
sum_2 = np.array([1000000.], np.float32)
sum_3 = np.array([1000000.], np.float32)
tmp_1 = np.array([0.], np.float32)  # running compensation term
for _ in range(100000):
    # Scale the samples down so each batch sum is tiny next to the total.
    x = np.random.random(1000) * 1e-05
    sum_1 += x.sum()
    # Compute the float32 batch sum once; both float32 accumulators use it.
    x_sum32 = x.astype(np.float32).sum()
    sum_2 += x_sum32
    # Kahan step: subtract the previously captured rounding error from the
    # new term, add it, then measure what the addition actually lost.
    tmp_2 = x_sum32 - tmp_1
    tmp_3 = sum_3 + tmp_2
    tmp_1 = (tmp_3 - sum_3) - tmp_2
    sum_3 = tmp_3
print(sum_1)
print(sum_2)
print(sum_3)

[1000500.03352045]
[1000000.]
[1000500.06]

0 条评论

LV.

• 技术背景
• 累积误差
• 大数吃小数
• Kahan求和公式
• 总结概要
• 版权声明