我很好奇地想看看我使用OpenGL的配置的纹理上传的性能,并注意到一些我认为奇怪的东西。我使用GL_RGBA8
格式的glTexStorage2D
创建了一个4K纹理。然后,每一帧我都使用glTexSubImage2D
将静态图像缓冲区重新上传到纹理。基于帧速率,我得到了大约5.19 GB/s的上传带宽。接下来,我将纹理的格式更改为GL_SRGB8_ALPHA8
,然后重试实验。这一次,我只得到了2.81 GB/s,这是一个显著的下降。这似乎很奇怪,因为据我所知,上传sRGB数据和上传RGB数据应该没有什么不同——上传阶段不应该发生任何转换(sRGB到线性的转换应该在采样期间在着色器中进行)。
一些额外的信息。对于第一个测试,我在调用glTexSubImage2D
时使用了GL_RGBA
和GL_UNSIGNED_INT_8_8_8_8_REV
,因为驱动程序(通过glGetInternalformativ
)告诉我这是理想的。根据驱动程序的建议,对于第二个测试,我使用GL_UNSIGNED_INT_8_8_8_8
。一些测试证实,这两种像素类型分别是各自内部格式下上传最快的组合。测试环境:Windows 7 x64,Nvidia GeForce GTX 760,332.21 版驱动程序。
#include <GL/glew.h>
#include <GLFW/glfw3.h>
#include <vector>
#include <cstdlib>
#include <cstdio>
// Window dimensions in pixels; also passed to the fragment shader as the
// `screenSize` uniform so fragment coords can be normalized to [0, 1].
#define SCREEN_SIZE_X 1024
#define SCREEN_SIZE_Y 1024
// Stringize a GLSL snippet and prepend the #version directive, letting the
// shader source be written as plain (unquoted) code in this file.
#define GLSL(src) "#version 440 core\n" #src
// Vertex shader for a bufferless fullscreen quad: the four corner positions
// live in a shader-side constant array indexed by gl_VertexID, so no vertex
// buffer or attribute setup is required (drawn as a 4-vertex triangle strip).
const char* vertex_shader = GLSL(
const vec2 data[4] = vec2[]
(
vec2(-1.0, 1.0),
vec2(-1.0, -1.0),
vec2( 1.0, 1.0),
vec2( 1.0, -1.0)
);
void main()
{
gl_Position = vec4(data[gl_VertexID], 0.0, 1.0);
}
);
// Fragment shader: samples texture unit 0 (uniform location 0) using the
// window-space fragment coordinate divided by screenSize (uniform location 1),
// stretching the uploaded texture across the whole window.
const char* fragment_shader = GLSL(
layout(location = 0) uniform sampler2D texture0;
layout(location = 1) uniform vec2 screenSize;
out vec4 frag_color;
void main()
{
frag_color = texture(texture0, gl_FragCoord.xy / screenSize);
}
);
int main(int argc, char *argv[])
{
if(!glfwInit())
exit(EXIT_FAILURE);
glfwWindowHint(GLFW_RESIZABLE, GL_FALSE);
glfwWindowHint(GLFW_CONTEXT_VERSION_MAJOR, 4);
glfwWindowHint(GLFW_CONTEXT_VERSION_MINOR, 4);
glfwWindowHint(GLFW_OPENGL_PROFILE, GLFW_OPENGL_CORE_PROFILE);
glfwWindowHint(GLFW_OPENGL_FORWARD_COMPAT, GL_TRUE);
GLFWwindow* window = glfwCreateWindow(SCREEN_SIZE_X, SCREEN_SIZE_Y, "OpenGL Texture Upload", nullptr, nullptr);
if(!window)
{
glfwTerminate();
exit(EXIT_FAILURE);
}
glfwMakeContextCurrent(window);
glfwSwapInterval(0);
glewExperimental = GL_TRUE;
if(glewInit() != GLEW_OK)
{
glfwTerminate();
exit(EXIT_FAILURE);
}
GLuint vao = 0;
glGenVertexArrays(1, &vao);
glBindVertexArray(vao);
GLuint vs = glCreateShader(GL_VERTEX_SHADER);
glShaderSource(vs, 1, &vertex_shader, nullptr);
glCompileShader(vs);
GLuint fs = glCreateShader(GL_FRAGMENT_SHADER);
glShaderSource(fs, 1, &fragment_shader, nullptr);
glCompileShader(fs);
GLuint shader_program = glCreateProgram();
glAttachShader(shader_program, fs);
glAttachShader(shader_program, vs);
glLinkProgram(shader_program);
glUseProgram(shader_program);
glProgramUniform2f(shader_program, 1, SCREEN_SIZE_X, SCREEN_SIZE_Y);
GLuint texture = 0;
glGenTextures(1, &texture);
#ifdef USE_SRGB
glTextureStorage2DEXT(texture, GL_TEXTURE_2D, 1, GL_SRGB8_ALPHA8, 4096, 4096);
#else
glTextureStorage2DEXT(texture, GL_TEXTURE_2D, 1, GL_RGBA8, 4096, 4096);
#endif
glTextureParameteriEXT(texture, GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE);
glTextureParameteriEXT(texture, GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE);
glTextureParameteriEXT(texture, GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST);
glTextureParameteriEXT(texture, GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST);
glBindMultiTextureEXT(GL_TEXTURE0, GL_TEXTURE_2D, texture);
glProgramUniform1i(shader_program, 0, 0);
std::vector<unsigned int> image_buffer(4096*4096, 0xFF0000FFul);
double lastTime = glfwGetTime();
double nbFrames = 0;
while(!glfwWindowShouldClose(window))
{
double currentTime = glfwGetTime();
nbFrames++;
if (currentTime - lastTime >= 1.0)
{
char cbuffer[50];
snprintf(cbuffer, sizeof(cbuffer), "OpenGL Texture Upload [%.1f fps, %.3f ms]", nbFrames, 1000.0 / nbFrames);
glfwSetWindowTitle(window, cbuffer);
nbFrames = 0;
lastTime++;
}
#ifdef USE_SRGB
glTextureSubImage2DEXT(texture, GL_TEXTURE_2D, 0, 0, 0, 4096, 4096, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8, image_buffer.data());
#else
glTextureSubImage2DEXT(texture, GL_TEXTURE_2D, 0, 0, 0, 4096, 4096, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8_REV, image_buffer.data());
#endif
glClear(GL_COLOR_BUFFER_BIT);
glDrawArrays(GL_TRIANGLE_STRIP, 0, 4);
glfwSwapBuffers(window);
glfwPollEvents();
}
glfwDestroyWindow(window);
glfwTerminate();
exit(EXIT_SUCCESS);
}
发布于 2014-02-22 14:48:18
显然,存在一种叫做“原生像素格式”(native pixel format)的东西。请参阅 Nvidia 提供的这个链接,特别是其中的第 32.1.3 节。
https://stackoverflow.com/questions/21922117
复制相似问题