  1. code for MH's improved VBO here.


    Doom 3 GPL Source Code
    Copyright (C) 1999-2011 id Software LLC, a ZeniMax Media company.
    This file is part of the Doom 3 GPL Source Code ("Doom 3 Source Code").
    Doom 3 Source Code is free software: you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
    the Free Software Foundation, either version 3 of the License, or
    (at your option) any later version.
    Doom 3 Source Code is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    GNU General Public License for more details.
    You should have received a copy of the GNU General Public License
    along with Doom 3 Source Code.  If not, see <http://www.gnu.org/licenses/>.
    In addition, the Doom 3 Source Code is also subject to certain additional terms. You should have received a copy of these additional terms immediately following the terms and conditions of the GNU General Public License which accompanied the Doom 3 Source Code.  If not, please request a copy in writing from id Software at the address below.
    If you have questions concerning this license or the applicable additional terms, you may contact in writing id Software LLC, c/o ZeniMax Media Inc., Suite 120, Rockville, Maryland 20850 USA.
    #include "precompiled.h"
    #include "tr_local.h"
    // Size in bytes of each per-frame temp buffer; Init() copies it into frameBytes.
    static const int  FRAME_MEMORY_BYTES = 0x400000;
    // How many vertCache_t headers Alloc()/AllocFrameTemp() create at once when
    // their free-header list is empty.
    static const int  EXPAND_HEADERS = 32;
    // in case r_useArbBufferRange fails (happens on some AMD cards) turn it off.
    idCVar idVertexCache::r_showVertexCache("r_showVertexCache", "0", CVAR_INTEGER | CVAR_RENDERER, "show vertex cache");
    idCVar idVertexCache::r_useArbBufferRange("r_useArbBufferRange", "1", CVAR_BOOL | CVAR_RENDERER, "use ARB_map_buffer_range for optimization");
    idCVar idVertexCache::r_reuseVertexCacheSooner("r_reuseVertexCacheSooner", "1", CVAR_BOOL | CVAR_RENDERER, "reuse vertex buffers as soon as possible after freeing");
    // Definition of the global vertexCache instance.
    idVertexCache	 vertexCache;
    static void R_ListVBOMem_f(const idCmdArgs &args) {
    // Releases a block and puts its header back on the free static list.
    // If an owner pointer is registered it is set to NULL so the owner can see
    // that the block was purged. Blocks tagged TAG_TEMP keep their storage and
    // are not subtracted from staticAllocTotal.
    // With r_reuseVertexCacheSooner set the header goes to the front of the free
    // list, otherwise to the back.
    // NOTE(review): closing braces are missing from this listing; nesting is
    // implied by the indentation only.
    void idVertexCache::ActuallyFree(vertCache_t *block) {
    if (!block) {
     common->Error("idVertexCache Free: NULL pointer");
    if (block->user) {
     // let the owner know we have purged it
     *block->user = NULL;
     block->user = NULL;
    // temp blocks are in a shared space that won't be freed
    if (block->tag != TAG_TEMP) {
     staticAllocTotal -= block->size;
     // only free the buffer if its still active.
     if (virtualMemory) {
      common->DPrintf("Destroying Virtual Memory\n");
      // virtMem is allocated with new byte[] in Alloc().
      // NOTE(review): virtMem is declared void*, and delete[] through a void*
      // is undefined behaviour in standard C++ -- consider casting back to byte*.
      delete[] block->virtMem;
      block->virtMem = NULL;
    block->tag = TAG_FREE;	 // mark as free
    // unlink stick it back on the free list
    block->next->prev = block->prev;
    block->prev->next = block->next;
    if (r_reuseVertexCacheSooner.GetBool()) {
     // stick it on the front of the free list so it will be reused immediately
     block->next = freeStaticHeaders.next;
     block->prev = &freeStaticHeaders;
    else {
     // stick it on the back of the free list so it won't be reused soon (just for debugging)
     block->next = &freeStaticHeaders;
     block->prev = freeStaticHeaders.prev;
    // complete the insertion by pointing both neighbours at the block
    block->next->prev = block;
    block->prev->next = block;
    This will be a real pointer with virtual memory,
    but it will be an int offset cast to a pointer with
    ARB_vertex_buffer_object.
    The ARB_vertex_buffer_object will be bound.
    // Returns the address to use for a cached buffer.
    // For a VBO-backed block the buffer is bound through BindIndex() (as an element
    // array buffer when buffer->indexBuffer is set, otherwise as an array buffer)
    // and the block's byte offset is returned cast to a pointer. For a
    // virtual-memory block the result is virtMem + offset.
    // A NULL buffer or one tagged TAG_FREE is a fatal error.
    // NOTE(review): closing braces are missing from this listing.
    void *idVertexCache::Position(vertCache_t *buffer) {
    if (!buffer || buffer->tag == TAG_FREE) {
     common->FatalError("idVertexCache::Position: bad vertCache_t");
    // the ARB vertex object just uses an offset
    if (buffer->vbo) {
     // r_showVertexCache 2 logs every bind
     if (r_showVertexCache.GetInteger() == 2) {
      if (buffer->tag == TAG_TEMP) {
       common->Printf("GL_ARRAY_BUFFER_ARB = %i + %i (%i bytes)\n", buffer->vbo, buffer->offset, buffer->size);
      else {
       common->Printf("GL_ARRAY_BUFFER_ARB = %i (%i bytes)\n", buffer->vbo, buffer->size);
     BindIndex((buffer->indexBuffer ? GL_ELEMENT_ARRAY_BUFFER : GL_ARRAY_BUFFER), buffer->vbo);
     return (void *)buffer->offset;
    // virtual memory is a real pointer
    return (void *)((byte *)buffer->virtMem + buffer->offset);
    // dont make these static or the engine will crash.
    // Buffer ids last bound through BindIndex()/UnbindIndex(); those functions compare
    // against them so that a redundant glBindBufferARB call is skipped.
    GLuint vertexBuffer = 0;
    GLuint indexBuffer = 0;
    Makes sure it only allocates the right buffers once.
    // Binds vbo to target, skipping the GL call when the cached id for that
    // target already equals vbo. An unrecognised target is a fatal error.
    // NOTE(review): the case labels, break statements and closing braces of the
    // switch are missing from this listing. Presumably the first branch is
    // GL_ARRAY_BUFFER, the second GL_ELEMENT_ARRAY_BUFFER and the FatalError the
    // default case -- confirm against the original file.
    void idVertexCache::BindIndex(GLenum target, GLuint vbo) {
    switch (target) {
     if (vertexBuffer != vbo) {
      // this happens more often than you might think
      glBindBufferARB(target, vbo);
      vertexBuffer = vbo;
     if (indexBuffer != vbo) {
      // this happens more often than you might think
      glBindBufferARB(target, vbo);
      indexBuffer = vbo;
     common->FatalError("BindIndex : unknown buffer target : %i\n", static_cast<int>(target));
    Makes sure it only deallocates the right buffers once.
    // Binds buffer 0 to target, skipping the GL call when the cached id for that
    // target is already 0. An unrecognised target is a fatal error.
    // NOTE(review): the case labels, break statements and closing braces of the
    // switch are missing from this listing. Presumably the first branch is
    // GL_ARRAY_BUFFER, the second GL_ELEMENT_ARRAY_BUFFER and the FatalError the
    // default case -- confirm against the original file.
    void idVertexCache::UnbindIndex(GLenum target) {
    switch (target) {
     if (vertexBuffer != 0) {
      // this happens more often than you might think
      glBindBufferARB(target, 0);
      vertexBuffer = 0;
     if (indexBuffer != 0) {
      // this happens more often than you might think
      glBindBufferARB(target, 0);
      indexBuffer = 0;
     common->FatalError("UnbindIndex : unknown buffer target : %i\n", static_cast<int>(target));
    // Sets up the vertex cache: registers the ListVBOMem console command, chooses
    // between VBO storage and plain virtual memory, empties all header lists and
    // creates the NUM_VERTEX_FRAMES per-frame temp buffers.
    // NOTE(review): closing braces are missing from this listing.
    void idVertexCache::Init() {
    cmdSystem->AddCommand("ListVBOMem", R_ListVBOMem_f, CMD_FL_RENDERER, "lists Objects Allocated in Vertex Cache");
    // use ARB_vertex_buffer_object unless explicitly disabled
    if (glConfig.ARBVertexBufferObjectAvailable) {
     virtualMemory = false;
     common->Printf("using ARB_vertex_buffer_object memory\n");
    else {
     virtualMemory = true;
     common->Printf("WARNING: vertex array range in virtual memory (SLOW)\n");
    // initialize the cache memory blocks
    // (each list head points at itself, i.e. an empty circular list)
    freeStaticHeaders.next = freeStaticHeaders.prev = &freeStaticHeaders;
    staticHeaders.next = staticHeaders.prev = &staticHeaders;
    freeDynamicHeaders.next = freeDynamicHeaders.prev = &freeDynamicHeaders;
    dynamicHeaders.next = dynamicHeaders.prev = &dynamicHeaders;
    deferredFreeList.next = deferredFreeList.prev = &deferredFreeList;
    // set up the dynamic frame memory
    frameBytes = FRAME_MEMORY_BYTES;
    staticAllocTotal = 0;
    // allocate a dummy buffer
    // (source data for the initial Alloc() upload; its contents are not initialised here)
    byte *frameBuffer = new byte[frameBytes];
    for (int i = 0; i < NUM_VERTEX_FRAMES; i++) {
     // force the alloc to use GL_STREAM_DRAW_ARB
     allocatingTempBuffer = true;
     Alloc(frameBuffer, frameBytes, &tempBuffers[i]);
     allocatingTempBuffer = false;
     tempBuffers[i]->tag = TAG_FIXED;
     // unlink these from the static list, so they won't ever get purged
     tempBuffers[i]->next->prev = tempBuffers[i]->prev;
     tempBuffers[i]->prev->next = tempBuffers[i]->next;
    // use C++ allocation
    // (the dummy buffer is no longer needed once the temp buffers exist)
    delete[] frameBuffer;
    frameBuffer = NULL;
    Used when toggling vertex programs on or off, because
    the cached data isn't valid
    void idVertexCache::PurgeAll() {
    while (staticHeaders.next != &staticHeaders) {
    void idVertexCache::Shutdown() {
    // Allocates a static cache block holding a copy of data and stores it in *buffer.
    //   data    - source bytes to copy into the block
    //   size    - number of bytes; a value <= 0 is reported through common->Error
    //   buffer  - receives the block; also recorded as block->user so it can be
    //             cleared when the block is purged
    //   doIndex - true selects GL_ELEMENT_ARRAY_BUFFER, false GL_ARRAY_BUFFER
    // The usage hint is GL_STREAM_DRAW while allocatingTempBuffer is set, otherwise
    // GL_STATIC_DRAW. A free header with the same target, usage and size is
    // preferred; failing that, the first free header is taken.
    // NOTE(review): closing braces are missing from this listing, and so are the
    // break/continue statements of the search loop below -- confirm against the
    // original file.
    void idVertexCache::Alloc(void *data, int size, vertCache_t **buffer, bool doIndex) {
    vertCache_t *block = NULL;
    if (size <= 0) {
     common->Error("idVertexCache::Alloc: size = %i\n", size);
    // if we can't find anything, it will be NULL
    *buffer = NULL;
    // if we don't have any remaining unused headers, allocate some more
    if (freeStaticHeaders.next == &freeStaticHeaders) {
     for (int i = 0; i < EXPAND_HEADERS; i++) {
      block = headerAllocator.Alloc();
      if (!virtualMemory) {
       // each header gets its own buffer object up front
       glGenBuffers(1, &block->vbo);
       block->size = 0;
      // push the new header onto the front of the free static list
      block->next = freeStaticHeaders.next;
      block->prev = &freeStaticHeaders;
      block->next->prev = block;
      block->prev->next = block;
    GLenum target = (doIndex ? GL_ELEMENT_ARRAY_BUFFER : GL_ARRAY_BUFFER);
    GLenum usage = (allocatingTempBuffer ? GL_STREAM_DRAW : GL_STATIC_DRAW);
    // try to find a matching block to replace so that we're not continually respecifying vbo data each frame
    for (vertCache_t *findblock = freeStaticHeaders.next; /**/; findblock = findblock->next) {
     // reached the list head again: nothing matched, fall back to the first free header
     if (findblock == &freeStaticHeaders) {
      block = freeStaticHeaders.next;
     if (findblock->target != target) {
     if (findblock->usage != usage) {
     if (findblock->size != size) {
     block = findblock;
    // move it from the freeStaticHeaders list to the staticHeaders list
    block->target = target;
    block->usage = usage;
    if (block->vbo) {
     // orphan the buffer in case it needs respecifying (it usually will)
     BindIndex(target, block->vbo);
     // first call passes NULL data (the orphan), second call uploads the data
     glBufferDataARB(target, static_cast<GLsizeiptr>(size), NULL, usage);
     glBufferDataARB(target, static_cast<GLsizeiptr>(size), data, usage);
    else {
     // use C++ allocation
     block->virtMem = new byte[size];
     SIMDProcessor->Memcpy(block->virtMem, data, size);
    // unlink from the free list, then insert at the front of staticHeaders
    block->next->prev = block->prev;
    block->prev->next = block->next;
    block->next = staticHeaders.next;
    block->prev = &staticHeaders;
    block->next->prev = block;
    block->prev->next = block;
    block->size = size;
    block->offset = 0;
    block->tag = TAG_USED;
    // save data for debugging
    staticAllocThisFrame += block->size;
    staticAllocTotal += block->size;
    // this will be set to zero when it is purged
    block->user = buffer;
    *buffer = block;
    // allocation doesn't imply used-for-drawing, because at level
    // load time lots of things may be created, but they aren't
    // referenced by the GPU yet, and can be purged if needed.
    block->frameUsed = currentFrame - NUM_VERTEX_FRAMES;
    block->indexBuffer = doIndex;
    // Marks a static block as used in the current frame and moves it to the
    // front of the staticHeaders list.
    // A NULL block is reported through common->Error; a block tagged TAG_FREE or
    // TAG_TEMP is a fatal error.
    // NOTE(review): closing braces are missing from this listing.
    void idVertexCache::Touch(vertCache_t *block) {
    if (!block) {
     common->Error("idVertexCache Touch: NULL pointer");
    if (block->tag == TAG_FREE) {
     common->FatalError("idVertexCache Touch: freed pointer");
    if (block->tag == TAG_TEMP) {
     common->FatalError("idVertexCache Touch: temporary pointer");
    block->frameUsed = currentFrame;
    // move to the head of the LRU list
    block->next->prev = block->prev;
    block->prev->next = block->next;
    block->next = staticHeaders.next;
    block->prev = &staticHeaders;
    staticHeaders.next->prev = block;
    staticHeaders.next = block;
    // Queues a static block for release: clears its owner pointer and moves it
    // to the front of deferredFreeList.
    // A block tagged TAG_FREE or TAG_TEMP is a fatal error.
    // NOTE(review): the body of the NULL check and the closing braces are missing
    // from this listing; presumably a NULL block simply returns -- confirm.
    void idVertexCache::Free(vertCache_t *block) {
    if (!block) {
    if (block->tag == TAG_FREE) {
     common->FatalError("idVertexCache Free: freed pointer");
    if (block->tag == TAG_TEMP) {
     common->FatalError("idVertexCache Free: temporary pointer");
    // this block still can't be purged until the frame count has expired,
    // but it won't need to clear a user pointer when it is
    block->user = NULL;
    // unlink from its current list, then insert at the front of deferredFreeList
    block->next->prev = block->prev;
    block->prev->next = block->next;
    block->next = deferredFreeList.next;
    block->prev = &deferredFreeList;
    deferredFreeList.next->prev = block;
    deferredFreeList.next = block;
    MH's Version fast on Nvidia But may fail on AMD.
    // Copies size bytes of data into buffer's VBO at buffer->offset.
    // When ARB_map_buffer_range is available and r_useArbBufferRange is set, the
    // range is mapped and written with Memcpy; if the map returns NULL, or the
    // extension path is off, glBufferSubData is used instead.
    // NOTE(review): 'access' is not declared anywhere in this listing, no
    // glUnmapBuffer call is visible after the copy, and closing braces are
    // missing -- lines appear to have been dropped; confirm against the original
    // file. The two inner comments about unsynchronized maps and orphaning likely
    // belonged to those dropped lines.
    void idVertexCache::MapBufferRange(vertCache_t *buffer, void *data, int size) {
    BindIndex(GL_ARRAY_BUFFER, buffer->vbo);
    if (glConfig.ARBMapBufferRangeAvailable && r_useArbBufferRange.GetBool()) {
     GLvoid	  *ptr = glMapBufferRange(GL_ARRAY_BUFFER, static_cast<GLintptr>(buffer->offset), static_cast<GLsizeiptr>(size), access);
     // try to get an unsynchronized map if at all possible
     if (ptr) {
      // if the buffer has wrapped then we orphan it
      SIMDProcessor->Memcpy(static_cast<byte *>(ptr), data, size);
     else {
      // mapping failed: fall back to a plain sub-data upload
      glBufferSubData(GL_ARRAY_BUFFER, static_cast<GLintptrARB>(buffer->offset), static_cast<GLsizeiptr>(size), data);
    else {
     // just upload the whole shebang.
     glBufferSubData(GL_ARRAY_BUFFER, static_cast<GLintptrARB>(buffer->offset), static_cast<GLsizeiptr>(size), data);
    A frame temp allocation must never be allowed to fail due to overflow.
    We can't simply sync with the GPU and overwrite what we have, because
    there may still be future references to dynamically created surfaces.
    // Allocates a per-frame temporary block holding a copy of data and returns it.
    //   data - source bytes to copy
    //   size - number of bytes; a value <= 0 is reported through common->Error
    // The block is carved out of tempBuffers[listNum] at offset
    // dynamicAllocThisFrame. If the request does not fit in frameBytes,
    // tempOverflow is set and a static block from Alloc() is returned instead.
    // NOTE(review): closing braces are missing from this listing.
    vertCache_t *idVertexCache::AllocFrameTemp(void *data, int size) {
    vertCache_t *block;
    if (size <= 0) {
     common->Error("idVertexCache::AllocFrameTemp: size = %i\n", size);
    if (dynamicAllocThisFrame + size > frameBytes) {
     // if we don't have enough room in the temp block, allocate a static block,
     // but immediately free it so it will get freed at the next frame
     // NOTE(review): no Free(block) call is visible in this listing even though
     // the comment above describes one -- confirm against the original file.
     tempOverflow = true;
     Alloc(data, size, &block);
     return block;
    // this data is just going on the shared dynamic list
    // if we don't have any remaining unused headers, allocate some more
    if (freeDynamicHeaders.next == &freeDynamicHeaders) {
     for (int i = 0; i < EXPAND_HEADERS; i++) {
      block = headerAllocator.Alloc();
      block->next = freeDynamicHeaders.next;
      block->prev = &freeDynamicHeaders;
      block->next->prev = block;
      block->prev->next = block;
    // move it from the freeDynamicHeaders list to the dynamicHeaders list
    block = freeDynamicHeaders.next;
    block->next->prev = block->prev;
    block->prev->next = block->next;
    block->next = dynamicHeaders.next;
    block->prev = &dynamicHeaders;
    block->next->prev = block;
    block->prev->next = block;
    block->size = size;
    block->tag = TAG_TEMP;
    block->indexBuffer = false;
    // place the block directly after everything already handed out this frame
    block->offset = dynamicAllocThisFrame;
    dynamicAllocThisFrame += block->size;
    block->user = NULL;
    block->frameUsed = 0;
    // copy the data
    // (the block shares the storage of this frame's temp buffer)
    block->virtMem = tempBuffers[listNum]->virtMem;
    block->vbo = tempBuffers[listNum]->vbo;
    // mh code start
    if (block->vbo) {
     MapBufferRange(block, data, size);
    else if (block->virtMem) {
     SIMDProcessor->Memcpy(static_cast<byte *>(block->virtMem) + block->offset, data, size);
    return block;
    // End-of-frame bookkeeping: optionally prints usage statistics, advances
    // currentFrame and listNum from tr.frameCount, resets the per-frame counters
    // and returns every frame-temp header to the free dynamic list.
    // NOTE(review): closing braces are missing from this listing, as are the body
    // of the "if (!virtualMemory)" unbind step, the body of the deferred-free loop
    // and whatever updates staticUseCount -- confirm against the original file.
    void idVertexCache::EndFrame() {
    // display debug information
    if (r_showVertexCache.GetBool()) {
     int staticUseCount = 0;
     int staticUseSize = 0;
     // total up the static blocks that were touched this frame
     for (vertCache_t *block = staticHeaders.next; block != &staticHeaders; block = block->next) {
      if (block->frameUsed == currentFrame) {
       staticUseSize += block->size;
     const char *frameOverflow = tempOverflow ? "(OVERFLOW)" : "";
     common->Printf("vertex dynamic:%i=%ik%s, static alloc:%i=%ik used:%i=%ik total:%i=%ik\n",
      dynamicCountThisFrame, dynamicAllocThisFrame / 1024, frameOverflow,
      staticCountThisFrame, staticAllocThisFrame / 1024,
      staticUseCount, staticUseSize / 1024,
      staticCountTotal, staticAllocTotal / 1024);
    // unbind vertex buffers so normal virtual memory will be used
    if (!virtualMemory) {
    currentFrame = tr.frameCount;
    // selects which of the NUM_VERTEX_FRAMES temp buffers the next frame uses
    listNum = currentFrame % NUM_VERTEX_FRAMES;
    staticAllocThisFrame = 0;
    staticCountThisFrame = 0;
    dynamicAllocThisFrame = 0;
    dynamicCountThisFrame = 0;
    tempOverflow = false;
    // free all the deferred free headers
    while (deferredFreeList.next != &deferredFreeList) {
    // free all the frame temp headers
    // (the whole dynamicHeaders chain is spliced onto the front of
    // freeDynamicHeaders in one step, then dynamicHeaders is reset to empty)
    vertCache_t *block = dynamicHeaders.next;
    if (block != &dynamicHeaders) {
     block->prev = &freeDynamicHeaders;
     dynamicHeaders.prev->next = freeDynamicHeaders.next;
     freeDynamicHeaders.next->prev = dynamicHeaders.prev;
     freeDynamicHeaders.next = block;
     dynamicHeaders.next = dynamicHeaders.prev = &dynamicHeaders;
    // Prints a summary of the vertex cache: temp buffer count and size, the number
    // of active / free headers, and which storage path is in use.
    // NOTE(review): closing braces and the counting statements of the loops
    // (numActive, numFreeStaticHeaders, numFreeDynamicHeaders) are missing from
    // this listing; frameStatic and totalStatic are accumulated but no print of
    // them is visible -- confirm against the original file.
    void idVertexCache::List(void) {
    int   numActive = 0;
    int   frameStatic = 0;
    int   totalStatic = 0;
    vertCache_t *block;
    // walk the active static blocks
    for (block = staticHeaders.next; block != &staticHeaders; block = block->next) {
     totalStatic += block->size;
     if (block->frameUsed == currentFrame) {
      frameStatic += block->size;
    int   numFreeStaticHeaders = 0;
    for (block = freeStaticHeaders.next; block != &freeStaticHeaders; block = block->next) {
    int   numFreeDynamicHeaders = 0;
    for (block = freeDynamicHeaders.next; block != &freeDynamicHeaders; block = block->next) {
    common->Printf("%i dynamic temp buffers of %ik\n", NUM_VERTEX_FRAMES, frameBytes / 1024);
    common->Printf("%5i active static headers\n", numActive);
    common->Printf("%5i free static headers\n", numFreeStaticHeaders);
    common->Printf("%5i free dynamic headers\n", numFreeDynamicHeaders);
    if (!virtualMemory) {
     common->Printf("Vertex cache is in ARB_vertex_buffer_object memory (FAST).\n");
    else {
     common->Printf("Vertex cache is in virtual memory (SLOW)\n");
    common->Printf("Index buffers are accelerated.\n");
    just for gfxinfo printing
    // Returns false when the cache runs in virtual memory and true otherwise
    // (i.e. when buffer objects are used).
    // NOTE(review): closing braces are missing from this listing.
    bool idVertexCache::IsFast() {
    if (virtualMemory) {
     return false;
    return true;


    // vertex cache calls should only be made by the front end
    // Number of per-frame temp buffers; the current one is selected with
    // currentFrame % NUM_VERTEX_FRAMES.
    const int NUM_VERTEX_FRAMES = 2;
    // Tag stored in vertCache_t::tag describing what kind of block a header is.
    // NOTE(review): the implementation also uses TAG_FREE and TAG_USED, which are
    // missing from this listing; presumably they precede TAG_FIXED with
    // TAG_FREE == 0 -- confirm against the original header.
    typedef enum {
    TAG_FIXED,    // for the temp buffers
    TAG_TEMP	  // in frame temp area, not static area
    } vertBlockTag_t;
    // Header describing one cached buffer. Headers are linked into circular
    // doubly linked lists whose heads live in idVertexCache.
    typedef struct vertCache_s {
    GLuint    vbo;		 // buffer object id; tested for non-zero to pick the VBO path
    GLenum    target;		 // GL_ARRAY_BUFFER or GL_ELEMENT_ARRAY_BUFFER, set by Alloc
    GLenum    usage;		 // GL_STREAM_DRAW or GL_STATIC_DRAW, set by Alloc
    void    *virtMem;		 // only one of vbo / virtMem will be set
    bool    indexBuffer;	  // holds indexes instead of vertexes
    int	 offset;		 // byte offset of this block inside its buffer
    int	 size;		  // may be larger than the amount asked for, due
    // to round up and minimum fragment sizes
    int	 tag;		   // a tag of 0 is a free block
    struct vertCache_s  **user;		   // will be set to zero when purged
    struct vertCache_s  *next, *prev;	 // may be on the static list or one of the frame lists
    int	 frameUsed;	    // it can't be purged if near the current frame
    } vertCache_t;
    // Manages vertex and index data for the renderer, either in buffer objects or,
    // when those are unavailable, in plain heap memory ("virtual memory").
    // Static blocks come from Alloc(); short-lived per-frame blocks come from
    // AllocFrameTemp() and are recycled in EndFrame().
    // NOTE(review): the access specifiers and the closing "};" are missing from
    // this listing.
    class idVertexCache {
    void   Init();
    void   Shutdown();
    // just for gfxinfo printing
    bool   IsFast();
    // called when vertex programs are enabled or disabled, because
    // the cached data is no longer valid
    void   PurgeAll();
    // Tries to allocate space for the given data in fast vertex
    // memory, and copies it over.
    // Alloc does NOT do a touch, which allows purging of things
    // created at level load time even if a frame hasn't passed yet.
    // These allocations can be purged, which will zero the pointer.
    void   Alloc(void *data, int bytes, vertCache_t **buffer, bool indexBuffer = false);
    // This will be a real pointer with virtual memory,
    // but it will be an int offset cast to a pointer of ARB_vertex_buffer_object
    void   *Position(vertCache_t *buffer);
    // initialize the element array buffers
    // (binds vbo to target unless it is already the cached binding)
    void   BindIndex(GLenum target, GLuint vbo);
    // if you need to draw something without an indexCache,
    // this must be called to reset GL_ELEMENT_ARRAY_BUFFER_ARB
    void   UnbindIndex(GLenum target);
    // MH's MapBufferRange.
    // (uploads data into buffer's VBO at buffer->offset)
    void   MapBufferRange(vertCache_t *buffer, void *data, int size);
    // automatically freed at the end of the next frame
    // used for specular texture coordinates and gui drawing, which
    // will change every frame.
    // will return NULL if the vertex cache is completely full
    // As with Position(), this may not actually be a pointer you can access.
    vertCache_t    *AllocFrameTemp(void *data, int bytes);
    // notes that a buffer is used this frame, so it can't be purged
    // out from under the GPU
    void   Touch(vertCache_t *buffer);
    // this block won't have to zero a buffer pointer when it is purged,
    // but it must still wait for the frames to pass, in case the GPU
    // is still referencing it
    void   Free(vertCache_t *buffer);
    // updates the counter for determining which temp space to use
    // and which blocks can be purged
    // Also prints debugging info when enabled
    void   EndFrame();
    // listVBOMem calls this
    void   List();
    // releases a block's storage and returns its header to the free static list
    void   ActuallyFree(vertCache_t *block);
    static idCVar   r_showVertexCache;
    static idCVar   r_useArbBufferRange;
    static idCVar   r_reuseVertexCacheSooner;
    int    staticCountTotal;
    int    staticAllocTotal;  // for end of frame purging
    int    staticAllocThisFrame;   // debug counter
    int    staticCountThisFrame;
    int    dynamicAllocThisFrame;
    int    dynamicCountThisFrame;
    int    currentFrame;   // for purgable block tracking
    int    listNum;    // currentFrame % NUM_VERTEX_FRAMES, determines which tempBuffers to use
    bool   virtualMemory;   // not fast stuff
    bool   allocatingTempBuffer;   // force GL_STREAM_DRAW_ARB
    vertCache_t	 *tempBuffers[NUM_VERTEX_FRAMES];    // allocated at startup
    bool   tempOverflow;	  // had to alloc a temp in static memory
    idBlockAlloc<vertCache_t, 1024> headerAllocator;
    vertCache_t	 freeStaticHeaders;	  // head of doubly linked list
    vertCache_t	 freeDynamicHeaders;	 // head of doubly linked list
    vertCache_t	 dynamicHeaders;		 // head of doubly linked list
    vertCache_t  deferredFreeList;  // head of doubly linked list
    vertCache_t	 staticHeaders;   // head of doubly linked list in MRU order, staticHeaders.next is most recently used
    int    frameBytes;    // for each of NUM_VERTEX_FRAMES frames
    // the one global instance, defined in the implementation file
    extern   idVertexCache  vertexCache;


    Keep in mind this is from Revelation, which uses GLEW as the OpenGL call wrapper, so if you don't want to use GLEW you have to prefix the OpenGL calls with a q, like this: qglBufferSubData, etc.

    The buffer range prototypes are also not present in vanilla, so you have to make pointers for them in rendersystem_init.cpp, qgl.h and whatnot.

    So just use GLEW if you are not already :) it will save you a ton of work the next time you want to add some new OpenGL functionality.

  2. Nothing quite as elaborate :) I was just trying to change the glPointers to glVertexAttribArrays.

    Both are deprecated, but the vertex attribs are less deprecated and also allow more control. Sadly, getting them working correctly has been a lesson in futility for me :S.

    If someone wants to try out a simple test, there's a vertex attrib for normals in draw_arb2.cpp; the rest of the engine uses glNormalPointer for that one. Try changing it to use vertex attribs like in draw_arb2.cpp and see what happens :). Strangely enough, they used to work on Nvidia cards, but with my AMD card it goes completely bonkers, drawing the bumpmaps uber reflective.

    I also tried changing the glColorPointer calls, which resulted in fog and heat haze drawing solid; even though I specifically set the array to accept alpha values, it refused.


    the vertex attrib code for normals in draw_arb2.cpp looks like this


    glVertexAttribPointerARB(11, 3, GL_FLOAT, false, sizeof(idDrawVert), ac->normal.ToFloatPtr());


    It's enabled by calling glEnableVertexAttribArrayARB(11); and disabled by calling glDisableVertexAttribArrayARB(11); notice the number.

    That number is the index of the normals from the shaders.


    now in draw_common.cpp in the code for TG_REFLECT_CUBE the normals are done like this


    glNormalPointer( GL_FLOAT, sizeof( idDrawVert ), ac->normal.ToFloatPtr() );


    which is enabled by glEnableClientState( GL_NORMAL_ARRAY ); and disabled by glDisableClientState( GL_NORMAL_ARRAY );


    The normal pointers have no index number because they use standardized ones; the same goes for color pointers, texcoord pointers and vertex pointers.


    Vanilla uses a mess of these two different ways of controlling arrays, but besides looking ugly as hell it's no big deal;

    it was more of an experiment to see if I could use vertex attribs to replace them, one which failed hard :) as you will see if you try to use glVertexAttribPointerARB(11, 3, GL_FLOAT, false, sizeof(idDrawVert), ac->normal.ToFloatPtr()); instead of glNormalPointer( GL_FLOAT, sizeof( idDrawVert ), ac->normal.ToFloatPtr() ); in draw_common.cpp.


    Changing the index number from 11 to any other number still breaks the engine, at least for AMD cards :S

  3. Ok, that's pretty cool :) thanks for explaining.

    Well, my endeavour with changing the glPointer calls to vertex attribs hit a dead end, because both specs are deprecated in favour of shader code,

    and there is no apparent speed gain from either, as it differs a lot from gfx card to gfx card; sometimes the old glPointer calls are actually faster than the newer vertex attribs :S and vice versa.


    Seems that, at least in that regard, MH was right; still, batching texture coordinates using the old code should gain us a bit of extra oomph, so I'll look into that.

  4. Ok, tested, and it's indeed what caused the break on AMD cards :S

    Vanilla claims to use the standard index attribs, but if that's true, then why don't they work with generic vertex attributes?

    For example, just to test whether I could use them without breaking something, I started by replacing the glNormalPointer calls with vertex attributes.

    The result was broken normals, and to top it off I also got the dreaded plastique look on normal maps :( so I tried a different index number, and no matter which index number I use

    it still breaks; even if I use the index number from the draw_arb2.cpp normals, which is 11, it still does not work.

    So I changed that single vertex attrib in draw_arb2.cpp to use glNormalPointer instead, and it works just fine, arrrrrgh.

    So this is a no-go, as it simply won't work. There's also a rather nasty hack in draw_common.cpp where vanilla uses a normal as a texture coordinate for TG_DIFFUSE_CUBE Oo so figuring out the right index number here will be tons of fun, I promise :P hint: it's not any number you can come up with, hehe.

  5. Hmm, I might be misunderstanding a few things, so let me ask: do you have a special function -> SEED that batches things up into one large array? Or am I missing something?

    What I was talking about was batching vertex arrays to get rid of numerous texcoord calls, by pulling everything together into one large array and rendering that.


    I already did some work on that some time back, but the engine I based my work on had a bug that made it run awfully slow on AMD cards, so I'm afraid of pulling in my changes from that in case my batching work was what broke it. I'll make a backup copy of my current engine and try out the changes I made back then; if it works, you can say bye-bye to the old OpenGL 1.1 arrays, as the renderer will then use

    vertex attribs instead.

  6. Aye, got to avoid state changes as much as possible.

    SEED sounds like a good idea, and we can probably get some extra performance by borrowing BFG's thread functions too. I already did a little work on threads in vanilla, but it's far from complete,

    and at the moment only the network and filesystem code is threaded; the renderer is not, and would probably get a nice boost on a multicore PC.

  7. Oh god, I just talked to MH and it's pretty much pointless, as vanilla already batches pretty much everything, and the only fix at the moment is rewriting the whole thing to use GLSL (as ARB ASM cannot use the needed optimizations) or porting the whole thing to the BFG engine :( You can follow the discussion here: http://forums.inside3d.com/viewtopic.php?f=3&t=5576&p=54973#p54973

    The only other thing we could try is optimizing as much as possible of the rest of the code.

  8. Mmm :) batch processing would save a ton of client calls. I don't have the magic down to an art yet, but MH seems to be back at the inside3d forums, and he has

    made the RMQ engine use batch processing for a nice amount of speed, so it would probably not hurt to ask him for advice on doing this part.


    I hope he has the time to look at this :) he's a very experienced programmer.

  9. Btw, it might be worth looking at doing batch processing; vanilla still uses the old OpenGL 1.1 vertex arrays, but I played with replacing them with glVertexAttribPointer and to some extent it was working. My biggest problem was getting the right attribs in some places.

  10. It was originally an Nvidia extension but got merged into the ARB spec around OpenGL 2.2, I think.

    Btw, my implementation is better but it was not totally bug-free: I noticed that when standing near a wall with player shadows on, the shadow would sometimes cast outwards as a big black blob, depending on the orientation I faced.

  11. Hmm, ok, I'll see if I can explain it: basically, using this you won't get hollow shadows when stepping inside an object's shadow (it will clip the shadow, looking rather strange),

    a bit like two pieces of black paper where one is either seen through the other or obscured by the other.


    Final version of the depth renderer, I hope, but feel free to correct it ->


    // Renders or reuses the depth buffer for the current view and copies it into
    // the currently bound 2D texture.
    //   drawSurfs    - surfaces to draw with RB_T_FillDepthBuffer
    //   numDrawSurfs - number of entries in drawSurfs
    // NOTE(review): this listing has lost lines. Closing braces are gone, the 2D-only
    // check has no visible body (presumably an early return), and the four
    // argument lines ending in "true);" have no visible call head -- confirm all of
    // this against the original file.
    static void RB_STD_DrawDepthBuffer(drawSurf_t **drawSurfs, int numDrawSurfs) {
    // if we are just doing 2D rendering, we dont draw the depth buffer image
    if (!backEnd.viewDef->viewEntitys) {
    // get screen size
    int width = tr.GetScreenWidth();
    int height = tr.GetScreenHeight();
    // we can use the depth buffer we already have in most cases
    // NOTE(review): the condition below ASSIGNS to width and height (single '=')
    // rather than comparing, so both are overwritten with the viewport size and
    // the branch is taken whenever both results are non-zero. If a comparison
    // against the screen size was intended, this is a bug.
    if ((width = backEnd.viewDef->viewport.x2 - backEnd.viewDef->viewport.x1 + 1) &&
     (height = backEnd.viewDef->viewport.y2 - backEnd.viewDef->viewport.y1 + 1)) {
     backEnd.viewDef->viewport.x2 -
     backEnd.viewDef->viewport.x1 + 1,
     backEnd.viewDef->viewport.y2 -
     backEnd.viewDef->viewport.y1 + 1, true);
    else {
     // render depth to screen size
     GL_Viewport(0, 0, width, height);
     GL_Scissor(0, 0, width, height);
     glStencilFunc(GL_ALWAYS, 0, 255);
     // the first texture will be used for alpha tested surfaces
     RB_RenderDrawSurfListWithFunction(drawSurfs, numDrawSurfs, RB_T_FillDepthBuffer);
     // copy it to a texture
     glCopyTexSubImage2D(GL_TEXTURE_2D, 0, 0, 0, 0, 0, width, height);
     // reset the window clipping
     GL_Viewport(tr.viewportOffset[0] + backEnd.viewDef->viewport.x1,
     tr.viewportOffset[1] + backEnd.viewDef->viewport.y1,
     backEnd.viewDef->viewport.x2 + 1 - backEnd.viewDef->viewport.x1,
     backEnd.viewDef->viewport.y2 + 1 - backEnd.viewDef->viewport.y1);
     GL_Scissor(tr.viewportOffset[0] + backEnd.viewDef->viewport.x1,
       tr.viewportOffset[1] + backEnd.viewDef->viewport.y1,
       backEnd.viewDef->viewport.x2 + 1 - backEnd.viewDef->viewport.x1,
       backEnd.viewDef->viewport.y2 + 1 - backEnd.viewDef->viewport.y1);
     // the current modelView matrix is not valid
     backEnd.currentSpace = NULL;
    // this is a gray scale image hence the GL_NONE


    call it in RB_STD_DrawView after RB_STD_FillDepthBuffer


    like so


    // fill the depth buffer and clear color buffer to black except on

    // subviews

    RB_STD_FillDepthBuffer(drawSurfs, numDrawSurfs);

    // render the depthbuffer image revelator.

    RB_STD_DrawDepthBuffer(drawSurfs, numDrawSurfs);

  12. Btw, test code here, based on the exp renderer, for drawing the depth map.


    // Test variant of the depth-image pass: reuses the existing depth buffer when
    // the view has entities, otherwise re-renders depth at glConfig.vidWidth x
    // glConfig.vidHeight and copies it into the currently bound 2D texture.
    // NOTE(review): this listing has lost lines. Closing braces are gone, and the
    // four argument lines ending in "true);" have no visible call head -- confirm
    // against the original file.
    static void RB_STD_RenderViewDepthImage(void) {
    // we can use the depth buffer we already have in most cases
    if (backEnd.viewDef->viewEntitys) {
     backEnd.viewDef->viewport.x2 -
     backEnd.viewDef->viewport.x1 + 1,
     backEnd.viewDef->viewport.y2 -
     backEnd.viewDef->viewport.y1 + 1, true);
    else {
     // render the depth to the new size
     GL_Viewport(0, 0, glConfig.vidWidth, glConfig.vidHeight);
     GL_Scissor(0, 0, glConfig.vidWidth, glConfig.vidHeight);
     glStencilFunc(GL_ALWAYS, 0, 255);
     // the first texture will be used for alpha tested surfaces
     RB_RenderDrawSurfListWithFunction(backEnd.viewDef->drawSurfs, backEnd.viewDef->numDrawSurfs, RB_T_FillDepthBuffer);
     // copy it to a texture
     glCopyTexSubImage2D(GL_TEXTURE_2D, 0, 0, 0, 0, 0, glConfig.vidWidth, glConfig.vidHeight);
     // reset the window clipping
     GL_Viewport(tr.viewportOffset[0] + backEnd.viewDef->viewport.x1,
     tr.viewportOffset[1] + backEnd.viewDef->viewport.y1,
     backEnd.viewDef->viewport.x2 + 1 - backEnd.viewDef->viewport.x1,
     backEnd.viewDef->viewport.y2 + 1 - backEnd.viewDef->viewport.y1);
     GL_Scissor(tr.viewportOffset[0] + backEnd.viewDef->viewport.x1,
       tr.viewportOffset[1] + backEnd.viewDef->viewport.y1,
       backEnd.viewDef->viewport.x2 + 1 - backEnd.viewDef->viewport.x1,
       backEnd.viewDef->viewport.y2 + 1 - backEnd.viewDef->viewport.y1);
     // the current modelView matrix is not valid
     backEnd.currentSpace = NULL;


    Seems to work OK, but I noticed that it slows down the engine a bit even if I don't draw anything.

