« Previous Next »

Thread: Minor Enhancement to Microsoft's STS-to-CSV Parser C++ Console App

Last post 01-05-2006 3:54 PM by LogParser User : Mark Vogt. 0 replies.

Average Rating Rate It (5)

RSS

Page 1 of 1 (1 items)

Sort Posts:

  • 01-05-2006, 3:54 PM

    Minor Enhancement to Microsoft's STS-to-CSV Parser C++ Console App

    Greetings all,

    I recently started working with Logparser 2.2, and needed it to process both IIS logs and STS logs. As many of you might have guessed, the IIS logs processed quite nicely, using Logparser 2.2's IISW3C plugin to parse them.

    The STS logs were another matter - no obvious Logparser plugin to aid me... but I did come across a semi-popular article at Microsoft:

    http://msdn.microsoft.com/library/default.asp?url=/library/en-us/odc_SP2003_ta/html/ODC_WSSUsageEventLogging.asp

    The author - Radu Rusu - does a fine job of providing bulletproof C++ sample code and excellent instructions. His STSlog-to-CSV Transformer console application compiled, built and worked precisely as he described...

    But his output file had no header line of column names in it - something CSV files really benefit from in order to be completely useful.

    I added that extra code to his snippet, and offer it here for everyone to use. Your end result will be a Windows Console App that can be added to a BAT file used to automate processing of both IIS logs and STS logs.

    Cheers, and thanks again to Radu for an excellent article...

    - MV

    BEGIN CODE ===============v
    #include "stdafx.h"
    #include "windows.h"
    #include "assert.h"
    #include <stdio.h>
    /*
     * Fixed-size header found at the start of every record in the log file.
     * main() overlays this struct directly on the mapped file bytes, so member
     * order and types must not be changed.
     *
     * The cb* members are byte counts; main() adds them together (plus the
     * fixed-width GUID/timestamp and separators) to locate each string that
     * follows the header and to compute the size of the whole record.
     *
     * NOTE(review): the first member is a pointer, so sizeof(VLogFileEntry)
     * depends on the target's pointer width. Presumably the on-disk layout
     * matches a 32-bit build -- confirm before compiling for x64.
     */
    struct _VLogFileEntry
    {
        struct _VLogFileEntry *pPrev;   // Points to the previous entry
        unsigned char  bitFlags;
        unsigned short cbEntry;         // Bytes to skip ahead to reach the next entry
        unsigned short cbSiteUrl;       // Length of the site URL string
        unsigned short cbWeb;           // Length of the subsite string
        unsigned short cbDoc;           // Length of the document string
        unsigned long  cBytes;          // Bandwidth consumed (bytes in + bytes out)
        unsigned short httpStatus;
        unsigned short cbUser;          // Length of the user string
        unsigned short cbQS;
        unsigned short cbRef;
        short          cbUAS;           // Signed, unlike the other lengths
        long           reserved;
    };
    typedef struct _VLogFileEntry VLogFileEntry;
    int main(int argc, char* argv[])
    {
        bool fError = FALSE;
        if (argc < 3)
        {
            printf(
           "\nUsage: %s wsslogfile csvfile optionalField1 optionalField2",  argv[0]);
            return(1);
        }
      
        char *szFile = argv[1];
        char *szCsvFile = argv[2];
        char *szOptionalField1 = argc > 3 ? argv[3] : NULL;
        char *szOptionalField2 = argc > 4 ? argv[4] : NULL;
        char *szGuid = NULL;
        char *szReplace = NULL;
       
        /* Format of each CSV line. Include optional fields
        passed as command line arguments, if any*/
        char *szFormat = "%s,%s,%s,%s,%s,%s,%s,%s\r\n";
        if (NULL == szOptionalField1)
            szFormat += 3;
        if (NULL == szOptionalField2)
            szFormat += 3;
       
        FILE *csvFile = fopen( szCsvFile, "a");
        // Bytes (with no braces)
        static const unsigned long cbSiteGuid  = 36;
        static const unsigned short cbTimeStamp = 8;
        printf("\r\nParsing %s to %s \r\n",  szFile, szCsvFile);
        char *pBase, *pEnd;
        HANDLE hF, hFM;
        if ((hF = CreateFileA(
                szFile,
                GENERIC_READ,
                0,
                NULL,
                OPEN_EXISTING,
                FILE_ATTRIBUTE_NOT_CONTENT_INDEXED,
                NULL)) == INVALID_HANDLE_VALUE)
        {
            printf(
                "Can't open file %s (perhaps because it doesn't exist)",
            szFile);
            return (1);
        }
        DWORD dwFileSize, dwFileSizeHigh = 0;
        dwFileSize = GetFileSize(hF, &dwFileSizeHigh);
        /* We should never encounter a file larger than about 1 GB */
        if (dwFileSizeHigh || dwFileSize > 1000000000)
        {
            printf(" File too large %s", szFile);
            CloseHandle(hF);
            return (1);
        }
        if (dwFileSize == 0)
        {
            printf(" Skipping empty file %s", szFile);
            CloseHandle(hF);
            return (1);
        }
        hFM = CreateFileMapping(hF, NULL, PAGE_WRITECOPY, 0, 0, NULL);
        if (NULL == hFM ||
    NULL == (pBase = (char *)MapViewOfFile(hFM, FILE_MAP_COPY, 0, 0, 0)))
        {
            printf(" Can't map file %s", szFile);   
            if (hFM)
                CloseHandle(hFM);
            CloseHandle(hF);
            return (1);
        }
        pEnd = pBase + dwFileSize - sizeof(VLogFileEntry);
        char *pCur, *pszSite, *pszSiteGuid, *pszTS;
        char *pszWeb, *pszDoc, *pszUser;
        VLogFileEntry *pLFE;
        unsigned long cItemsProcessed = 0;
        unsigned long cbEntrySize = 0;
        const unsigned long maxCbEntrySize = 2048;
      //MV: Start output CSV file with header (column) labels - very useful in future...
     fprintf(csvFile, szFormat,
                "Timestamp",
                "SiteGUID",
                "SiteURL",
                "Subsite",
                "Document",
                "User",
                "Optional1",
                "Optional2");
     //MV: Now begin looping through all the logfile entries, entering them into the output file...
        for(pCur = pBase;
            pCur < pEnd;
            pCur += cbEntrySize)
        {
            pLFE = (VLogFileEntry *)pCur;
            cbEntrySize = sizeof(VLogFileEntry) \
                + cbSiteGuid + cbTimeStamp + 2 \
                + pLFE->cbSiteUrl +  pLFE->cbWeb +  pLFE->cbDoc \
                + pLFE->cbUser +  pLFE->cbQS  +  pLFE->cbRef \
                + pLFE->cbUAS  + 9;   //7 NULLs and 2 bytes for \r\n
            // Check for corrupt log files
            fError  = (cbEntrySize > maxCbEntrySize ||
            !(*(pCur + sizeof(VLogFileEntry)) == '\r') ||
            !(*(pCur +  sizeof(VLogFileEntry) + 1) == '\n') ||
            !(pLFE->cbEntry == cbEntrySize));
            if (fError)
            {
                printf("Error reading Wss log file, aborting.\n");
                goto cleanup;
            }
            // Skip  2 bytes for \r\n
            pszSiteGuid = pCur + sizeof(VLogFileEntry) + 2;
            // Skip 1 byte for the NULL separator
            pszTS = pszSiteGuid + cbSiteGuid + 1;
            pszSite = pszTS + cbTimeStamp + 1;
            // Stop at the end of the site url
            *(pszSite + pLFE->cbSiteUrl) = '\0';
            // Skip 1 byte for the NULL separator
            pszWeb = pszSite + pLFE->cbSiteUrl + 1;
            pszDoc  = pszWeb + pLFE->cbWeb + 1;
            pszUser = pszDoc + pLFE->cbDoc + 1;
         
            /* Output is in the format: timestamp, site guid, siteUrl,
             subsite, document, user, optional1, optional2*/
            fprintf(csvFile, szFormat,
                        pszTS,
                        pszSiteGuid,
                        pszSite,
                        pszWeb,
                        pszDoc,
                        pszUser,
                        szOptionalField1,
                        szOptionalField2);
        }
        cleanup:
        UnmapViewOfFile(pBase);
        CloseHandle(hFM);
        CloseHandle(hF);
        fclose(csvFile);
        return fError;
    }
    END CODE =================^
Page 1 of 1 (1 items)
Microsoft Communities