filetermreader.c

Go to the documentation of this file.
00001 /*
00002  * Copyright (C) 2006  SRI International
00003  *
00004  * This library is free software; you can redistribute it and/or
00005  * modify it under the terms of the GNU Lesser General Public
00006  * License as published by the Free Software Foundation; either
00007  * version 2.1 of the License, or (at your option) any later version.
00008  *
00009  * This library is distributed in the hope that it will be useful,
00010  * but WITHOUT ANY WARRANTY; without even the implied warranty of
00011  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
00012  * Lesser General Public License for more details.
00013  *
00014  * You should have received a copy of the GNU Lesser General Public
00015  * License along with this library; if not, write to the Free Software
00016  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
00017  *
00018  * SRI International: 333 Ravenswood Ave, Menlo Park, CA 94025
00019  */
00020 
00021 #ifdef _WINDOWS
00022 #include <windows.h>
00023 #include <winsock.h>
00024 #include "oaa-windows.h"
00025 #else
00026 #include <sys/types.h>
00027 #include <sys/socket.h>
00028 #include <unistd.h>     /* close        */
00029 #include <sys/time.h>   /* For polling time */
00030 #include <sys/stat.h>
00031 #include <fcntl.h>
00032 #endif
00033 
00034 #include "stdpccts.h"
00035 
00036 #include <string.h>
00037 #include <stdlib.h>
00038 #include <math.h>
00039 #include <stdio.h>
00040 #include "liboaa.h"
00041 
00042 #include "filetermreader.h"
00043 #include "libicl_private.h"
00044 #include "stringbuffer.h"
00045 #include "stringbuffer_private.h"
00046 
/*
 * Buffer sizing constants.
 *
 *   INITBUFFERSZ  capacity, in bytes, given to a reader's term buffer when
 *                 the reader is created.
 *   AMOUNTTOREAD  upper bound on the number of bytes pulled from the file
 *                 by one fill operation.
 *
 * Visual C++ does not accept const integers as array sizes, so the Windows
 * build spells the values as enumerators instead.
 *
 * NOTE(review): the initial capacity is 8 MiB on Windows but only 8 bytes
 * elsewhere -- confirm that this difference is intentional.
 */
#if defined(_WINDOWS)
enum {
  INITBUFFERSZ = 8388608,
  AMOUNTTOREAD = 8192
};
#else
static const size_t AMOUNTTOREAD = 8192;
static const size_t INITBUFFERSZ = 8;
#endif
00057 
00058 struct FileTermReaderStruct
00059 {
00060   TermReader* superReader;
00061   char* currentBuffer;
00062   size_t bufCapacity;
00063   size_t bufUsed;
00064   FILE* fileHandle;
00065   stringbuffer_filter_t filterFunction;
00066   void* filterState;
00067 }
00068 ;
00069 
/*
 * State carried between calls of filterPercent_filter().
 */
struct FilterPercentStateStruct
{
  /* Non-zero while inside a '%' comment, i.e. until the next newline or
   * carriage return is seen. */
  int currentlyDiscarding;

  /* NOTE(review): not referenced anywhere in the visible part of this
   * file -- confirm whether it is still needed. */
  int discardNewlineIfNext;
};
00075 
00076 void fileTermReader_cleanup(TermReader*);
00077 ICLTerm* fileTermReader_getNext(TermReader*, double);
00078 void fileTermReader_fillData(FileTermReader* sr);
00079 void fileTermReader_addToCurrentBuffer(FileTermReader* sr, char* buf, ssize_t len);
00080 
/* Characters the percent-comment filter gives special meaning to. */
static const char PERCENT = '%';          /* starts a comment            */
static const char NEWLINE_CHAR = '\n';    /* ends a comment              */
static const char CARRIAGE_RETURN = '\r'; /* ends a comment as well      */
00084 
00085 FilterPercentState* filterPercent_createState()
00086 {
00087   FilterPercentState* result = (FilterPercentState*)malloc(sizeof(FilterPercentState));
00088   if(result) {
00089     memset(result, 0, sizeof(FilterPercentState));
00090   }
00091 
00092   return result;
00093 }
00094 
00095 int* filterPercent_filter(void* voidState, char toFilter, int* spaceForChar)
00096 {
00097   FilterPercentState* state = (FilterPercentState*)voidState;
00098   if(state->currentlyDiscarding) {
00099     if(toFilter == NEWLINE_CHAR || toFilter == CARRIAGE_RETURN) {
00100       /*
00101       printf("Reached end of line\n");
00102       */
00103       state->currentlyDiscarding = FALSE;
00104     }
00105     else {
00106       /*
00107       printf("Discarding %i\n", toFilter);
00108       */
00109     }
00110     return NULL;
00111   }
00112   else if(toFilter == PERCENT) {
00113     /*
00114     printf("Found percent\n");
00115     */
00116     state->currentlyDiscarding = TRUE;
00117     return NULL;
00118   }
00119   else {
00120     /*
00121     printf("Not filtering %i\n", toFilter);
00122     */
00123     *spaceForChar = toFilter;
00124     return spaceForChar;
00125   }
00126 }
00127 
00128 FileTermReader* fileTermReader_create(TermReader* t, FILE* fileHandle)
00129 {
00130   FileTermReader* r = (FileTermReader*)malloc(sizeof(FileTermReader));
00131   memset(r, 0, sizeof(FileTermReader));
00132   r->superReader = t;
00133   r->currentBuffer = (char*)malloc(INITBUFFERSZ);
00134   r->bufUsed = 0;
00135   r->bufCapacity = INITBUFFERSZ;
00136   /*
00137    * printf("File handle pointer = %p\n", fileHandle);
00138    */
00139   r->fileHandle = fileHandle;
00140   termReader_setReaderSpecificData(t, r);
00141   termReader_setType(t, FILETERMREADERTYPE);
00142   termReader_setGetNextCallback(t, fileTermReader_getNext);
00143   termReader_setCleanupCallback(t, fileTermReader_cleanup);
00144   termReader_setError(t, TERMREADER_OKAY);
00145   return r;
00146 }
00147 
00148 FileTermReader* fileTermReader_createWithFilter(TermReader* t, FILE* fileHandle, stringbuffer_filter_t filterFunction, void* filterState)
00149 {
00150   FileTermReader* r = (FileTermReader*)malloc(sizeof(FileTermReader));
00151   memset(r, 0, sizeof(FileTermReader));
00152   r->superReader = t;
00153   r->currentBuffer = (char*)malloc(INITBUFFERSZ);
00154   r->bufUsed = 0;
00155   r->bufCapacity = INITBUFFERSZ;
00156   r->fileHandle = fileHandle;
00157   r->filterFunction = filterFunction;
00158   r->filterState = filterState;
00159   termReader_setReaderSpecificData(t, r);
00160   termReader_setType(t, FILETERMREADERTYPE);
00161   termReader_setGetNextCallback(t, fileTermReader_getNext);
00162   termReader_setCleanupCallback(t, fileTermReader_cleanup);
00163   termReader_setError(t, TERMREADER_OKAY);
00164   return r;
00165 }
00166 
00167 void fileTermReader_cleanup(TermReader* reader)
00168 {
00169   FileTermReader* sr = (FileTermReader*)termReader_getReaderSpecificData(reader);
00170   if(sr->currentBuffer != NULL) {
00171     free(sr->currentBuffer);
00172   }
00173   if(sr != NULL) {
00174     free(sr);
00175   }
00176 }
00177 
00178 ICLTerm* fileTermReader_getNext(TermReader* reader, double unusedTimeout)
00179 {
00180   ICLTerm* nextEvent = NULL;
00181   stringbuffer_t sbuf;
00182   gboolean done = FALSE;
00183   gboolean hadOldData = FALSE;
00184   gboolean forceFill = FALSE;
00185   int lookedAhead = 0;
00186   FileTermReader* sr = (FileTermReader*)termReader_getReaderSpecificData(reader);
00187   unusedTimeout = unusedTimeout;
00188   
00189   CHECK_LEAKS();
00190   /*
00191   printf("fileTermReader_getNext\n");
00192   */
00193   while(!done) {
00194     if((sr->bufUsed == 0) || forceFill) {
00195       /*
00196       printf("fileTermReader_getNext calling fillData\n");
00197       */
00198       fileTermReader_fillData(sr);
00199     }
00200     else {
00201       /*
00202       printf("fileTermReader_getNext hadOldData\n");
00203       */
00204       forceFill = FALSE;
00205       hadOldData = TRUE;
00206     }
00207 
00208     /*
00209     printf("fileTermReader_getNext bufUsed = %li\n", sr->bufUsed);
00210     {
00211       unsigned int i;
00212       printf("fileTermReader_getNext buffer contents: ");
00213       for(i = 0; i < sr->bufUsed; ++i) {
00214         printf(" %o ", sr->currentBuffer[i]);
00215       }
00216       printf("\n");
00217 
00218       {
00219         char* data = (char*)malloc(sr->bufUsed + 1);
00220         memset(data, 0, sr->bufUsed + 1);
00221         memcpy(data, sr->currentBuffer, sr->bufUsed);
00222         printf("fileTermReader_getNext current buffer contains [%s]\n", data);
00223         free(data);
00224       }
00225     }
00226     parser_setDebug(TRUE);
00227     */
00228 
00229     sbuf.data = sr->currentBuffer;
00230     sbuf.len = sr->bufUsed;
00231     sbuf.index = 0;
00232     if(sr->filterFunction) {
00233       /*
00234       {
00235         char* data = (char*)malloc(sr->bufUsed + 1);
00236         memset(data, 0, sr->bufUsed + 1);
00237         memcpy(data, sr->currentBuffer, sr->bufUsed);
00238         fprintf(stderr, "Filtering [%s]\n", data);
00239         free(data);
00240       }
00241       */
00242       (void)stringbuffer_filter(sr->filterState, sr->filterFunction, &sbuf);
00243       sr->bufUsed = sbuf.len;
00244     }
00245     CHECK_LEAKS();
00246     if((sr->bufUsed > 0) &&
00247        (parser_getTermFromBuf(&nextEvent, &sbuf, &lookedAhead) != FALSE)) {
00248       size_t newUsed;
00249       size_t read = stringbuffer_getIndex(&sbuf);
00250       /*
00251       printf("looked ahead %i\n", lookedAhead);
00252       */
00253       read -= lookedAhead;
00254       CHECK_LEAKS();
00255       newUsed = sr->bufUsed - read;
00256       sr->currentBuffer = memmove(sr->currentBuffer,
00257                                   sr->currentBuffer + read,
00258                                   newUsed);
00259       sr->bufUsed = newUsed;
00260       sr->currentBuffer[sr->bufUsed] = '\0';
00261       done = TRUE;
00262 
00263       /*
00264       {
00265         char* ds = icl_NewStringFromTerm(nextEvent);
00266         printf("fileTermReader_getNext looks good from file:  got term %s\n", ds);
00267         icl_stFree(ds);
00268         {
00269           char* data = (char*)malloc(sr->bufUsed + 1);
00270           memset(data, 0, sr->bufUsed + 1);
00271           memcpy(data, sr->currentBuffer, sr->bufUsed);
00272           printf("fileTermReader_getNext current buffer contains [%s]\n", data);
00273           free(data);
00274         }
00275       }
00276       */
00277 
00278       parser_setDebug(FALSE);
00279       CHECK_LEAKS();
00280       return nextEvent;
00281     }
00282     CHECK_LEAKS();
00283     parser_setDebug(FALSE);
00284 
00285     if(termReader_getError(reader) != TERMREADER_OKAY) {
00286       /*
00287       printf("fileTermReader_getNext termReader_getError != TERMREADER_OKAY\n");
00288       */
00289       CHECK_LEAKS();
00290       return NULL;
00291     }
00292     else if(hadOldData) {
00293       /*
00294       printf("fileTermReader_getNext hadOldData, now forceFill\n");
00295       */
00296       forceFill = TRUE;
00297       hadOldData = FALSE;
00298       CHECK_LEAKS();
00299       continue;
00300     }
00301     else {
00302       /*
00303       printf("fileTermReader_getNext just continuing\n");
00304       */
00305       CHECK_LEAKS();
00306       continue;
00307     }
00308   }
00309   /*
00310   printf("fileTermReader_getNext NULL return\n");
00311   */
00312   CHECK_LEAKS();
00313   return NULL;
00314 }
00315 
00316 void fileTermReader_fillData(FileTermReader* sr)
00317 {
00318   char buf[AMOUNTTOREAD + 1];
00319   ssize_t numBytes = 0;
00320   int nextChar = EOF;
00321   size_t i;
00322 
00323   for(i = 0; i < AMOUNTTOREAD; ++i) {
00324     nextChar = fgetc(sr->fileHandle);
00325     if(nextChar == EOF) {
00326       break;
00327     }
00328     else {
00329       buf[i] = (char)nextChar;
00330       ++numBytes;
00331     }
00332   }
00333 
00334   /*
00335   printf("Filled with %li bytes\n", numBytes);
00336   */
00337 
00338   buf[numBytes] = '\0';
00339 
00340   if(numBytes == 0) {
00341     termReader_setError(sr->superReader, TERMREADER_EOF);
00342     return;
00343   }
00344 
00345   fileTermReader_addToCurrentBuffer(sr, buf, numBytes);
00346   termReader_setError(sr->superReader, TERMREADER_OKAY);
00347 }
00348 
00349 void fileTermReader_addToCurrentBuffer(FileTermReader* sr, char* buf, ssize_t len)
00350 {
00351   // Check our current buffer capacity--double or halve it as appropriate
00352   // Actually, I don't know if halving it is efficient or not...
00353   size_t needCapacity = len + sr->bufUsed + 1;
00354 
00355   if(needCapacity > sr->bufCapacity) {
00356     while(sr->bufCapacity < needCapacity) {
00357       sr->bufCapacity *= 2;
00358     }
00359     sr->currentBuffer = realloc(sr->currentBuffer, sr->bufCapacity);
00360   }
00361   else if(needCapacity < (sr->bufCapacity / 2)) {
00362     sr->bufCapacity /= 2;
00363     sr->currentBuffer = realloc(sr->currentBuffer, sr->bufCapacity);
00364   }
00365   else {
00366     // Just leave it alone
00367   }
00368 
00369   memcpy(sr->currentBuffer + sr->bufUsed, buf, len);
00370   sr->bufUsed += len;
00371   sr->currentBuffer[sr->bufUsed] = '\0';
00372 }

Generated on Wed May 23 17:20:10 2007 using doxygen 1.5.2