// Show me something else than C++
#include "StdAfx.h"
//===============================================
// (c) 2002-2003 Angel Sinigersky, Karlsruhe
//===============================================
//===============================================
// Created: 16.04.2002
//===============================================
//_________________________________
//
// Standard includes
//
#include <stdlib.h>
#include <ctype.h>
#include <assert.h>
//_________________________________
//
// own header
//
#include "StringProcessingUtils.h"
//_________________________________
using namespace std;
//=================================================================================================
#define MAX_LINE_ENDINGS 4
// Recognized line-ending sequences. The entries are tried in this order at
// every buffer position, so the two-byte endings must come first in the list;
// otherwise "\r\n" would be consumed as "\r" followed by a separate "\n".
static const char *pcLineEndings[ MAX_LINE_ENDINGS ] =
{
    "\n\r",
    "\r\n",
    "\n",
    "\r",
};
// Returns the length of the line-ending sequence that starts exactly at
// pcPos, or 0 if none starts there. At most iAvail bytes are inspected, so
// the caller's buffer does not have to be 0-terminated.
static int LineEndingLengthAt( const char *pcPos,   // position to test
                               const int iAvail     // bytes readable from pcPos
                             )
{
    for( int iEnding = 0; iEnding < MAX_LINE_ENDINGS; iEnding++ )
    {
        const char *pcEnding = pcLineEndings[ iEnding ];
        int iMatched = 0;
        while( pcEnding[ iMatched ] != '\0' &&
               iMatched < iAvail &&
               pcPos[ iMatched ] == pcEnding[ iMatched ] )
            iMatched++;
        if( pcEnding[ iMatched ] == '\0' )
            return iMatched;    // the whole ending sequence matched
    } // for
    return 0;
}
// Splits the given buffer into a list of lines. Line separator can be
// one of the sequences: "\r", "\n", "\r\n", "\n\r". Sequences like
// "\r\r" or "\n\n" are treated as empty lines; an empty string is inserted
// in the list. If the input buffer has the length zero, the count of lines
// is 0. A single line-ending sequence at the very end of the buffer does not
// produce an additional (empty) line.
// The line-ending sequences are stripped & not written to the output.
// If iInBuffLen is >= 0, exactly that many bytes are processed: nothing
// behind them is read, the buffer does not need a terminating 0, and 0-bytes
// inside the range are treated as ordinary line content.
// Any previous contents of vsList are discarded.
// Returns the count of the lines in the list.
int SplitBuffIntoLines( const char *pcBuffer,               // buffer to split in lines
                        std::vector<std::string>& vsList,   // list with strings for each line to be filled
                        const int iInBuffLen /*= -1*/       // length of input buffer or -1 if 0-ended string
                      )
{
    assert( pcBuffer != NULL );
    const int iSrcLen = ( iInBuffLen >= 0 )
                            ? iInBuffLen                                // length of the source buffer is given
                            : static_cast<int>( strlen( pcBuffer ) );   // determine length of the source buffer
    vsList.clear();     // clean the previous contents of the list
    int iLineStart = 0; // index of the first char of the line being collected
    int iPos = 0;       // index of the current position in source
    while( iPos < iSrcLen )
    {
        const int iEndingLen = LineEndingLengthAt( &pcBuffer[ iPos ], iSrcLen - iPos );
        if( iEndingLen == 0 )
        {   // ordinary character, it belongs to the current line
            iPos++;
            continue;
        } // if
        // a line ending starts here: save the line collected so far
        vsList.push_back( std::string( &pcBuffer[ iLineStart ], iPos - iLineStart ) );
        // continue with the char right after the line break
        iPos += iEndingLen;
        iLineStart = iPos;
    } // while
    // the last line is stored only if something follows the final line break
    if( iLineStart < iSrcLen )
        vsList.push_back( std::string( &pcBuffer[ iLineStart ], iSrcLen - iLineStart ) );
    return static_cast<int>( vsList.size() );
}
//=================================================================================================
// Cuts a 0-terminated line into items at every occurrence of the separator
// character and stores the items, in order, in a vector of strings.
// The previous contents of the target list are discarded.
// An empty input line yields an empty list; otherwise a line containing
// N separators yields N+1 items (the text after the last separator counts
// as an item too), minus the empty ones if fSkipEmptyItems is set.
// Trimming (if requested) happens before the check for emptiness.
// Returns the count of the items in the list.
int SplitLineIntoList( const char *pcLine,                      // line to be processed
                       vector<string>& vsList,                  // target list to be filled
                       const bool fTrimEachItem /*= false*/,    // flag if every item should be trimmed before storage
                       const char cSeparator, /*= '\t'*/        // char to be used as separator of list items
                       const bool fSkipEmptyItems /*= false*/   // flag if empty items are to be left out of the list
                     )
{
    assert( pcLine != NULL );
    vsList.clear();                     // drop whatever the list held before
    if( pcLine[ 0 ] == '\0' )
        return 0;                       // zero-length string passed to us
    const char *pcItemStart = pcLine;   // first char of the item being collected
    const char *pcScan = pcLine;        // running position in the source
    for( ;; )
    {
        // the terminating 0 closes the last item; it is tested first, so a
        // separator of '\0' never splits anything
        const bool fAtEnd = ( *pcScan == '\0' );
        if( fAtEnd || *pcScan == cSeparator )
        {   // an item ends here: extract it from the input
            string sItem( pcItemStart, pcScan - pcItemStart );
            if( fTrimEachItem )
                TrimStr( sItem );
            if( !fSkipEmptyItems || !sItem.empty() )
                vsList.push_back( sItem );
            if( fAtEnd )
                break;
            pcItemStart = pcScan + 1;   // next item starts right after the separator
        } // if
        pcScan++;
    } // for
    return static_cast<int>( vsList.size() );
}
//=================================================================================================
// Removes any whitespace characters (as classified by isspace()) from the
// beginning and from the end of the string, in place. Whitespace inside the
// string is kept. A string consisting only of whitespace becomes empty.
void TrimStr( std::string& sString )
{
    // isspace() is only defined for values representable as unsigned char
    // (or EOF), so every char is converted before it is classified; a plain
    // char may be negative for non-ASCII bytes.
    std::string::size_type uEnd = sString.length();     // one past the last char to keep
    while( uEnd > 0 && isspace( static_cast<unsigned char>( sString[ uEnd - 1 ] ) ) )
        uEnd--;
    std::string::size_type uBegin = 0;                  // first char to keep
    while( uBegin < uEnd && isspace( static_cast<unsigned char>( sString[ uBegin ] ) ) )
        uBegin++;
    sString.erase( uEnd );          // cut the trailing whitespace first ...
    sString.erase( 0, uBegin );     // ... so uBegin still refers to the right char
}
//=================================================================================================
// Removes any whitespace characters (as classified by isspace()) from the
// beginning and from the end of the 0-terminated string, in place. Whitespace
// inside the string is kept. A string consisting only of whitespace (of any
// kind, not just blanks) becomes empty. The result is never longer than the
// input, so no additional buffer space is needed.
void TrimStr( char* pcString )
{
    assert( pcString != NULL );
    // isspace() is only defined for values representable as unsigned char
    // (or EOF), so every char is converted before it is classified; a plain
    // char may be negative for non-ASCII bytes.
    size_t uEnd = strlen( pcString );   // one past the last char to keep
    while( uEnd > 0 && isspace( static_cast<unsigned char>( pcString[ uEnd - 1 ] ) ) )
        uEnd--;
    size_t uBegin = 0;                  // first char to keep
    while( uBegin < uEnd && isspace( static_cast<unsigned char>( pcString[ uBegin ] ) ) )
        uBegin++;
    const size_t uNewLen = uEnd - uBegin;
    if( uBegin > 0 )
        memmove( pcString, &pcString[ uBegin ], uNewLen );  // ranges overlap, memcpy is not allowed here
    pcString[ uNewLen ] = '\0';
}
//=================================================================================================
// Fills a vector of strings with copies of the C strings of a "char **" array.
// The vector is resized to exactly uiItemsCount elements; whatever it held
// before is overwritten or dropped. Neither the array pointer nor any of its
// first uiItemsCount entries may be NULL (checked via assert).
void CopyList( const char **ppcList,                // source list containing strings
               const unsigned int uiItemsCount,     // count of strings in the source array
               vector<string>& vsList               // target list to be filled
             )
{
    assert( ppcList != NULL );
    vsList.resize( uiItemsCount );      // bring the target list to its final size
    const char **ppcSource = ppcList;   // walks the source array in step with the target
    for( vector<string>::iterator itTarget = vsList.begin();
         itTarget != vsList.end();
         ++itTarget, ++ppcSource )
    {
        assert( *ppcSource != NULL );
        *itTarget = *ppcSource;
    } // for
}
//=================================================================================================