#ifndef __REGEXP_H
# define __REGEXP_H
/***/
/******************************************************************************/
/* NOTE(review): every #include directive in the reviewed copy of this        */
/* header had lost its header name.  WNT names are reconstructed from the     */
/* _INC_* guard around each directive; the two non-WNT names are a best       */
/* guess (<limits.h> is required for CHAR_BIT, UCHAR_MAX and USHRT_MAX).      */
/* Confirm against the original file.                                         */
/******************************************************************************/
# ifdef WNT
#  ifdef UNICODE_API
#   ifndef _UNICODE
#    define _UNICODE
#   endif  /* _UNICODE */
#  endif  /* UNICODE_API */
/***/
#  ifndef _INC_TCHAR
#   include <tchar.h>
#  endif  /* _INC_TCHAR */
/***/
#  ifndef _INC_STDLIB
#   include <stdlib.h>
#  endif  /* _INC_STDLIB */
/***/
#  ifndef _INC_MALLOC
#   include <malloc.h>
#  endif  /* _INC_MALLOC */
/***/
#  ifndef _INC_LIMITS
#   include <limits.h>
#  endif  /* _INC_LIMITS */
#  ifndef _INC_STRING
#   include <string.h>
#  endif  /* _INC_STRING */
/***/
/* REGEXP_API decorates the prototypes below: dllexport while building the    */
/* library itself ( __WOKUnix_DLL ), dllimport for its clients, and nothing   */
/* at all when HAVE_NO_DLL is defined.                                        */
#  if !defined(HAVE_NO_DLL)
#   ifdef __WOKUnix_DLL
#    define REGEXP_API __declspec( dllexport )
#   else
#    define REGEXP_API __declspec( dllimport )
#   endif  /* __WOKUnix_DLL */
#  else
#   define REGEXP_API
#  endif
# else
/* Outside WNT the generic-text names are mapped onto plain char types.       */
#  define _TEXT( arg ) arg
#  define REGEXP_API
#  include <stdlib.h>
#  include <limits.h>
typedef char          _TCHAR;
typedef char          TCHAR;
typedef unsigned char _TUCHAR;
# endif  /* WNT */
/***/
/******************************************************************************/
/* Definitions for data structures callers pass the regex library.            */
/* Copyright (C) 1985 Free Software Foundation, Inc.                          */
/*                                                                            */
/* This program is free software; you can redistribute it and/or              */
/* modify it under the terms of the GNU Lesser General Public                 */
/* License as published by the Free Software Foundation; either               */
/* version 2.1 of the License, or (at your option) any later version.         */
/*                                                                            */
/* This program is distributed in the hope that it will be useful,            */
/* but WITHOUT ANY WARRANTY; without even the implied warranty of             */
/* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the               */
/* GNU Lesser General Public License for more details.                        */
/*                                                                            */
/* You should have received a copy of the GNU Lesser General Public           */
/* License along with the GNU C Library; if not, write to the Free            */
/* Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA          */
/* 02111-1307 USA.                                                            */
/******************************************************************************/
/******************************************************************************/
/* Modified by EUG ( MATRA Datavision ) for Windows NT and UNICODE ( 1996 )   */
/******************************************************************************/
/***/
/******************************************************************************/
/* Define number of parens for which we record the beginnings and ends.       */
/* This affects how much space the `struct re_registers' type takes up.       */
/******************************************************************************/
#ifndef RE_NREGS
# define RE_NREGS 10
#endif
/***/
/******************************************************************************/
/*                                                                            */
/* width of a byte in bits                                                    */
/*                                                                            */
/******************************************************************************/
/***/
#ifdef _UNICODE
# define BYTEWIDTH ( CHAR_BIT * sizeof ( TCHAR ) )
# define CHAR_MASK USHRT_MAX
#else
# define BYTEWIDTH CHAR_BIT
# define CHAR_MASK UCHAR_MAX
#endif  /* _UNICODE */
/***/
/******************************************************************************/
/* These bits are used in the obscure_syntax variable to choose among         */
/* alternative regexp syntaxes.                                               */
/*                                                                            */
/* 1 means plain parentheses serve as grouping, and backslash                 */
/*   parentheses are needed for literal searching.                            */
/* 0 means backslash-parentheses are grouping, and plain parentheses          */
/*   are for literal searching.                                               */
/******************************************************************************/
#define RE_NO_BK_PARENS 1
/******************************************************************************/
/* 1 means plain | serves as the "or"-operator, and \| is a literal.          */
/* 0 means \| serves as the "or"-operator, and | is a literal.                */
/******************************************************************************/
#define RE_NO_BK_VBAR 2
/******************************************************************************/
/* 0 means plain + or ? serves as an operator, and \+, \? are literals.       */
/* 1 means \+, \? are operators and plain +, ? are literals.                  */
/******************************************************************************/
#define RE_BK_PLUS_QM 4
/******************************************************************************/
/* 1 means | binds tighter than ^ or $.                                       */
/* 0 means the contrary.                                                      */
/******************************************************************************/
#define RE_TIGHT_VBAR 8
/******************************************************************************/
/* 1 means treat \n as an _OR operator                                        */
/* 0 means treat it as a normal character                                     */
/******************************************************************************/
#define RE_NEWLINE_OR 16
/******************************************************************************/
/* 0 means that special characters (such as *, ^, and $) always have          */
/*   their special meaning regardless of the surrounding context.             */
/* 1 means that special characters may act as normal characters in some       */
/*   contexts. Specifically, this applies to:                                 */
/*     ^ - only special at the beginning, or after ( or |                     */
/*     $ - only special at the end, or before ) or |                          */
/*     *, +, ? - only special when not after the beginning, (, or |           */
/******************************************************************************/
#define RE_CONTEXT_INDEP_OPS 32
/******************************************************************************/
/* Now define combinations of bits for the standard possibilities.            */
/******************************************************************************/
#define RE_SYNTAX_AWK   ( RE_NO_BK_PARENS | RE_NO_BK_VBAR | RE_CONTEXT_INDEP_OPS )
#define RE_SYNTAX_EGREP ( RE_SYNTAX_AWK | RE_NEWLINE_OR )
#define RE_SYNTAX_GREP  ( RE_BK_PLUS_QM | RE_NEWLINE_OR )
#define RE_SYNTAX_EMACS 0
/******************************************************************************/
/* This data structure is used to represent a compiled pattern.               */
/******************************************************************************/
typedef struct re_pattern_buffer {

 /* Space holding the compiled pattern commands. */
 _TCHAR* buffer;

 /* Size of space that buffer points to */
 int allocated;

 /* Length of portion of buffer actually occupied */
 int used;

 /* Pointer to fastmap, if any, or zero if none.               */
 /* re_search uses the fastmap, if there is one,               */
 /* to skip quickly over totally implausible characters        */
 _TCHAR* fastmap;

 /* Translate table to apply to all characters before          */
 /* comparing. Or zero for no translation. The translation     */
 /* is applied to a pattern when it is compiled and to         */
 /* data when it is matched.                                   */
 _TCHAR* translate;

 /* Set to zero when a new pattern is stored,                  */
 /* set to one when the fastmap is updated from it.            */
 _TCHAR fastmap_accurate;

 /* Set to one by compiling fastmap                            */
 /* if this pattern might match the null string.               */
 /* It does not necessarily match the null string              */
 /* in that case, but if this is zero, it cannot.              */
 /* 2 as value means can match null string                     */
 /* but at end of range or before a character                  */
 /* listed in the fastmap.                                     */
 _TCHAR can_be_null;

} RE_PATTERN_BUFFER, *PRE_PATTERN_BUFFER;
/******************************************************************************/
/* Structure to store "register" contents data in.                            */
/* Pass the address of such a structure as an argument to re_match, etc.,     */
/* if you want this information back.                                         */
/*                                                                            */
/* start[i] and end[i] record the string matched by \( ... \) grouping i,     */
/* for i from 1 to RE_NREGS - 1.                                              */
/* start[0] and end[0] record the entire string matched.                      */
/******************************************************************************/
typedef struct re_registers {

 int start[ RE_NREGS ];
 int end[ RE_NREGS ];

} RE_REGISTERS, *PRE_REGISTERS;
/******************************************************************************/
/* These are the command codes that appear in compiled regular expressions,   */
/* one per byte. Some command codes are followed by argument bytes.           */
/* A command code can specify any interpretation whatever for its arguments.  */
/* Zero-bytes may appear in the compiled regular expression.                  */
/*                                                                            */
/* The enumerators carry no explicit values, so their numbering is purely     */
/* positional: do not reorder them or insert new ones in the middle.          */
/******************************************************************************/
enum regexpcode {

 unused,
 exactn,              /* followed by one byte giving n, and then by n       */
                      /* literal bytes                                      */
 begline,             /* fails unless at beginning of line                  */
 endline,             /* fails unless at end of line                        */
 jump,                /* followed by two bytes giving relative address to   */
                      /* jump to                                            */
 on_failure_jump,     /* followed by two bytes giving relative address of   */
                      /* place to resume at in case of failure.             */
 finalize_jump,       /* Throw away latest failure point and then jump to   */
                      /* address.                                           */
 maybe_finalize_jump, /* Like jump but finalize if safe to do so.           */
                      /* This is used to jump back to the beginning         */
                      /* of a repeat. If the command that follows           */
                      /* this jump is clearly incompatible with the         */
                      /* one at the beginning of the repeat, such that      */
                      /* we can be sure that there is no use backtracking   */
                      /* out of repetitions already completed,              */
                      /* then we finalize.                                  */
 dummy_failure_jump,  /* jump, and push a dummy failure point.              */
                      /* This failure point will be thrown away             */
                      /* if an attempt is made to use it for a failure.     */
                      /* A + construct makes this before the first repeat.  */
 anychar,             /* matches any one character                          */
 charset,             /* matches any one char belonging to specified set.   */
                      /* First following byte is # bitmap bytes.            */
                      /* Then come bytes for a bit-map saying which chars   */
                      /* are in.                                            */
                      /* Bits in each byte are ordered low-bit-first.       */
                      /* A character is in the set if its bit is 1.         */
                      /* A character too large to have a bit in the map     */
                      /* is automatically not in the set                    */
 charset_not,         /* similar but match any character that is NOT one of */
                      /* those specified                                    */
 start_memory,        /* starts remembering the text that is matched        */
                      /* and stores it in a memory register.                */
                      /* followed by one byte containing the register       */
                      /* number. Register numbers must be in the range 0    */
                      /* through NREGS.                                     */
                      /* NOTE(review): "NREGS" presumably refers to         */
                      /* RE_NREGS - confirm against the matcher.            */
 stop_memory,         /* stops remembering the text that is matched         */
                      /* and stores it in a memory register.                */
                      /* followed by one byte containing the register       */
                      /* number. Register numbers must be in the range 0    */
                      /* through NREGS.                                     */
 duplicate,           /* match a duplicate of something remembered.         */
                      /* Followed by one byte containing the index of the   */
                      /* memory register.                                   */
 before_dot,          /* Succeeds if before dot                             */
 at_dot,              /* Succeeds if at dot                                 */
 after_dot,           /* Succeeds if after dot                              */
 begbuf,              /* Succeeds if at beginning of buffer                 */
 endbuf,              /* Succeeds if at end of buffer                       */
 wordchar,            /* Matches any word-constituent character             */
 notwordchar,         /* Matches any char that is not a word-constituent    */
 wordbeg,             /* Succeeds if at word beginning                      */
 wordend,             /* Succeeds if at word end                            */
 wordbound,           /* Succeeds if at a word boundary                     */
 notwordbound,        /* Succeeds if not at a word boundary                 */
 syntaxspec,          /* Matches any character whose syntax is specified.   */
                      /* followed by a byte which contains a syntax code,   */
                      /* Sword or such like                                 */
 notsyntaxspec        /* Matches any character whose syntax differs from    */
                      /* the specified.                                     */

};
/******************************************************************************/
/* Function prototypes                                                        */
/*                                                                            */
/* NOTE(review): re_match_2 takes _TUCHAR* text arguments whereas every other */
/* entry point takes _TCHAR*; left unchanged here because callers depend on   */
/* the declared signatures.                                                   */
/******************************************************************************/
/***/
# ifdef __cplusplus
extern "C" {
# endif  /* __cplusplus */
/***/
REGEXP_API _TCHAR* re_compile_pattern ( _TCHAR*, int, PRE_PATTERN_BUFFER );
REGEXP_API void    re_compile_fastmap ( PRE_PATTERN_BUFFER );
REGEXP_API int     re_search (
                    PRE_PATTERN_BUFFER, _TCHAR*, int, int, int, PRE_REGISTERS
                   );
REGEXP_API int     re_search_2 (
                    PRE_PATTERN_BUFFER, _TCHAR*, int,_TCHAR*, int, int, int,
                    PRE_REGISTERS, int
                   );
REGEXP_API int     re_match (
                    PRE_PATTERN_BUFFER, _TCHAR*, int, int, PRE_REGISTERS
                   );
REGEXP_API int     re_match_2 (
                    PRE_PATTERN_BUFFER, _TUCHAR*, int, _TUCHAR*, int, int,
                    PRE_REGISTERS, int
                   );
REGEXP_API int     re_set_syntax ( int );
/***/
# ifdef __cplusplus
}
# endif  /* __cplusplus */
/***/
/******************************************************************************/
#endif  /* __REGEXP_H */