/*
PCRE_DLL.ahk

Wrapper routines to ease the use of the functions in PCRE3.dll.
I create pseudo classes to ease persistence of information
(compiled state, etc.).
http://www.autohotkey.com/forum/viewtopic.php?t=10651

// by Philippe Lhoste http://Phi.Lho.free.fr
// File/Project history:
 1.00.000 -- 2006/06/22 (PL) -- First release.
 0.01.000 -- 2006/06/06 (PL) -- Creation.
*/
/* Copyright notice: See the PhiLhoSoftLicence.txt file for details.
This file is distributed under the zlib/libpng license.
Copyright (c) 2006 Philippe Lhoste / PhiLhoSoft
*/

;/* Options */
; Bit flags, meant to be combined with | and passed as the _options
; parameter of the wrapper functions in this file.
#PCRE_CASELESS := 0x00000001
#PCRE_MULTILINE := 0x00000002
#PCRE_DOTALL := 0x00000004
#PCRE_EXTENDED := 0x00000008
#PCRE_ANCHORED := 0x00000010
#PCRE_DOLLAR_ENDONLY := 0x00000020
#PCRE_EXTRA := 0x00000040
#PCRE_NOTBOL := 0x00000080
#PCRE_NOTEOL := 0x00000100
#PCRE_UNGREEDY := 0x00000200
#PCRE_NOTEMPTY := 0x00000400
#PCRE_UTF8 := 0x00000800
#PCRE_NO_AUTO_CAPTURE := 0x00001000
#PCRE_NO_UTF8_CHECK := 0x00002000
#PCRE_AUTO_CALLOUT := 0x00004000
#PCRE_PARTIAL := 0x00008000
#PCRE_DFA_SHORTEST := 0x00010000
#PCRE_DFA_RESTART := 0x00020000
#PCRE_FIRSTLINE := 0x00040000

; Non-PCRE options
; #PCRE_HIDENONSTDOPT is the mask (low 24 bits) that PCRE_GetMatch ANDs with
; the caller's options before handing them to pcre_exec, so that the two
; wrapper-only flags below never reach the library.
#PCRE_HIDENONSTDOPT := 0x00FFFFFF
; PCRE_GetMatch: return "position|length" instead of the position alone.
#PCRE_GETLENGTH := 0x01000000
; PCRE_GetMatch: return the matched string instead of the position.
#PCRE_GETSTRING := 0x02000000

;/* Exec-time and get/set-time error codes */
; Note: no constant is defined for (-9) in this list.
#PCRE_ERROR_NOMATCH := (-1)
#PCRE_ERROR_NULL := (-2)
#PCRE_ERROR_BADOPTION := (-3)
#PCRE_ERROR_BADMAGIC := (-4)
#PCRE_ERROR_UNKNOWN_NODE := (-5)
#PCRE_ERROR_NOMEMORY := (-6)
#PCRE_ERROR_NOSUBSTRING := (-7)
#PCRE_ERROR_MATCHLIMIT := (-8)
#PCRE_ERROR_BADUTF8 := (-10)
#PCRE_ERROR_BADUTF8_OFFSET := (-11)
#PCRE_ERROR_PARTIAL := (-12)
#PCRE_ERROR_BADPARTIAL := (-13)
#PCRE_ERROR_INTERNAL := (-14)
#PCRE_ERROR_BADCOUNT := (-15)
#PCRE_ERROR_DFA_UITEM := (-16)
#PCRE_ERROR_DFA_UCOND := (-17)
#PCRE_ERROR_DFA_UMLIMIT := (-18)
#PCRE_ERROR_DFA_WSSIZE := (-19)
#PCRE_ERROR_DFA_RECURSE := (-20)

;/* Request types for pcre_fullinfo() */
; Only #PCRE_INFO_CAPTURECOUNT is used by this file (in PCRE_RegisterRegExp).
#PCRE_INFO_OPTIONS := 0
#PCRE_INFO_SIZE := 1
#PCRE_INFO_CAPTURECOUNT := 2
#PCRE_INFO_BACKREFMAX := 3
#PCRE_INFO_FIRSTBYTE := 4
#PCRE_INFO_FIRSTTABLE := 5
#PCRE_INFO_LASTLITERAL := 6
#PCRE_INFO_NAMEENTRYSIZE := 7
#PCRE_INFO_NAMECOUNT := 8
#PCRE_INFO_NAMETABLE := 9
#PCRE_INFO_STUDYSIZE := 10
#PCRE_INFO_DEFAULT_TABLES := 11

; Handle returned by LoadLibrary; 0 means "library not loaded".
#hPCREModule = 0
; Provide full path or put it in the path (or the working dir).
#PCRE_DLL = PCRE3.dll

; Skip internal code and continue to auto-exec section of including code
Goto PCRE=>ContinueAutoExec

; Reset the wrapper's bookkeeping counters and load the PCRE library.
; On failure a message box is shown and the current thread is ended (Exit);
; #hPCREModule is then still 0.
PCRE_Init()
{
	global

	#PCRE_refNb := 0
	#PCRE_matchNb := 0
	#PCRE_replaceNb := 0
	#PCRELastError_errorCode := 0

	#hPCREModule := DllCall("LoadLibrary", "Str", #PCRE_DLL)
	If (#hPCREModule = 0)
	{
		MsgBox 16, PCRE_DLL, You need the %#PCRE_DLL% in your path!
		Exit
	}
}

; Free every compiled regular expression registered so far, then unload
; the library. Safe to call when the library is not loaded, and safe to
; call more than once.
PCRE_End()
{
	; A "local" declaration as first line keeps every other variable global.
	local freeVarAddr, freeFunc

	; Nothing to release if the library was never loaded or is already unloaded.
	If (#hPCREModule = 0)
		Return

	; PCRE exports pcre_free as a *variable* holding a function pointer,
	; not as a function: the export address must be dereferenced to get
	; the routine to call.
	freeVarAddr := DllCall("GetProcAddress", "UInt", #hPCREModule, "Str", "pcre_free", "UInt")
	If (freeVarAddr != 0)
	{
		; Read the 32-bit little-endian pointer stored in the variable.
		freeFunc := *freeVarAddr + (*(freeVarAddr + 1) << 8) + (*(freeVarAddr + 2) << 16) + (*(freeVarAddr + 3) << 24)
		If (freeFunc != 0)
		{
			Loop %#PCRE_refNb%
			{
				; CDecl, like every other call into the PCRE library.
				DllCall(freeFunc, "UInt", #PCRECompRE#%A_Index%_ref, "CDecl")
			}
		}
	}
	; The references are gone: make sure a second call cannot free them again.
	#PCRE_refNb := 0

	DllCall("FreeLibrary", "UInt", #hPCREModule)
	#hPCREModule := 0
}

/*
// Take a string regular expression and optional (!) options,
// and return a reference to (the compiled version of) the RE.
// If this reference is zero, there was an error, the code of the error
// and its position in the string, separated by a pipe (|), are in ErrorLevel.
*/
; _regExp:  pattern text handed to pcre_compile2.
; _options: compile options (combination of the #PCRE_xxx flags), 0 by default.
; Returns the index of the new entry in the #PCRECompRE#n_xxx pseudo-objects,
; or 0 on failure. When the pattern itself is rejected, the details are also
; kept in the #PCRELastError_xxx variables for PCRE_ShowLastError.
PCRE_RegisterRegExp(_regExp, _options=0)
{
	local compiled, errCode, errMsgPtr, errPos, nbCaptures

	; Lazy initialisation: load the library on first use.
	If (#hPCREModule = 0)
		PCRE_Init()
	; Still not loaded: nothing can be compiled.
	If (#hPCREModule = 0)
		Return 0

	compiled := DllCall(#PCRE_DLL "\pcre_compile2"
			, "Str", _regExp
			, "Int", _options
			, "Int *", errCode
			, "UInt *", errMsgPtr
			, "Int *", errPos
			, "UInt", 0
			, CDecl)
	; The call itself could not be made: ErrorLevel is left as set by DllCall.
	If (ErrorLevel != 0)
		Return 0

	If (compiled = 0)
	{
		; Compilation failed: remember everything needed to report it later.
		#PCRELastError_regExp := _regExp
		#PCRELastError_options := _options
		#PCRELastError_errorCode := errCode
		#PCRELastError_p_errorMsg := errMsgPtr
		#PCRELastError_errorOffset := errPos
		ErrorLevel := errCode "|" errPos
		Return 0
	}

	; Store the compiled pattern in a new slot.
	#PCRE_refNb++
	#PCRECompRE#%#PCRE_refNb%_ref := compiled

	; Ask the library how many capturing groups the pattern has.
	DllCall(#PCRE_DLL "\pcre_fullinfo"
			, "UInt", compiled
			, "UInt", 0
			, "UInt", #PCRE_INFO_CAPTURECOUNT
			, "UInt *", nbCaptures
			, CDecl)
	If (ErrorLevel != 0)
		Return 0

	#PCRECompRE#%#PCRE_refNb%_captureCount := nbCaptures
	#PCRELastError_errorCode := 0
	OutputDebug %_regExp% ==> %nbCaptures%
	Return #PCRE_refNb
}

/*
// Get only the position of a match on the given string, 0 if no match.
// Useful for string validation or simple search.
// It doesn't manage captures.
// You can add an offset, and options.
// If you add #PCRE_GETLENGTH to the options, you get both the position
// and the length of the match, separated by a pipe (|).
// If you add #PCRE_GETSTRING to the options, you get instead the matched string.
*/
; _reRef:          reference returned by PCRE_RegisterRegExp (0 is rejected).
; _searchedString: subject string.
; _startOffset:    1-based position where the search starts (default 1).
; _options:        pcre_exec options, optionally combined with
;                  #PCRE_GETLENGTH and/or #PCRE_GETSTRING.
; Returns 0 on failure. ErrorLevel then holds the negative PCRE code, or the
; value set by DllCall when the call itself could not be made. On success
; ErrorLevel holds the (non-negative) value returned by pcre_exec.
PCRE_GetMatch(_reRef, _searchedString, _startOffset=1, _options=0)
{
	; A "local" first line makes every undeclared variable global, so each
	; working variable must be listed here; resCode is included so that it
	; does not overwrite a global of the same name in the including script.
	local offsetTable, offsetTableSize, compRegExp, pos, len, res, resCode

	If (_reRef = 0)
	{
		ErrorLevel := #PCRE_ERROR_NULL
		Return 0
	}

	; Only the whole match is wanted: one offset pair plus the workspace
	; slot, i.e. three 4-byte integers.
	offsetTableSize := 3
	VarSetCapacity(offsetTable, offsetTableSize * 4)
	compRegExp := #PCRECompRE#%_reRef%_ref
	resCode := DllCall(#PCRE_DLL "\pcre_exec"
			, "UInt", compRegExp
			, "UInt", 0
			, "Str", _searchedString
			, "Int", StrLen(_searchedString)
			, "Int", _startOffset - 1	; the library counts from 0
			, "Int", _options & #PCRE_HIDENONSTDOPT	; strip wrapper-only flags
			, "UInt", &offsetTable
			, "Int", offsetTableSize
			, "CDecl")
	; The call could not be made at all (same check as PCRE_RegisterRegExp):
	; keep the ErrorLevel set by DllCall instead of replacing it by a blank.
	If (ErrorLevel != 0)
		Return 0

	ErrorLevel := resCode
	If (resCode < 0)
		Return 0

	pos := PCRE_GetOffset(offsetTable, 0)
	res := pos + 1	; back to a 1-based position
	If (_options & (#PCRE_GETLENGTH | #PCRE_GETSTRING))
	{
		len := PCRE_GetOffset(offsetTable, 1) - pos
		If (_options & #PCRE_GETSTRING)
			StringMid res, _searchedString, pos + 1, len
		Else
			res := res "|" len
	}
	#PCRELastError_errorCode := 0
	Return res
}

/*
// Get match and captures of this RE on the given string, at given offset
// (1 = start of string by default), using the optional (!) options.
// Return a reference to this match for use in further calls.
// Return 0 if there is no match, in this case ErrorLevel contains
// the related error code, which can be a simple #PCRE_ERROR_NOMATCH.
*/
; The match data is kept in the #PCREMatch#n_xxx pseudo-object whose index
; is the returned reference: number of captured items, searched string,
; offset where the next search starts, options and raw offset table.
PCRE_Match(_reRef, _searchedString, _startOffset=1, _options=0)
{
	local slot, groupCount, tableLen, compiled, rc

	; Candidate slot: it is only committed when the match succeeds.
	slot := #PCRE_matchNb + 1

	; Three integers per group, whole match included.
	groupCount := #PCRECompRE#%_reRef%_captureCount
	tableLen := 3 * (groupCount + 1)
	VarSetCapacity(#PCREMatch#%slot%_offsetTable, tableLen * 4)

	compiled := #PCRECompRE#%_reRef%_ref
	rc := DllCall(#PCRE_DLL "\pcre_exec"
			, "UInt", compiled
			, "UInt", 0
			, "Str", _searchedString
			, "Int", StrLen(_searchedString)
			, "Int", _startOffset - 1	; the library counts from 0
			, "Int", _options
			, "UInt", &#PCREMatch#%slot%_offsetTable
			, "Int", tableLen
			, CDecl)
	If (rc < 0)
	{
		; No match or error: report the code, the slot stays free.
		ErrorLevel := rc
		Return 0
	}

	; Commit the slot and remember what PCRE_MatchNext will need.
	#PCRE_matchNb++
	#PCREMatch#%#PCRE_matchNb%_options := _options
	#PCREMatch#%#PCRE_matchNb%_searchedString := _searchedString
	#PCREMatch#%#PCRE_matchNb%_captureCount := rc
	; End of the whole match = where the next search begins.
	#PCREMatch#%#PCRE_matchNb%_nextOffset := PCRE_GetOffset(#PCREMatch#%slot%_offsetTable, 1)
	#PCRELastError_errorCode := 0
	Return #PCRE_matchNb
}

/*
// Update _matchRef to the next occurrence of _reRef.
// A relative start offset can be specified.
// It is relative to the position from which the next occurrence will be searched,
// which is the end of the previous match.
*/
; Returns the value given by pcre_exec when a match is found, otherwise 0
; with the negative code in ErrorLevel. The match object is updated in place.
PCRE_MatchNext(_reRef, _matchRef, _relativeStartOffset=0)
{
	local rc, from, compiled, tableLen

	from := #PCREMatch#%_matchRef%_nextOffset + _relativeStartOffset
	compiled := #PCRECompRE#%_reRef%_ref
	tableLen := 3 * (#PCRECompRE#%_reRef%_captureCount + 1)
	rc := DllCall(#PCRE_DLL "\pcre_exec"
			, "UInt", compiled
			, "UInt", 0
			, "Str", #PCREMatch#%_matchRef%_searchedString
			, "Int", StrLen(#PCREMatch#%_matchRef%_searchedString)
			, "Int", from
			, "Int", #PCREMatch#%_matchRef%_options
			, "UInt", &#PCREMatch#%_matchRef%_offsetTable
			, "Int", tableLen
			, CDecl)
	If (rc < 0)
	{
		ErrorLevel := rc
		Return 0
	}

	; Refresh the match object with the new result.
	#PCREMatch#%_matchRef%_captureCount := rc
	#PCREMatch#%_matchRef%_nextOffset := PCRE_GetOffset(#PCREMatch#%_matchRef%_offsetTable, 1)
	#PCRELastError_errorCode := 0
	Return rc
}

/*
// Return the number of sub-captures in the given match.
*/
; The value is the one stored by PCRE_Match / PCRE_MatchNext.
PCRE_GetMatchedCaptureNumber(_matchRef)
{
	Return #PCREMatch#%_matchRef%_captureCount
}

/*
// Update @pos and @len with the starting position and the length
// of the _num th capture (up to PCRE_GetMatchedCaptureNumber)
// for the given match.
*/
; @pos is 1-based. When _num is beyond the last captured item, both outputs
; are set to 0 and the function returns 0; otherwise nothing is returned.
PCRE_GetMatchVals(_matchRef, _num, ByRef @pos, ByRef @len)
{
	local first, afterLast

	If (_num > #PCREMatch#%_matchRef%_captureCount - 1)
	{
		@pos := 0
		@len := 0
		Return 0
	}
	; Pair number _num of the table: start offset, then end offset.
	first := PCRE_GetOffset(#PCREMatch#%_matchRef%_offsetTable, _num * 2)
	afterLast := PCRE_GetOffset(#PCREMatch#%_matchRef%_offsetTable, _num * 2 + 1)
	@len := afterLast - first
	@pos := first + 1
}

/*
// Return the _num th captured string for the given match.
*/
; An empty string is returned when _num is beyond the last captured item.
PCRE_GetMatchStr(_matchRef, _num)
{
	local first, count, text

	If (_num > #PCREMatch#%_matchRef%_captureCount - 1)
		Return ""
	first := PCRE_GetOffset(#PCREMatch#%_matchRef%_offsetTable, _num * 2)
	count := PCRE_GetOffset(#PCREMatch#%_matchRef%_offsetTable, _num * 2 + 1) - first
	; Offsets are 0-based, StringMid is 1-based.
	StringMid text, #PCREMatch#%_matchRef%_searchedString, first + 1, count
	Return text
}

/*
// Take a string replacement expression (with capture references preceded by one symbol),
// and return a reference to (the pre-parsed version of) the expression.
*/
; _replaceExp:    replacement text; a capture reference is the symbol
;                 followed by ONE digit, a doubled symbol is a plain symbol.
; _replaceSymbol: reference marker, "$" when omitted or empty.
; The result is stored in #PCREParsedReplace#n_xxx (n = returned reference):
;   _refNb        number of capture references found
;   _string<i>    literal text located before reference i
;   _ref<i>       capture number of reference i
;   _string<refNb+1>  literal text after the last reference
PCRE_RegisterReplaceString(_replaceExp, _replaceSymbol="")
{
	local scan, symbolPos, rest, ch, peek, before, after, symbolLen, count

	If (_replaceSymbol = "")
		_replaceSymbol := "$"

	#PCRE_replaceNb++
	scan := InStr(_replaceExp, _replaceSymbol)
	If (scan = 0)
	{
		; Not a single symbol: the whole expression is one literal piece.
		#PCREParsedReplace#%#PCRE_replaceNb%_refNb := 0
		#PCREParsedReplace#%#PCRE_replaceNb%_string1 := _replaceExp
		Return #PCRE_replaceNb
	}

	symbolLen := StrLen(_replaceSymbol)
	rest := _replaceExp	; part of the expression not yet stored
	count := 0
	Loop
	{
		; scan is on a symbol: remember it and look at what follows.
		symbolPos := scan
		scan += symbolLen
		StringMid ch, rest, scan, 1
		; An empty ch means the symbol ends the string: it stays literal.
		; This must be tested first, since "is digit" accepts an empty value.
		If (ch != "")
		{
			If ch is digit
			{
				; Reference found: store the text before it with its number,
				; then go on with what follows the digit.
				StringLeft before, rest, symbolPos - 1
				StringMid rest, rest, scan + 1
				scan := 1
				count++
				#PCREParsedReplace#%#PCRE_replaceNb%_string%count% := before
				#PCREParsedReplace#%#PCRE_replaceNb%_ref%count% := ch
			}
			Else
			{
				; Is the symbol doubled?
				StringMid peek, rest, scan, symbolLen
				If (peek = _replaceSymbol)
				{
					; Keep the first symbol, drop the second one.
					StringLeft before, rest, scan - 1
					StringMid after, rest, scan + symbolLen
					rest := before after
				}
				; Anything else: the symbol is kept as is.
			}
		}
		scan := InStr(rest, _replaceSymbol, false, scan)
		If (scan = 0)
			Break
	}

	#PCREParsedReplace#%#PCRE_replaceNb%_refNb := count
	; Trailing literal text, stored one index after the last reference.
	count++
	#PCREParsedReplace#%#PCRE_replaceNb%_string%count% := rest
	Return #PCRE_replaceNb
}

/*
// Take a string replacement expression (with capture references between two symbols),
// and return a reference to (the pre-parsed version of) the expression.
*/
; _replaceExp:         replacement text; a capture reference is a number
;                      (at most 5 digits, at most 65535) between the two symbols.
;                      A start symbol immediately followed by the end symbol
;                      stands for a plain start symbol.
; _replaceSymbolStart: opening marker, "${" when omitted or empty.
; _replaceSymbolEnd:   closing marker, "}" when omitted or empty.
; The result is stored in #PCREParsedReplace#n_xxx (n = returned reference):
; _refNb, _string<i> / _ref<i> for each reference, and _string<refNb+1>
; for the text following the last reference.
PCRE_RegisterReplaceStringEx(_replaceExp, _replaceSymbolStart="", _replaceSymbolEnd="")
{
	local pos, varPos, preparsed, c, head, tail
	local replSymbStartLen, replSymbEndLen
	local nextPos, digitNb, ref, refNb

	If (_replaceSymbolStart = "")
		_replaceSymbolStart := "${"
	If (_replaceSymbolEnd = "")
		_replaceSymbolEnd := "}"

	#PCRE_replaceNb++
	pos := InStr(_replaceExp, _replaceSymbolStart)
	If (pos = 0)
	{
		; No replace variables to dereference: plain /fast replacement
		#PCREParsedReplace#%#PCRE_replaceNb%_string1 := _replaceExp
		#PCREParsedReplace#%#PCRE_replaceNb%_refNb := 0
		Return #PCRE_replaceNb
	}

	replSymbStartLen := StrLen(_replaceSymbolStart)
	replSymbEndLen := StrLen(_replaceSymbolEnd)
	refNb := 0
	preparsed := _replaceExp
	Loop
	{
		varPos := pos
		; Skip the start symbol
		pos += replSymbStartLen
		; Get character after the start symbol
		StringMid c, preparsed, pos, 1
		; Note: "is digit" is also true for an empty c (start symbol at the
		; very end); the search for the end symbol then fails and the
		; remainder is kept literally.
		If c is digit
		{
			nextPos := InStr(preparsed, _replaceSymbolEnd, false, pos)
			If (nextPos = 0)
			{
				; No ending symbol, take the remainder literally
				Break
			}
			digitNb := nextPos - pos
			; More than 5 characters cannot be a valid reference: ignore it.
			If (digitNb <= 5)
			{
				StringMid ref, preparsed, pos, digitNb
				; Non digit chars in the reference: just skip it.
				If ref is digit
				{
					If (ref > 65535)
					{
						; Not a valid variable reference, ignore it
						pos := nextPos + replSymbEndLen
					}
					Else
					{
						; Take the string before the var ref
						StringLeft head, preparsed, varPos - 1
						; And the string after it
						StringMid preparsed, preparsed, nextPos + replSymbEndLen
						; Search at start of remaining string
						pos := 1
						refNb++
						#PCREParsedReplace#%#PCRE_replaceNb%_string%refNb% := head
						#PCREParsedReplace#%#PCRE_replaceNb%_ref%refNb% := ref
					}
				}
			}
		}
		Else	; No digit after the start symbol
		{
			StringMid c, preparsed, pos, replSymbEndLen
			If (c = _replaceSymbolEnd)
			{
				; Start symbol immediately followed by end symbol is plain start symbol
				; String up to the start symbol (including it)
				StringLeft head, preparsed, pos - 1
				; String after the end symbol
				StringMid tail, preparsed, pos + replSymbEndLen
				; Concatenate both
				preparsed := head tail
			}
			; Else just skip it
		}
		pos := InStr(preparsed, _replaceSymbolStart, false, pos)
		If (pos = 0)
			Break
	}
	#PCREParsedReplace#%#PCRE_replaceNb%_refNb := refNb
	; Trailing literal text, stored one index after the last reference.
	refNb++
	#PCREParsedReplace#%#PCRE_replaceNb%_string%refNb% := preparsed
	Return #PCRE_replaceNb
}

/*
// Replace.
*/
; Replace occurrences of a registered regular expression in _string.
; _reRef:       reference returned by PCRE_RegisterRegExp.
; _replRef:     reference returned by PCRE_RegisterReplaceString[Ex].
; _string:      subject string.
; _num:         maximum number of replacements; "A" or empty means all.
; _startOffset: 1-based position where the search starts (default 1).
; _options:     options forwarded to PCRE_Match (and reused for the
;               following occurrences).
; Returns the resulting string. When the first search fails for any reason,
; _string is returned unchanged and ErrorLevel holds the error code.
PCRE_Replace(_reRef, _replRef, _string, _num="", _startOffset=1, _options=0)
{
	local hMatch, refNb, repl, prevPos, pos0, len0
	local resultString, slice, captureRef, capture, last, skip

	; The options were formerly dropped here: forward them.
	hMatch := PCRE_Match(_reRef, _string, _startOffset, _options)
	; Any failure (not only "no match") leaves the string unchanged;
	; going on with a null match reference would only use stale data.
	If (hMatch = 0)
		Return _string

	; "All": a string of n characters holds at most n + 1 matches
	; (empty matches can occur at each position, end of string included).
	If (_num = "A" or _num = "")
		_num := StrLen(_string) + 1
	prevPos := 1
	; Get the number of references to captures in the replace string
	refNb := #PCREParsedReplace#%_replRef%_refNb
	last := refNb + 1
	If (refNb = 0)
	{
		; Plain replace
		repl := #PCREParsedReplace#%_replRef%_string1
	}
	Loop %_num%
	{
		; Get position and length of whole match
		PCRE_GetMatchVals(hMatch, 0, pos0, len0)
		; Untouched text between the previous match and this one
		StringMid slice, _string, %prevPos%, % pos0 - prevPos
		If (refNb > 0)
		{
			; Rebuild the replacement from its literal pieces and the captures
			repl =
			Loop %refNb%
			{
				captureRef := #PCREParsedReplace#%_replRef%_ref%A_Index%
				capture := PCRE_GetMatchStr(hMatch, captureRef)
				repl := repl . #PCREParsedReplace#%_replRef%_string%A_Index% . capture
			}
			repl := repl . #PCREParsedReplace#%_replRef%_string%last%
		}
		resultString := resultString . slice . repl
		prevPos := pos0 + len0

		skip := 0
		If (len0 = 0)
		{
			; Empty match: searching again from the same place would find
			; it again. Step over one character (it is copied by the next
			; slice), or stop when the end of the string is reached.
			; NOTE(review): the step is one byte; with #PCRE_UTF8 it may fall
			; inside a character, the search then presumably fails and the
			; loop stops -- confirm against the PCRE version in use.
			If (prevPos > StrLen(_string))
				Break
			skip := 1
		}
		PCRE_MatchNext(_reRef, hMatch, skip)
		; ErrorLevel is 0 after a successful search; stop on "no match"
		; as well as on any other error.
		If (ErrorLevel != 0)
			Break
	}
	; Take remainder of string
	StringMid slice, _string, %prevPos%
	Return resultString . slice
}

; Show the details of the last pattern compilation error recorded by
; PCRE_RegisterRegExp (error code, message, pattern and a caret line),
; in a window (Gui number 66) that must be closed before the function returns.
PCRE_ShowLastError()
{
	; pos is declared local so that it does not overwrite a global of the
	; same name in the including script.
	local errorMsg, pos

	If (#PCRELastError_errorCode = 0)
	{
		MsgBox 64, PCRE_DLL, No error in last command
		Return
	}
	; Copy the message from the address given by the library.
	; NOTE(review): the buffer holds 100 characters and lstrcpy does not
	; check the length -- confirm that no PCRE message is longer.
	VarSetCapacity(errorMsg, 100)
	DllCall("lstrcpy", "Str", errorMsg, "UInt", #PCRELastError_p_errorMsg)
;~ 	MsgBox,
;~ (
;~ Error compiling pattern /%_regExp%/ found at position %_errorOffset%:
;~ (%_errorCode%) %_errorMsg%
;~ )
	Gui 66:Add, Text, , Error compiling pattern: (%#PCRELastError_errorCode%) %errorMsg%
	Gui 66:Add, Button, Default gPCRE_GuiClose x100 y100 w80 h30, OK
	Gui 66:Font, s15, Courier
	Gui 66:Font, , Andale Mono
	Gui 66:Add, Text, x10 y40, %#PCRELastError_regExp%
	; Line shown under the pattern: buffer filled with spaces (byte 32),
	; followed by a caret.
	; NOTE(review): presumably meant to put the caret under the error
	; position; confirm that the filled buffer counts as string content.
	VarSetCapacity(pos, #PCRELastError_errorOffset, 32)
	pos := pos "^"
	Gui 66:Add, Text, x10 y65, %pos%
	Gui 66:Show, , PCRE Error
	Gui 66:+LastFound
	WinWaitClose	; Modal window
	Return

PCRE_GuiClose:
66GuiEscape:
66GuiClose:
	Gui 66:Destroy
Return
}

;===== Private section =====

; Read the 4-byte little-endian integer number _index (0-based) of the
; offset table filled by pcre_exec, as a SIGNED value: the library marks
; the offsets of a group that took no part in the match with -1, which
; was formerly returned as 4294967295.
PCRE_GetOffset(ByRef @offsetTable, _index)
{
	local addr, value

	addr := &@offsetTable + _index * 4
	value := *addr + (*(addr + 1) << 8) + (*(addr + 2) << 16) + (*(addr + 3) << 24)
	; Sign-extend from 32 bits
	If (value >= 0x80000000)
		value := value - 0x100000000
	Return value
}

;===== Private section =====

PCRE=>ContinueAutoExec: