C++一个不错的正则表达式引擎(二)

简介:
//
// Implementation
//
template <class CHART> CBoundaryElxT <CHART> :: CBoundaryElxT(int ntype, int byes)
...{
    m_ntype = ntype;
    m_byes  = byes;
}
template <class CHART> int CBoundaryElxT <CHART> :: Match(CContext * pContext) const
...{
    const CHART * pcsz  = (const CHART *)pContext->m_pMatchString;
    int npos = pContext->m_nCurrentPos;
    int tlen = pContext->m_pMatchStringLength;
    CHART chL = npos > 0    ? pcsz[npos - 1] : 0;
    CHART chR = npos < tlen ? pcsz[npos    ] : 0;
    int bsucc = 0;
    switch(m_ntype)
    ...{
    case BOUNDARY_FILE_BEGIN:
        bsucc = (npos <= 0);
        break;
    case BOUNDARY_FILE_END:
        bsucc = (npos >= tlen);
        break;
    case BOUNDARY_LINE_BEGIN:
        bsucc = (npos <= 0   ) || (chL == RCHART(' ')) || ((chL == RCHART(' ')) && (chR != RCHART(' ')));
        break;
    case BOUNDARY_LINE_END:
        bsucc = (npos >= tlen) || (chR == RCHART(' ')) || ((chR == RCHART(' ')) && (chL != RCHART(' ')));
        break;
    case BOUNDARY_WORD_BEGIN:
        bsucc = ! IsWordChar(chL) &&   IsWordChar(chR);
        break;
    case BOUNDARY_WORD_END:
        bsucc =   IsWordChar(chL) && ! IsWordChar(chR);
        break;
    case BOUNDARY_WORD_EDGE:
        bsucc =   IsWordChar(chL) ?  ! IsWordChar(chR) : IsWordChar(chR);
        break;
    }
    return bsucc;
}
template <class CHART> int CBoundaryElxT <CHART> :: MatchNext(CContext *) const
...{
    return 0;
}
template <class CHART> inline int CBoundaryElxT <CHART> :: IsWordChar(CHART ch)
...{
    return (ch >= RCHART('A') && ch <= RCHART('Z')) || (ch >= RCHART('a') && ch <= RCHART('z')) || (ch >= RCHART('0') && ch <= RCHART('9')) || (ch == RCHART('_'));
}
//
// Bracket
//
template <class CHART> class CBracketElxT : public ElxInterface  
...{
public:
    int Match    (CContext * pContext) const;
    int MatchNext(CContext * pContext) const;
public:
    CBracketElxT(int nnumber, int bright);
public:
    int m_nnumber;
    int m_bright;
    CBufferT <CHART> m_szNamed;
};
template <class CHART> CBracketElxT <CHART> :: CBracketElxT(int nnumber, int bright)
...{
    m_nnumber = nnumber;
    m_bright  = bright;
}
template <class CHART> int CBracketElxT <CHART> :: Match(CContext * pContext) const
...{
    // check, for named
    if(m_nnumber < 0) return 0;
    if( ! m_bright )
    ...{
        pContext->m_captureindex.Prepare(m_nnumber, -1);
        int index = pContext->m_captureindex[m_nnumber];
        // check
        if(index > 0 && index < pContext->m_capturestack.GetSize() && pContext->m_capturestack[index+2] < 0)
        ...{
            pContext->m_capturestack[index+3] --;
            return 1;
        }
        // save
        pContext->m_captureindex[m_nnumber] = pContext->m_capturestack.GetSize();
        pContext->m_capturestack.Push(m_nnumber);
        pContext->m_capturestack.Push(pContext->m_nCurrentPos);
        pContext->m_capturestack.Push(-1);
        pContext->m_capturestack.Push( 0); // z-index
    }
    else
    ...{
        // check
        int index = pContext->m_captureindex[m_nnumber];
        if(pContext->m_capturestack[index + 3] < 0)
        ...{
            pContext->m_capturestack[index + 3] ++;
            return 1;
        }
        // save
        pContext->m_capturestack[index + 2] = pContext->m_nCurrentPos;
        pContext->m_capturestack[index + 3] = pContext->m_nParenZindex ++;
    }
    return 1;
}
template <class CHART> int CBracketElxT <CHART> :: MatchNext(CContext * pContext) const
...{
    int index = pContext->m_captureindex[m_nnumber];
    if( ! m_bright )
    ...{
        if(pContext->m_capturestack[index + 3] < 0)
        ...{
            pContext->m_capturestack[index + 3] ++;
            return 0;
        }
        pContext->m_capturestack.Restore(pContext->m_capturestack.GetSize() - 4);
        // to find
        index = pContext->m_capturestack.GetSize() - 4;
        while(index >= 0 && pContext->m_capturestack[index] != m_nnumber) index -= 4;
        // new index
        pContext->m_captureindex[m_nnumber] = index;
    }
    else
    ...{
        if(pContext->m_capturestack[index + 3] < 0)
        ...{
            pContext->m_capturestack[index + 3] --;
            return 0;
        }
        pContext->m_capturestack[index + 2] = -1;
        pContext->m_capturestack[index + 3] =  0;
    }
    return 0;
}
//
// Deletage
//
template <class CHART> class CDelegateElxT : public ElxInterface  
...{
public:
    int Match    (CContext * pContext) const;
    int MatchNext(CContext * pContext) const;
public:
    CDelegateElxT(int ndata = 0);
public:
    ElxInterface * m_pelx;
    int m_ndata; // +0 : recursive to
                 // -3 : named recursive
    CBufferT <CHART> m_szNamed;
};
template <class CHART> CDelegateElxT <CHART> :: CDelegateElxT(int ndata)
...{
    m_pelx  = 0;
    m_ndata = ndata;
}
template <class CHART> int CDelegateElxT <CHART> :: Match(CContext * pContext) const
...{
    if(m_pelx != 0)
        return m_pelx->Match(pContext);
    else
        return 1;
}
template <class CHART> int CDelegateElxT <CHART> :: MatchNext(CContext * pContext) const
...{
    if(m_pelx != 0)
        return m_pelx->MatchNext(pContext);
    else
        return 0;
}
//
// Empty
//
template <int x> class CEmptyElxT : public ElxInterface
...{
public:
    int Match    (CContext * pContext) const;
    int MatchNext(CContext * pContext) const;
public:
    CEmptyElxT();
};
typedef CEmptyElxT <0> CEmptyElx;
//
// Global
//
template <int x> class CGlobalElxT : public ElxInterface
...{
public:
    int Match    (CContext * pContext) const;
    int MatchNext(CContext * pContext) const;
public:
    CGlobalElxT();
};
typedef CGlobalElxT <0> CGlobalElx;
//
// Repeat
//
template <int x> class CRepeatElxT : public ElxInterface
...{
public:
    int Match    (CContext * pContext) const;
    int MatchNext(CContext * pContext) const;
public:
    CRepeatElxT(ElxInterface * pelx, int ntimes);
protected:
    int MatchFixed    (CContext * pContext) const;
    int MatchNextFixed(CContext * pContext) const;
public:
    ElxInterface * m_pelx;
    int m_nfixed;
};
typedef CRepeatElxT <0> CRepeatElx;
//
// Greedy
//
template <int x> class CGreedyElxT : public CRepeatElxT <x>
...{
public:
    int Match    (CContext * pContext) const;
    int MatchNext(CContext * pContext) const;
public:
    CGreedyElxT(ElxInterface * pelx, int nmin = 0, int nmax = INT_MAX);
protected:
    int MatchVart    (CContext * pContext) const;
    int MatchNextVart(CContext * pContext) const;
public:
    int m_nvart;
};
typedef CGreedyElxT <0> CGreedyElx;
//
// Independent
//
template <int x> class CIndependentElxT : public ElxInterface
...{
public:
    int Match    (CContext * pContext) const;
    int MatchNext(CContext * pContext) const;
public:
    CIndependentElxT(ElxInterface * pelx);
public:
    ElxInterface * m_pelx;
};
typedef CIndependentElxT <0> CIndependentElx;
//
// List
//
template <int x> class CListElxT : public ElxInterface
...{
public:
    int Match    (CContext * pContext) const;
    int MatchNext(CContext * pContext) const;
public:
    CListElxT(int brightleft);
public:
    CBufferT <ElxInterface *> m_elxlist;
    int m_brightleft;
};
typedef CListElxT <0> CListElx;
//
// Posix Elx
//
template <class CHART> class CPosixElxT : public ElxInterface
...{
public:
    int Match    (CContext * pContext) const;
    int MatchNext(CContext * pContext) const;
public:
    CPosixElxT(const char * posix, int brightleft);
protected:
    static int m_isblank(int c);
public:
    int (*m_posixfun)(int);
    int m_brightleft;
    int m_byes;
};
//
// Implementation
//
template <class CHART> CPosixElxT <CHART> :: CPosixElxT(const char * posix, int brightleft)
...{
    m_brightleft = brightleft;
    if(posix[1] == '^')
    ...{
        m_byes = 0;
        posix += 2;
    }
    else
    ...{
        m_byes = 1;
        posix += 1;
    }
    if     (!strncmp(posix, "alnum:", 6)) m_posixfun = ::isalnum ;
    else if(!strncmp(posix, "alpha:", 6)) m_posixfun = ::isalpha ;
    else if(!strncmp(posix, "ascii:", 6)) m_posixfun = ::isascii ;
    else if(!strncmp(posix, "cntrl:", 6)) m_posixfun = ::iscntrl ;
    else if(!strncmp(posix, "digit:", 6)) m_posixfun = ::isdigit ;
    else if(!strncmp(posix, "graph:", 6)) m_posixfun = ::isgraph ;
    else if(!strncmp(posix, "lower:", 6)) m_posixfun = ::islower ;
    else if(!strncmp(posix, "print:", 6)) m_posixfun = ::isprint ;
    else if(!strncmp(posix, "punct:", 6)) m_posixfun = ::ispunct ;
    else if(!strncmp(posix, "space:", 6)) m_posixfun = ::isspace ;
    else if(!strncmp(posix, "upper:", 6)) m_posixfun = ::isupper ;
    else if(!strncmp(posix, "xdigit:",7)) m_posixfun = ::isxdigit;
    else if(!strncmp(posix, "blank:", 6)) m_posixfun = m_isblank ;
    else                                  m_posixfun = 0         ;
}
template <class CHART> int CPosixElxT <CHART> :: m_isblank(int c)
...{
    return c == 0x20 || c == ' ';
}
template <class CHART> int CPosixElxT <CHART> :: Match(CContext * pContext) const
...{
    if(m_posixfun == 0) return 0;
    int tlen = pContext->m_pMatchStringLength;
    int npos = pContext->m_nCurrentPos;
    // check
    int at   = m_brightleft ? npos - 1 : npos;
    if( at < 0 || at >= tlen )
        return 0;
    CHART ch = ((const CHART *)pContext->m_pMatchString)[at];
    int bsucc = (*m_posixfun)(ch);
    if( ! m_byes )
        bsucc = ! bsucc;
    if( bsucc )
        pContext->m_nCurrentPos += m_brightleft ? -1 : 1;
    return bsucc;
}
template <class CHART> int CPosixElxT <CHART> :: MatchNext(CContext * pContext) const
...{
    pContext->m_nCurrentPos -= m_brightleft ? -1 : 1;
    return 0;
}
//
// Possessive
//
template <int x> class CPossessiveElxT : public CGreedyElxT <x>
...{
public:
    int Match    (CContext * pContext) const;
    int MatchNext(CContext * pContext) const;
public:
    CPossessiveElxT(ElxInterface * pelx, int nmin = 0, int nmax = INT_MAX);
};
typedef CPossessiveElxT <0> CPossessiveElx;
//
// Range Elx
//
template <class CHART> class CRangeElxT : public ElxInterface
...{
public:
    int Match    (CContext * pContext) const;
    int MatchNext(CContext * pContext) const;
public:
    CRangeElxT(int brightleft, int byes);
public:
    CBufferT <CHART> m_ranges;
    CBufferT <CHART> m_chars;
    CBufferT <ElxInterface *> m_embeds;
public:
    int m_brightleft;
    int m_byes;
};
//
// Implementation
//
template <class CHART> CRangeElxT <CHART> :: CRangeElxT(int brightleft, int byes)
...{
    m_brightleft = brightleft;
    m_byes       = byes;
}
template <class CHART> int CRangeElxT <CHART> :: Match(CContext * pContext) const
...{
    int tlen = pContext->m_pMatchStringLength;
    int npos = pContext->m_nCurrentPos;
    // check
    int at   = m_brightleft ? npos - 1 : npos;
    if( at < 0 || at >= tlen )
        return 0;
    CHART ch = ((const CHART *)pContext->m_pMatchString)[at];
    int bsucc = 0, i;
    // compare
    for(i=0; !bsucc && i<m_ranges.GetSize(); i+=2)
    ...{
        if(m_ranges[i] <= ch && ch <= m_ranges[i+1]) bsucc = 1;
    }
    for(i=0; !bsucc && i<m_chars.GetSize(); i++)
    ...{
        if(m_chars[i] == ch) bsucc = 1;
    }
    for(i=0; !bsucc && i<m_embeds.GetSize(); i++)
    ...{
        if(m_embeds[i]->Match(pContext))
        ...{
            pContext->m_nCurrentPos = npos;
            bsucc = 1;
        }
    }
    if( ! m_byes )
        bsucc = ! bsucc;
    if( bsucc )
        pContext->m_nCurrentPos += m_brightleft ? -1 : 1;
    return bsucc;
}
template <class CHART> int CRangeElxT <CHART> :: MatchNext(CContext * pContext) const
...{
    pContext->m_nCurrentPos -= m_brightleft ? -1 : 1;
    return 0;
}
//
// Reluctant
//
template <int x> class CReluctantElxT : public CRepeatElxT <x>
...{
public:
    int Match    (CContext * pContext) const;
    int MatchNext(CContext * pContext) const;
public:
    CReluctantElxT(ElxInterface * pelx, int nmin = 0, int nmax = INT_MAX);
protected:
    int MatchVart    (CContext * pContext) const;
    int MatchNextVart(CContext * pContext) const;
public:
    int m_nvart;
};
typedef CReluctantElxT <0> CReluctantElx;


本文转自jazka 51CTO博客,原文链接:http://blog.51cto.com/jazka/228017,如需转载请自行联系原作者
相关文章
|
3月前
|
算法 测试技术 C#
【动态规划】【字符串】C++算法:正则表达式匹配
【动态规划】【字符串】C++算法:正则表达式匹配
|
1月前
|
存储 JavaScript API
C++ 正则表达式库 std::basic_regex 中文手册(API说明来自cppreference.com)
C++ 正则表达式库 std::basic_regex 中文手册(API说明来自cppreference.com)
24 0
|
5月前
|
算法 测试技术
从0到1打造正则表达式执行引擎(二) NFA转DFA
然后对DFA的节点0执行步骤1,找到NFA中所有a可达的NFA节点(1#2#4#6#8#9)构成NFA中的节点1,如下图。
49 0
|
5月前
|
设计模式 算法
从0到1打造正则表达式执行引擎(一) 正则表达式转NFA (2)
看完上文之后相信你一直知道如果将一个正则表达式转化为状态机的方法了,这里我们要将理论转化为代码。首先我们要将图转化为代码标识,我用State表示一个节点,其中用了Map<MatchStrategy, List> next表示其后继节点,next中有个key-value就是一条边,MatchStrategy用来描述边的信息。
34 0
|
5月前
|
算法 Java
从0到1打造正则表达式执行引擎(一) 正则表达式转NFA (1)
重复匹配(正则表达式中的 ? + *) 正则表达式里有4种表示重复的方式,分别是:
32 1
|
6月前
|
算法 C++
剑指offer(C++)-JZ19:正则表达式匹配(算法-动态规划)
剑指offer(C++)-JZ19:正则表达式匹配(算法-动态规划)
|
6月前
|
C# C++
C++ 利于宏模拟C#的正则表达式
C++ 利于宏模拟C#的正则表达式
|
6月前
|
C++ Windows Perl
[笔记]c++基础实践《二》regex正则表达式
[笔记]c++基础实践《二》regex正则表达式
|
8月前
|
C++
C++正则表达式
C++正则表达式
|
8月前
|
移动开发 小程序 JavaScript
【C++】C++ 标准库 — 正则表达式
关于 C++ 标准库中正则表达式的使用 std::regex
128 0

热门文章

最新文章