文章/答案/技术大牛

发布

社区首页 >问答首页 >根据另一个数组中的条目拆分数组

问根据另一个数组中的条目拆分数组
EN

Stack Overflow用户

提问于 2015-06-19 20:13:25

回答 1查看 248关注 0票数 0

我正在编写一个JavaScript代码，它将把一个RegExp分解成它的基本组件，并对它所做的事情给出一个小小的解释。

我的总体思路是：根据另一个数组中的条目，对输入字符串(即RegExp的字符串形式)进行拆分。

我现在的代码是：

/**
 * Skeleton of a RegExp explainer: extracts the pattern body and flags and
 * sets up the lookup tables (token -> description) that the splitting step
 * is meant to use. The splitting step itself is not written yet.
 *
 * @param {RegExp|string} regex - A RegExp, or its "/body/flags" string form.
 * @returns {undefined} Nothing yet; the tokenizing logic is still a TODO.
 */
function interpret(regex){
    var r = String(regex);
    // Cut at the first and last slash instead of splitting on every "/",
    // so a body containing an escaped slash (e.g. /a\/b/) stays intact.
    var open = r.indexOf("/");
    var close = r.lastIndexOf("/");
    var hasDelimiters = close > open;
    // Declared with var: the undeclared assignments leaked globals and
    // throw a ReferenceError in strict mode / ES modules.
    var body = hasDelimiters ? r.slice(open + 1, close) : r;
    var flags = hasDelimiters ? r.slice(close + 1) : "";
    // Backslashes are doubled so each entry really contains the two
    // characters found in regex source text ("\w" in a string is just "w").
    var classes     = [".","\\w","\\d","\\s","\\W","\\D","\\S","[","]"];
    // One description per entry of classes, in the same order.
    var classdefs   = ["any non-newline character","any word (digit or letter)","any digit (characters 0-9)","any whitespace character","any non-word (non-digit and non-letter)","any non-digit (not characters 0-9)","any non-whitespace character","open matchset","close matchset"];
    var quantifiers = ["*","+","?",
                        /{(\d+)}/g,         // a{n} 
                        /{(\d+),}/g,        // a{n,}
                        /{(\d+),(\d+)}/g,   // a{n,m}
                        /[+*?]\?/g          // a<quant>? - lazy quantification
                      ];
    // One description per entry of quantifiers; $1/$2 stand for the captured numbers.
    var quantDefs   = ["repeated 0 or more times","repeated 1 or more times","repeated once or not at all","repeated exactly $1 time","repeated $1 or more times","repeated between $1 and $2 times","lazy quantification"];
    // Escape sequences as they appear in regex source: a backslash followed by a character.
    var escaped     = ["\\t","\\n","\\r","\\.","\\*","\\\\","\\^","\\?","\\|"];
    var escapedDefs = ["a tab","a linefeed","a carriage return","a period","an asterisk","a backslash","a caret","a question mark","a vertical bar"];

    // TODO: code to split body based on entries in classes, quantifiers, and escaped.
}

理想情况下，此函数(让我们称之为splitR)将返回如下所示的输出：

> splitR("hello",["he","l"]);
["he", "l", "l", "o"]
> splitR("hello",["he"]);
["he", "llo"]
> splitR("hello",["he","o"]);
["he", "ll", "o"];
> splitR("5 is the square root of 25",[/\d+/g,/\w{3,}/g,"of"]);
["5", " is ", "the", " ", "square", " ", "root", " ", "of", " ", "25"]

明确定义的splitR函数应该在interpret函数的上下文中获取一个RegExp并将其拆分为它的基本组件；例如，\d+[0-9]\w*?应该拆分为["\d", "+", "[", "0-9", "]", "\w", "*", "?"]。这些组件在其他数组中分别定义，使用各种RegExps (例如，/{(\d+)}/g查找a{n})和字符串(例如".")。

确实，我对splitR的定义感到困惑。任何帮助都是非常感谢的！

javascript

arrays

regex

split

回答 1

Stack Overflow用户

回答已采纳

发布于 2015-06-20 04:13:16

这段代码会把正则表达式拆分为各个组成部分，并在第二个数组中填入每个部分的说明。它会跳过无法识别的字符，但并不做真正的正则语法检查；也就是说，如果你开始了一个范围却没有结束它，脚本不会报错。我冒昧地补充了列表中缺少的一些内容，比如分组括号、开始锚点和结束锚点。

/**
 * Splits the source of a regular expression into its basic components and
 * builds a parallel list with a short description of each component.
 *
 * Characters that match none of the known token patterns are skipped; no
 * real regex syntax checking is performed.
 *
 * The result is logged to the console and also returned. When an `alert`
 * function exists, it is called to point the user at the console.
 *
 * @param {RegExp} regex - The regular expression to break apart.
 * @returns {{flags: string, reglist: string[], deflist: string[]}} The g/i/m
 *   flags that are set, the matched components in order, and one description
 *   per component (same index).
 * @throws {TypeError} If `regex` has no string `source` property.
 */
function interpret(regex)
{
    if (regex === null || regex === undefined || typeof regex.source !== "string")
    {
        throw new TypeError("interpret() expects a RegExp");
    }

    var body = regex.source;
    var flags = (regex.global ? "g" : "") + (regex.ignoreCase ? "i" : "") + (regex.multiline ? "m" : "");
    // Every pattern is anchored with ^ so it only matches at the start of the remaining body.
    var classes     = [/^\w\-\w/,/^\./,/^\\w/,/^\\d/,/^\\s/,/^\\W/,/^\\D/,/^\\S/,/^\[/,/^\]/,/^\(/,/^\)/,/^\^/,/^\$/,/^\|/];
    var classDefs   = ["character range","any non-newline character","any word (digit or letter)","any digit (characters 0-9)","any whitespace character","any non-word (non-digit and non-letter)","any non-digit (not characters 0-9)","any non-whitespace character","open matchset","close matchset","open group","close group","start anchor or negation","end anchor","alternative"];
    // The lazy pattern comes first so "*?" is not consumed as a plain "*".
    var quantifiers = [/^[+*?]\?/,/^\*/,/^\+/,/^\?/,/^{(\d+)}/,/^{(\d+),}/,/^{(\d+),(\d+)}/];
    var quantDefs   = ["lazy quantification","repeated 0 or more times","repeated 1 or more times","repeated once or not at all","repeated exactly $1 time","repeated $1 or more times","repeated between $1 and $2 times"];
    var escaped     = [/^\\t/,/^\\n/,/^\\r/,/^\\\./,/^\\\*/,/^\\\+/,/^\\\-/,/^\\\\/,/^\\\^/,/^\\\$/,/^\\\?/,/^\\\|/,/^\\\[/,/^\\\]/,/^\\\(/,/^\\\)/,/^\\\{/,/^\\\}/];
    var escapedDefs = ["a tab","a linefeed","a carriage return","a period","an asterisk","a plus","a minus","a backslash","a caret","a dollar sign","a question mark","a vertical bar","a square bracket","a square bracket","a bracket","a bracket","a curly bracket","a curly bracket"];
    var literal     = [/^[^\.\\\[\]\(\)\^\$\|\*\+\-\?\{\}]+/];
    var literalDefs = ["literal text"];
    // regs[i] is described by defs[i]; earlier entries take precedence.
    var regs = classes.concat(quantifiers,escaped,literal);
    var defs = classDefs.concat(quantDefs,escapedDefs,literalDefs);
    var reglist = [];
    var deflist = [];

    while (body.length)
    {
        var found = false;
        var chunk = null;

        for (var i = 0; i < regs.length; i++)
        {
            chunk = body.match(regs[i]);

            if (chunk)
            {
                reglist.push(chunk[0]);
                // Fill the $1/$2 placeholders with the captured numbers; a
                // placeholder without a matching capture is left as-is.
                deflist.push(defs[i].replace(/\$(\d)/g, function (placeholder, group)
                {
                    return chunk[group] !== undefined ? chunk[group] : placeholder;
                }));
                body = body.slice(chunk[0].length);
                found = true;
                break;
            }
        }

        if (!found)
        {
            body = body.slice(1);	// skip unexpected character
        }
    }

    console.log(regex.source);
    console.log(reglist);
    console.log(deflist);

    // alert only exists in browsers; skip it elsewhere instead of throwing.
    if (typeof alert === "function")
    {
        alert("see console for output");
    }

    return { flags: flags, reglist: reglist, deflist: deflist };
}

// Demo: run interpret() on a pattern that mixes anchors, a matchset,
// character classes, groups, quantifiers and escaped characters.
var x = new RegExp(
    "^[a-z0-9]\\^.\\.\\w\\d\\s\\W\\D\\S+(te|\\|st)*\\*\\n+\\+\\-\\}(\\W?\\?\\s{1,3})\\\\*?a{3}b{4,}c{}\\r\\t\\$$",
    "ig"
);

interpret(x);

票数 1

页面原文内容由Stack Overflow提供。腾讯云小微IT领域专用引擎提供翻译支持

原文链接：

https://stackoverflow.com/questions/30946549

复制

相似问题

问根据另一个数组中的条目拆分数组
EN

回答 1

Stack Overflow用户

社区

活动

圈层

关于

腾讯云开发者

热门产品

热门推荐

更多推荐

问根据另一个数组中的条目拆分数组EN

回答 1

Stack Overflow用户

社区

活动

圈层

关于

腾讯云开发者

热门产品

热门推荐

更多推荐

问根据另一个数组中的条目拆分数组
EN