LinuxEye - Linux系统教程

LinuxEye - Linux系统教程

当前位置: 主页 > 脚本编程 >

Python findall匹配'*'号的真相

时间:2013-06-20 21:05来源:wubiaoblog.com/archives/594 编辑:吴飚 点击:
如下 >python Python 2.7.3 (default, Apr 10 2012, 23:24:47) [MSC v.1500 64 bit (AMD64)] on win32 Type "help", "copyright", "credits" or "license" for more information. >>> import re >>> p=re.compile('s*') >>> s1=p.findall('') >>> print s1 [''] >>> s2=p.findall('sabc') >>> print s2 ['s', ...
如下
>python
Python 2.7.3 (default, Apr 10 2012, 23:24:47) [MSC v.1500 64 bit (AMD64)] on win32
Type "help", "copyright", "credits" or "license" for more information.
>>> import re
>>> p=re.compile('s*')
>>> s1=p.findall('')
>>> print s1
['']
>>> s2=p.findall('sabc')
>>> print s2
['s', '', '', '', '']
>>> s3=p.findall('abcs')
>>> print s3
['', '', '', 's', '']
>>>
1、匹配空串'',返回只包含一个空串的列表。
2、匹配'sabc',返回包含五个值的列表。
3、匹配'abcs',返回包含五个值的列表。

findall的c代码:
/*
 * pattern_findall -- collect every match of a compiled pattern into a list.
 *
 * Arguments (parsed from args/kw with the format "O|nn:findall"):
 *   source  - required object to search
 *   pos     - optional start offset, defaults to 0
 *   endpos  - optional end offset, defaults to PY_SSIZE_T_MAX
 *
 * Returns a new list on success, or NULL on failure.  On every failure
 * path taken after state_init() has succeeded, state_fini() is called
 * before returning.
 *
 * Shape of each list item, selected by self->groups:
 *   0 groups  - the slice of `string` between the offsets of state.start
 *               and state.ptr after the search
 *   1 group   - the result of state_getslice() for group index 1
 *   otherwise - a tuple holding one state_getslice() result per group
 *
 * The loop condition is inclusive (state.start <= state.end), so one
 * more search is attempted when state.start has reached state.end; a
 * successful search at that position is appended like any other.
 */
static PyObject*
pattern_findall(PatternObject* self, PyObject* args, PyObject* kw)
{
    static char* kwlist[] = { "source", "pos", "endpos", NULL };

    SRE_STATE state;
    PyObject* string;
    PyObject* results;
    Py_ssize_t start = 0;
    Py_ssize_t end = PY_SSIZE_T_MAX;

    if (!PyArg_ParseTupleAndKeywords(args, kw, "O|nn:findall", kwlist,
                                     &string, &start, &end))
        return NULL;

    /* From here on `string` is whatever state_init() handed back. */
    string = state_init(&state, self, string, start, end);
    if (string == NULL)
        return NULL;

    results = PyList_New(0);
    if (results == NULL) {
        state_fini(&state);
        return NULL;
    }

    while (state.start <= state.end) {

        PyObject* entry;
        int rc;

        /* Each search starts from a reset state, at the current start. */
        state_reset(&state);
        state.ptr = state.start;

        /* Pick the search routine according to the logical char size. */
        rc = (state.logical_charsize == 1)
            ? sre_search(&state, PatternObject_GetCode(self))
            : sre_usearch(&state, PatternObject_GetCode(self));

        if (PyErr_Occurred())
            goto error;

        /* Zero ends the loop; a negative status is reported as an error. */
        if (rc == 0)
            break;
        if (rc < 0) {
            pattern_error(rc);
            goto error;
        }

        /* don't bother to build a match object */
        if (self->groups == 0) {
            Py_ssize_t from = STATE_OFFSET(&state, state.start);
            Py_ssize_t upto = STATE_OFFSET(&state, state.ptr);

            entry = PySequence_GetSlice(string, from, upto);
            if (entry == NULL)
                goto error;
        } else if (self->groups == 1) {
            entry = state_getslice(&state, 1, string, 1);
            if (entry == NULL)
                goto error;
        } else {
            Py_ssize_t group;

            entry = PyTuple_New(self->groups);
            if (entry == NULL)
                goto error;

            /* Tuple slot `group` receives group index `group + 1`. */
            for (group = 0; group < self->groups; group++) {
                PyObject* piece = state_getslice(&state, group + 1, string, 1);
                if (piece == NULL) {
                    Py_DECREF(entry);
                    goto error;
                }
                PyTuple_SET_ITEM(entry, group, piece);
            }
        }

        /* Our reference is dropped whether or not the append succeeded. */
        rc = PyList_Append(results, entry);
        Py_DECREF(entry);
        if (rc < 0)
            goto error;

        /*
         * If the search left ptr where it started, step forward by
         * state.charsize bytes so the next iteration begins at a new
         * position; otherwise resume from ptr.
         */
        if (state.ptr != state.start)
            state.start = state.ptr;
        else
            state.start = (void*) ((char*) state.ptr + state.charsize);

    }

    state_fini(&state);
    return results;

error:
    Py_DECREF(results);
    state_fini(&state);
    return NULL;

}

findall 主循环(while)的条件是 (state.start <= state.end),
也就是说当 state.start == state.end 时循环体还会再执行一次。
state.start == state.end 表示输入已经全部处理完,
但此时 's*' 仍然可以匹配空串,于是 Python 还会往列表里再追加一个串,
所以结果末尾会多出一个空串。
把 while 循环的条件改为 (state.start < state.end) 后,就没有最后的空串了。
>>> import re
>>> p=re.compile('s*')
>>> s1=p.findall('')
>>> print(s1)
[]
>>> s2=p.findall('sabc')
>>> print(s2)
['s', '', '', '']
>>> s3=p.findall('abcs')
>>> print(s3)
['', '', '', 's']

转载请保留固定链接: https://linuxeye.com/program/1777.html

------分隔线----------------------------
标签:Pythonfindall
栏目列表
推荐内容