>python Python 2.7.3 (default, Apr 10 2012, 23:24:47) [MSC v.1500 64 bit (AMD64)] on win32 Type "help", "copyright", "credits" or "license" for more information. >>> import re >>> p=re.compile('s*') >>> s1=p.findall('') >>> print s1 [''] >>> s2=p.findall('sabc') >>> print s2 ['s', '', '', '', ''] >>> s3=p.findall('abcs') >>> print s3 ['', '', '', 's', ''] >>> 1、匹配一个空串,返回只包含一个空串的列表。 2、匹配'sabc',返回包含五个值的列表,其中最后一个是空串。 3、匹配'abcs',返回包含五个值的列表,其中最后一个是空串。 findall的C代码: static PyObject* pattern_findall(PatternObject* self, PyObject* args, PyObject* kw) { SRE_STATE state; PyObject* list; int status; Py_ssize_t i, b, e; PyObject* string; Py_ssize_t start = 0; Py_ssize_t end = PY_SSIZE_T_MAX; static char* kwlist[] = { "source", "pos", "endpos", NULL }; if (!PyArg_ParseTupleAndKeywords(args, kw, "O|nn:findall", kwlist, &string, &start, &end)) return NULL; string = state_init(&state, self, string, start, end); if (!string) return NULL; list = PyList_New(0); if (!list) { state_fini(&state); return NULL; } while (state.start <= state.end) { PyObject* item; state_reset(&state); state.ptr = state.start; if (state.logical_charsize == 1) { status = sre_search(&state, PatternObject_GetCode(self)); } else { status = sre_usearch(&state, PatternObject_GetCode(self)); } if (PyErr_Occurred()) goto error; if (status <= 0) { if (status == 0) break; pattern_error(status); goto error; } /* don't bother to build a match object */ switch (self->groups) { case 0: b = STATE_OFFSET(&state, state.start); e = STATE_OFFSET(&state, state.ptr); item = PySequence_GetSlice(string, b, e); if (!item) goto error; break; case 1: item = state_getslice(&state, 1, string, 1); if (!item) goto error; break; default: item = PyTuple_New(self->groups); if (!item) goto error; for (i = 0; i < self->groups; i++) { PyObject* o = state_getslice(&state, i+1, string, 1); if (!o) { Py_DECREF(item); goto error; } PyTuple_SET_ITEM(item, i, o); } break; } status = PyList_Append(list, item); Py_DECREF(item); if (status < 0) goto error; if (state.ptr ==
state.start) state.start = (void*) ((char*) state.ptr + state.charsize); else state.start = state.ptr; } state_fini(&state); return list; error: Py_DECREF(list); state_fini(&state); return NULL; } 在上面代码(原始排版)的第27行,while循环的条件是 (state.start <= state.end), 也就是说当 state.start == state.end 时循环体还会再执行一次。 state.start == state.end 时状态机应该已经把输入处理完了, 但由于's*'可以匹配空串, Python还会往list里再追加一个空串, 所以结果的末尾就会多出一个空串。 把while循环的条件改为 (state.start < state.end) 后,末尾的空串就没有了: >>> import re >>> p=re.compile('s*') >>> s1=p.findall('') >>> print(s1) [] >>> s2=p.findall('sabc') >>> print(s2) ['s', '', '', ''] >>> s3=p.findall('abcs') >>> print(s3) ['', '', '', 's'] 转载请保留固定链接: https://linuxeye.com/program/1777.html |