update text splitter
This commit is contained in:
parent
aead5c0495
commit
a74d428489
@ -224,8 +224,8 @@ class CharacterTextSplitter(TextSplitter):
|
|||||||
splits = _split_text_with_regex(text, self._separator, self._keep_separator)
|
splits = _split_text_with_regex(text, self._separator, self._keep_separator)
|
||||||
_separator = "" if self._keep_separator else self._separator
|
_separator = "" if self._keep_separator else self._separator
|
||||||
_good_splits_lengths = [] # cache the lengths of the splits
|
_good_splits_lengths = [] # cache the lengths of the splits
|
||||||
for split in splits:
|
if splits:
|
||||||
_good_splits_lengths.append(self._length_function(split))
|
_good_splits_lengths.extend(self._length_function(splits))
|
||||||
return self._merge_splits(splits, _separator, _good_splits_lengths)
|
return self._merge_splits(splits, _separator, _good_splits_lengths)
|
||||||
|
|
||||||
|
|
||||||
@ -478,9 +478,8 @@ class RecursiveCharacterTextSplitter(TextSplitter):
|
|||||||
_good_splits = []
|
_good_splits = []
|
||||||
_good_splits_lengths = [] # cache the lengths of the splits
|
_good_splits_lengths = [] # cache the lengths of the splits
|
||||||
_separator = "" if self._keep_separator else separator
|
_separator = "" if self._keep_separator else separator
|
||||||
|
s_lens = self._length_function(splits)
|
||||||
for s in splits:
|
for s, s_len in zip(splits, s_lens):
|
||||||
s_len = self._length_function(s)
|
|
||||||
if s_len < self._chunk_size:
|
if s_len < self._chunk_size:
|
||||||
_good_splits.append(s)
|
_good_splits.append(s)
|
||||||
_good_splits_lengths.append(s_len)
|
_good_splits_lengths.append(s_len)
|
||||||
|
Loading…
Reference in New Issue
Block a user