I_ACCESS_DELAY |
TREXS_CRUISER-COUNTER |
N |
10 |
0 |
X |
time between 2 HTTP gets in ms |
I_CRAWLING_ALGORITHM |
TREXS_CRUISER-NUMC1 |
N |
1 |
0 |
X |
0 = down the tree; 1 = same server; 2 = same domain; 3 = top level domain; 4 = free |
I_CREATE_SUSPENDED |
TREXS_CRUISER-NUMC1 |
N |
1 |
0 |
X |
1=cruise is suspended after creation |
I_CRUISE_DOCUMENT_INFO |
TREXS_CRUISE_DOCUMENT_INFO |
v |
24 |
|
|
meta information like seedurl etc. |
I_CRUISE_EXTENSION_LIST |
TREXT_CRUISE_EXTENSION_LIST |
h |
16 |
|
X |
TREX cruiser extension list for parameters |
I_CRUISE_NAME |
TREXS_CRUISER-CRUISE_NAME |
C |
64 |
|
|
name of the crawl / cruise |
I_CRUISE_PP_EXTENSION_LIST |
TREXT_CRUISE_EXTENSION_LIST |
h |
16 |
|
X |
TREX cruiser extension list for parameters |
I_DEINDEX_FLAG |
TREXS_CRUISER-NUMC1 |
N |
1 |
1 |
X |
1= deindex documents which got an error during update crawl |
I_DOCKEY_CONVERSION_LIST |
TREXT_CRUISE_LIST |
h |
16 |
|
X |
cruiser list for conversion (e.g. document keys) |
I_DONT_INDEX_DIRS |
TREXS_CRUISER-NUMC1 |
N |
1 |
0 |
X |
1=do not index directories |
I_EXCLUDE_DIRECTORIES |
TREXT_EXCLUDE_DIRECTORIES |
h |
8 |
|
X |
directories which must not be crawled |
I_FILE_PATH |
TREXS_CRUISER-STRING_FIELD |
g |
8 |
|
X |
file path where documents will be stored when connector type = 1 (file) |
I_GET_ACCESS_RIGHTS |
TREXS_CRUISER-NUMC1 |
N |
1 |
0 |
X |
1= extract ACLs from files |
I_HOST_INFORMATION |
TREXT_CRUISE_HOST_INFORMATION |
h |
280 |
|
X |
TREX cruiser host information |
I_INDEX_ID |
TREX_RFC-INDEX_ID |
C |
64 |
|
X |
indexid |
I_IS_MULTILANGUAGE_INDEX |
TREXS_CRUISER-NUMC1 |
N |
1 |
0 |
X |
1 = using multi language index |
I_LANGUAGE |
LAISO |
C |
2 |
|
X |
Language according to ISO 639 |
I_MAX_DEPTH |
TREXS_CRUISER-COUNTER |
N |
10 |
0 |
X |
maximal depth which will be crawled |
I_MAX_DOCSIZE |
TREXS_CRUISER-COUNTER |
N |
10 |
20000000 |
X |
maximal size in bytes which will be crawled |
I_MAX_RETRY_COUNT |
TREXS_CRUISER-COUNTER |
N |
10 |
3 |
X |
max retry of HTTP gets |
I_MIN_DOCSIZE |
TREXS_CRUISER-COUNTER |
N |
10 |
1 |
X |
minimal size in bytes which will be crawled |
I_NEGATIVE_FILE_EXTENSION |
TREXT_NEGATIVE_FILE_EXTENSIONS |
h |
8 |
|
X |
list of negative file extensions which will NOT be crawled |
I_OPTIMIZE_EVERY |
TREXS_CRUISER-COUNTER |
N |
10 |
1 |
X |
call an optimize after indexing n documents |
I_POSITIVE_FILE_EXTENSIONS |
TREXT_POSITIVE_FILE_EXTENSIONS |
h |
8 |
|
X |
list of positive file extensions which will be crawled |
I_PREPROCESSOR_POOL_SIZE |
TREXS_CRUISER-POOL_SIZE |
N |
2 |
3 |
X |
number of parallel requests to the PreProcessor processes |
I_PYTHON_COMMAND_ARGS |
TREXS_CRUISER-STRING_FIELD |
g |
8 |
|
X |
Python command for scheduled execution |
I_REGULAR_EXPRESSION |
TREXT_CRUISER_REG_EXPRESSION |
h |
8 |
|
X |
regular expressions for the TREX cruiser/crawler |
I_RESULT_CONNECTOR |
TREXT_CRUISE_RESULT_CONNECTOR |
h |
80 |
|
X |
TREX cruiser result connector |
I_RESULT_CONNECTOR_TYPE |
TREXS_CRUISER-NUMC1 |
N |
1 |
0 |
X |
0=TREX index (default), 1=file, 2=dummy for Python commands |
I_RFC_DESTINATION |
TREX_RFC-RFC_DESTINATION |
C |
32 |
|
|
RFC destination to TREX |
I_SCHEDULE_TIME |
TREXS_CRUISER-STRING_FIELD |
g |
8 |
|
X |
if not empty, scheduler will be used |
I_USER_AGENT |
TREXS_CRUISER-STRING_FIELD |
g |
8 |
'TREX' |
X |
user agent for HTTP get |
I_USE_FREESTYLE_CONTAINER |
TREXS_CRUISER-NUMC1 |
N |
1 |
0 |
X |
1= concatenate all attribute values to one |
I_USE_QUEUESERVER |
TREXS_CRUISER-NUMC1 |
N |
1 |
1 |
X |
1=using queue server for crawling / indexing |
I_USE_RAPTOR_ATTRIBUTES |
TREXS_CRUISER-NUMC1 |
N |
1 |
1 |
X |
extracting of standard attributes |
I_USE_ROBOTRULES |
TREXS_CRUISER-NUMC1 |
N |
1 |
0 |
X |
1 = crawler is using robot rules |