I_ACCESS_DELAY |
TREXS_CRUISER-COUNTER |
N |
10 |
0 |
X |
time between 2 HTTP gets in ms |
I_CRUISE_NAME |
TREXS_CRUISER-CRUISE_NAME |
C |
64 |
|
|
name of the crawl / cruise |
I_EXCLUDE_DIRECTORIES |
TREXT_EXCLUDE_DIRECTORIES |
h |
8 |
|
X |
directories which must not be crawled |
I_HOST_INFORMATION |
TREXT_CRUISE_HOST_INFORMATION |
h |
280 |
|
X |
TREX cruiser host information |
I_MAX_DEPTH |
TREXS_CRUISER-COUNTER |
N |
10 |
1 |
X |
maximal depth which will be crawled |
I_MAX_DOCSIZE |
TREXS_CRUISER-COUNTER |
N |
10 |
20000000 |
X |
maximal size in bytes which will be crawled |
I_MAX_RETRY_COUNT |
TREXS_CRUISER-COUNTER |
N |
10 |
3 |
X |
maximum number of retries for HTTP gets |
I_MIN_DOCSIZE |
TREXS_CRUISER-COUNTER |
N |
10 |
1 |
X |
minimal size in bytes which will be crawled |
I_NEGATIVE_FILE_EXTENSION |
TREXT_NEGATIVE_FILE_EXTENSIONS |
h |
8 |
|
X |
list of negative file extensions which will NOT be crawled |
I_POSITIVE_FILE_EXTENSIONS |
TREXT_POSITIVE_FILE_EXTENSIONS |
h |
8 |
|
X |
list of positive file extensions which will be crawled |
I_PREPROCESSOR_POOL_SIZE |
TREXS_CRUISER-POOL_SIZE |
N |
2 |
0 |
X |
number of parallel requests to the PreProcessor processes |
I_REGULAR_EXPRESSION |
TREXT_CRUISER_REG_EXPRESSION |
h |
8 |
|
X |
regular expressions for the TREX cruiser/crawler |
I_RESET_PARAMETERS |
INT4 |
I |
4 |
|
X |
1=Regexp;2=Excldirs;3=PosFileExts;8=NegFileExts |
I_RFC_DESTINATION |
TREX_RFC-RFC_DESTINATION |
C |
32 |
|
|
RFC destination to TREX |
I_RUN_MODE |
TREXS_CRUISER-RUN_MODE |
N |
1 |
|
|
1=update,2=continuous,3=default,4=recrawl,5=repair,6=full,7
I_SCHEDULE_TIME |
TREXS_CRUISER-STRING_FIELD |
g |
8 |
|
X |
if not empty, scheduler will be used |
I_USER_AGENT |
TREXS_CRUISER-STRING_FIELD |
g |
8 |
'TREX' |
X |
user agent for HTTP get |