
/* OpenWebSpider
 *
 *  Authors:     Stefano Alimonti AND Stefano Fantin
 *  Version:     0.8
 *  E-Mails:     shen139 [at] openwebspider (dot) org AND stefanofantinguz@yahoo.it
 *
 *
 * This file is part of OpenWebSpider
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
 *
 */


#ifndef __MODULEs
#define __MODULEs

#include "platform.h"

/*
 * Registers a file extension in the global CustomExtensions table.
 *
 * ext: extension string to register (for example ".pdf").
 *
 * Returns 1 when the extension is already present (compared with stricmp)
 * or has just been stored, and 0 when ext is NULL/empty or every slot of
 * the table is in use.  At most MAXCUSTOMEXTENSIONSIZE-1 characters of
 * ext are stored; anything longer is cut off.
 */
static int OwsAddCustomExtension(const char *ext)
{
int slot = 0;

	if(ext == NULL || *ext == '\0')
		return 0;

	/* Walk the occupied slots; the first empty string marks where they end. */
	while(slot < MAXCUSTOMEXTENSIONS && CustomExtensions[slot][0] != 0)
	{
		if(stricmp(CustomExtensions[slot],(char*)ext) == 0)
			return 1;	/* already registered */

		slot++;
	}

	/* Ran off the end without finding a free slot. */
	if(slot >= MAXCUSTOMEXTENSIONS)
		return 0;

	strncpy(CustomExtensions[slot],ext,MAXCUSTOMEXTENSIONSIZE-1);
	CustomExtensions[slot][MAXCUSTOMEXTENSIONSIZE-1] = 0;	/* strncpy may not terminate */
	return 1;
}

/*
 * Registers the file extensions that belong to a module, chosen by which
 * module marker my_stristr() finds in the module's file name.
 *
 * filename: name or path of the module; NULL is ignored.
 *
 * The three checks are independent of each other, so a name containing
 * more than one of "mod_pdf", "mod_image" and "mod_video" registers every
 * matching group.
 */
static void OwsRegisterModuleExtensions(const char *filename)
{
/* Both tables are terminated by an empty-string sentinel. */
static const char *imageExts[] = {
	".jpg",".jpeg",".jpe",".jfif",".pjpeg",".png",".apng",".gif",".webp",".svg",
	".bmp",".ico",".cur",".tif",".tiff",".avif",".heic",".heif",".jp2",".j2k",".jpf",".jpx",".jxl","\0"
};
static const char *videoExts[] = {
	".mp4",".webm",".ogv",".mov",".avi",".mkv",".m4v",".flv",".wmv",".asf",
	".ts",".m2ts",".mpeg",".mpg",".mpe",".vob",".3gp",".3g2",
	".mp3",".m4a",".m4b",".aac",".adts",".oga",".ogg",".wav",".flac",".wma",".opus",".weba",
	".aif",".aiff",".mid",".midi",
	".m3u8",".mpd",".vtt",".srt","\0"
};
const char **cursor;

	if(filename == NULL)
		return;

	if(my_stristr((char*)filename,"mod_pdf"))
		OwsAddCustomExtension(".pdf");

	if(my_stristr((char*)filename,"mod_image"))
	{
		cursor = imageExts;
		while((*cursor)[0] != '\0')
		{
			OwsAddCustomExtension(*cursor);
			cursor++;
		}
	}

	if(my_stristr((char*)filename,"mod_video"))
	{
		cursor = videoExts;
		while((*cursor)[0] != '\0')
		{
			OwsAddCustomExtension(*cursor);
			cursor++;
		}
	}
}

/*
 * Reports whether the last component of a path contains a '.'.
 *
 * filename: path to inspect; NULL yields 0.
 *
 * Returns 1 when a '.' appears after the last directory separator ('/',
 * plus '\\' on WIN32 builds) or anywhere in a name without separators,
 * and 0 otherwise.
 */
static int OwsModulePathHasExtension(const char *filename)
{
const char *cursor;

	if(filename == NULL)
		return 0;

	/*
	 * Scan from the end of the string: whichever of '.' or a separator is
	 * met first is the right-most one, and that decides the answer.
	 */
	cursor = filename + strlen(filename);
	while(cursor != filename)
	{
		--cursor;

		if(*cursor == '.')
			return 1;

		if(*cursor == '/')
			return 0;
#ifdef WIN32
		if(*cursor == '\\')
			return 0;
#endif
	}

	return 0;
}

/*
 * Returns the file name to use when loading a module, appending the
 * module suffix OWS_MODULE_EXT when the given name has no extension.
 *
 * filename:   module name or path as supplied by the caller; may be NULL.
 * buffer:     scratch storage that receives the suffixed name.
 * bufferSize: size of buffer in bytes.
 *
 * Returns filename itself when it is NULL, already has an extension, no
 * usable buffer was supplied, or the suffixed name does not fit into
 * buffer.  Otherwise returns buffer, holding filename followed by
 * OWS_MODULE_EXT.
 */
static const char *OwsResolveModuleFilename(const char *filename, char *buffer, size_t bufferSize)
{
int written;

	if(!filename || OwsModulePathHasExtension(filename))
		return filename;

	if(!buffer || bufferSize == 0)
		return filename;

	written = snprintf(buffer, bufferSize, "%s%s", filename, OWS_MODULE_EXT);

	/*
	 * A truncated result would name a different file than the one asked
	 * for, so fall back to the caller's name instead of using it.
	 */
	if(written < 0 || (size_t)written >= bufferSize)
		return filename;

	return buffer;
}

/*
 * Loads a filter module (shared library), resolves its entry points and
 * appends it to the loadedModules table.
 *
 * filename: module name or path; OwsResolveModuleFilename() appends
 *           OWS_MODULE_EXT when the name has no extension.
 * handler:  received by value and overwritten with the library handle, so
 *           the caller's variable is NOT updated; the handle is stored in
 *           loadedModules[].dlHandle instead.
 *
 * Returns 1 on success.  Every failure (missing file name, library cannot
 * be opened, modFilter() not exported, module table full) prints a message
 * and ends the process with exit(0).  modInitFilter() is optional; when it
 * is absent the stored initHandler is NULL.  After a successful load the
 * extensions tied to the module name are registered through
 * OwsRegisterModuleExtensions().
 */
int myLoadModules(char* filename,void* handler)
{
void* modFilterHandler=NULL;
void* modInitHandler=NULL;
char resolvedFilename[1024];
const char *moduleFilename;

	/*
	 * Without this guard a NULL name would be handed to printf("%s") and to
	 * dlopen(NULL), which opens the main program rather than a module.
	 */
	if(filename==NULL || filename[0]==0)
	{
		printf("\nNo module filename given... exiting!\n\n");
		exit(0);
	}

	moduleFilename = OwsResolveModuleFilename(filename, resolvedFilename, sizeof(resolvedFilename));

	printf("\nTrying to open module: %s...",moduleFilename);
#ifdef WIN32
	/* LoadLibraryA maps the DLL into the process and returns its module handle. */
	handler = (HANDLE)LoadLibraryA(moduleFilename);
	if (!handler)
	{
		printf("Error (%lu)\n",(unsigned long)GetLastError());
		exit(0);
	}
#else
	/* dlopen loads the shared object and returns an opaque handle for it. */
	handler=dlopen(moduleFilename, RTLD_LAZY);
	if (!handler)
	{
		printf("%s\n\n",dlerror());
		exit(0);
	}
#endif

	printf("OK\n");

	printf("+ Trying to import functions:\n");
	printf(" - modFilter() ...");
#ifndef WIN32
	dlerror();	/* drop any stale error so the check below reflects this lookup only */
#endif
	modFilterHandler = myGetProcAddress(handler, "modFilter");
#ifdef WIN32
	if(modFilterHandler==NULL)
#else
	/* A NULL address is unusable as a filter even when no error was reported. */
	if(dlerror()!=NULL || modFilterHandler==NULL)
#endif
	{
		printf("not found\n");
		printf("No functions loaded... exiting! (Omit '-f' if you don't want to load functions from a module)\n\n");
		exit(0);
	}
	printf("loaded\n");

	printf("   - modFilter() has init function modInitFilter():...");
#ifndef WIN32
	dlerror();	/* same reasoning as above */
#endif
	modInitHandler = myGetProcAddress(handler, "modInitFilter");
#ifdef WIN32
	if(modInitHandler==NULL)
#else
	if(dlerror()!=NULL || modInitHandler==NULL)
#endif
	{
		modInitHandler = NULL;	/* the init function is optional */
		printf("no\n");
	}
	else
	{
		printf("yes\n");
	}

	if(loadedModuleCount >= MAXLOADEDMODULES)
	{
		printf("Too many modules loaded... exiting!\n\n");
		exit(0);
	}

	/* Record the module in the next free slot; the stored name is truncated to fit. */
	memset(&loadedModules[loadedModuleCount],0,sizeof(loadedModules[loadedModuleCount]));
	{
		size_t filenameLen = strlen(moduleFilename);
		size_t filenameMax = sizeof(loadedModules[loadedModuleCount].filename)-1;
		if(filenameLen > filenameMax)
			filenameLen = filenameMax;
		memcpy(loadedModules[loadedModuleCount].filename,moduleFilename,filenameLen);
		loadedModules[loadedModuleCount].filename[filenameLen]=0;
	}
	loadedModules[loadedModuleCount].dlHandle = handler;
	loadedModules[loadedModuleCount].handler = modFilterHandler;
	loadedModules[loadedModuleCount].initHandler = modInitHandler;
	loadedModuleCount++;

	OwsRegisterModuleExtensions(moduleFilename);

	return 1;
}

/*
 * Checks whether a module file can be opened for reading.
 *
 * filename: module name or path; it is first passed through
 *           OwsResolveModuleFilename(), which may append OWS_MODULE_EXT.
 *
 * Returns 1 if fopen() in "rb" mode succeeds on the resolved name, and 0
 * otherwise.  The file is closed again immediately.
 */
static int OwsModuleFileExists(const char *filename)
{
char nameBuffer[1024];
FILE *probe;
int found = 0;

	probe = fopen(OwsResolveModuleFilename(filename, nameBuffer, sizeof(nameBuffer)), "rb");
	if(probe != NULL)
	{
		fclose(probe);
		found = 1;
	}

	return found;
}

/*
 * Loads a module from the first known location where its file exists.
 *
 * moduleName: bare module name (for example "mod_pdf"); NULL or an empty
 *             string is ignored.
 *
 * Locations are probed in this order with OwsModuleFileExists():
 *   1. modules/<name>/<name>
 *   2. <name>
 *   3. /usr/local/lib/openwebspider/<name>   (non-WIN32 builds only)
 *   4. /usr/lib/openwebspider/<name>         (non-WIN32 builds only)
 * The first hit is passed to myLoadModules() and the search stops.  When
 * no candidate exists nothing is loaded and nothing is reported.
 */
static void OwsLoadDefaultModuleFromKnownPaths(const char *moduleName)
{
char candidate[512];
int attempt;

	if(moduleName == NULL || *moduleName == '\0')
		return;

	for(attempt = 0; ; attempt++)
	{
		/* Build the candidate path for this attempt; running out of candidates ends the search. */
		switch(attempt)
		{
		case 0:
			snprintf(candidate,sizeof(candidate),"modules/%s/%s",moduleName,moduleName);
			break;
		case 1:
			snprintf(candidate,sizeof(candidate),"%s",moduleName);
			break;
#ifndef WIN32
		case 2:
			snprintf(candidate,sizeof(candidate),"/usr/local/lib/openwebspider/%s",moduleName);
			break;
		case 3:
			snprintf(candidate,sizeof(candidate),"/usr/lib/openwebspider/%s",moduleName);
			break;
#endif
		default:
			return;
		}
		candidate[sizeof(candidate)-1] = 0;

		if(OwsModuleFileExists(candidate))
		{
			myLoadModules(candidate, modHandler);
			return;
		}
	}
}

/*
 * Switches the crawler to its broadest configuration: registers a wide
 * list of file extensions, sets the crawl-related globals below, and
 * tries to load the mod_pdf, mod_image and mod_video modules.
 *
 * Takes no arguments and returns nothing; it works entirely through
 * global state (CustomExtensions via OwsAddCustomExtension, the b / n / x
 * / i prefixed settings, CRAWLER_LIMITS, nThread and the loaded-module table).
 */
static void OwsEnableAggressiveIndexDefaults(void)
{
/*
 * Extensions to register, terminated by an empty-string sentinel.
 * A few entries (".ics", ".m4v") are listed twice; that is harmless
 * because OwsAddCustomExtension() returns without storing a name that
 * is already present.
 */
static const char *wideMediaExtensions[] = {
	".pdf",
	".ps",".eps",".xps",".oxps",
	".doc",".docx",".docm",".dot",".dotx",".dotm",
	".xls",".xlsx",".xlsm",".xlsb",".xlt",".xltx",".xltm",
	".ppt",".pptx",".pptm",".pps",".ppsx",".ppsm",
	".odt",".ott",".ods",".ots",".odp",".otp",".odg",".otg",".odf",
	".rtf",".txt",".text",".log",".md",".markdown",".rst",".tex",
	".csv",".tsv",".tab",".ics",".vcf",
	".epub",".mobi",".azw",".azw3",".fb2",".djvu",".djv",
	".jpg",".jpeg",".jpe",".jfif",".pjpeg",".png",".apng",".gif",".webp",".svg",
	".bmp",".ico",".cur",".tif",".tiff",".avif",".heic",".heif",".jp2",".j2k",".jpf",".jpx",".jxl",
	".psd",".ai",".indd",".raw",".cr2",".nef",".orf",".sr2",".dng",
	".mp4",".webm",".ogv",".mov",".avi",".mkv",".m4v",".flv",".wmv",".asf",
	".ts",".m2ts",".mts",".mpeg",".mpg",".mpe",".mpv",".m2v",".vob",".3gp",".3g2",
	".f4v",".divx",".rm",".rmvb",".dv",".mxf",
	".mp3",".m4a",".m4b",".aac",".adts",".oga",".ogg",".wav",".flac",".wma",".opus",".weba",
	".aif",".aiff",".aifc",".alac",".amr",".ape",".au",".caf",".mka",".ra",".ac3",".dts",
	".mid",".midi",
	".m3u",".m3u8",".pls",".xspf",".mpd",".ism",".ismc",".f4m",
	".vtt",".srt",".ass",".ssa",".ttml",".dfxp",
	".woff",".woff2",".ttf",".otf",".eot",
	".zip",".tar",".gz",".tgz",".bz2",".tbz",".tbz2",".xz",".txz",".7z",".rar",
	".zst",".br",".lz",".lzma",".cab",".iso",".dmg",
	".css",".map",".js",".mjs",".cjs",".wasm",
	".json",".jsonl",".jsonld",".xml",".rss",".atom",".rdf",".xhtml",".webmanifest",
	".kml",".kmz",".gpx",".geojson",
	".apk",".ipa",".exe",".msi",".deb",".rpm",".pkg",".appimage",
	".torrent",".metalink",".metalink4",
	".bin",".dat",".part",".tmp",
	".sqlite",".sqlite3",".db",
	".msg",".eml",".mbox",
	".blend",".fbx",".obj",".stl",".gltf",".glb",".dae",".3ds",".usd",".usdz",
	".dwg",".dxf",".skp",
	".nc",".fits",".fit",".h5",".hdf5",
	".parquet",".orc",".avro",".feather",
	".ndjson",".har",".warc",".warc.gz",
	".crx",".xpi",".jar",".ear",".war",
	".cer",".crt",".pem",".p7b",".p7c",
	".ics",".ifc",".step",".stp",".iges",".igs",
	".sldprt",".sldasm",".prt",".asm",
	".pages",".numbers",".key",
	".qcp",".spx",".m4r",".m4v",
	"\0"
};
int i;

	/* Register every table entry up to (not including) the sentinel. */
	for(i=0; wideMediaExtensions[i][0] != '\0'; i++)
		OwsAddCustomExtension(wideMediaExtensions[i]);

	/* Make bare "-i URL" behave like the broadest crawl profile. */
	bAggressiveIndexMode = 1;
	bFreeIndexingMode = 1;
	nRelationships = 2;
	xCacheHtml = 1;
	xCacheHtmlCompressed = 1;
	iCrawlDelay = 0;

	/* NOTE(review): 0 presumably means "no limit" for these fields - confirm against the crawler. */
	CRAWLER_LIMITS.nMaxPagesPerSite = 0;
	CRAWLER_LIMITS.nMaxDepthLevel = 0;
	CRAWLER_LIMITS.nMaxSecondsPerSite = 0;
	CRAWLER_LIMITS.nMaxBytesPerSite = 0;
	CRAWLER_LIMITS.nMaxErrorPerSite = 0;

	/*
	 * Raise the thread count to 2048 only when it is currently 20 and the
	 * build allows at least 2048 threads.
	 * NOTE(review): 20 looks like the default thread count, so an explicit
	 * user setting would be left alone - confirm.
	 */
	if(nThread == 20 && MAXTHREAD >= 2048)
		nThread = 2048;

	/*
	 * Each call loads the module from the first known location where its
	 * file exists; a module that is found nowhere is skipped silently.
	 */
	OwsLoadDefaultModuleFromKnownPaths("mod_pdf");
	OwsLoadDefaultModuleFromKnownPaths("mod_image");
	OwsLoadDefaultModuleFromKnownPaths("mod_video");
}

/*
 * Releases a module handle through the platform loader.
 *
 * handler: library handle to release.
 *
 * Returns the unmodified result of the platform call: FreeLibrary() on
 * WIN32 builds, dlclose() otherwise.
 * NOTE(review): according to their API documentation the two calls use
 * opposite conventions (FreeLibrary: nonzero on success; dlclose: 0 on
 * success), so the value cannot be interpreted portably - confirm how
 * callers use it.
 */
int myUnloadModule(void* handler)
{
int status;

#ifdef WIN32
	status = FreeLibrary((HANDLE)handler);
#else
	/*
	 * dlclose drops one reference to the library; the library itself is
	 * only unloaded once the count reaches zero and nothing else uses its
	 * symbols.
	 */
	status = dlclose(handler);
#endif

	return status;
}

/*
 * Looks up an exported symbol in a loaded module.
 *
 * handler: library handle to search.
 * funct:   name of the symbol to look up.
 *
 * Returns whatever the platform lookup returns: GetProcAddress() on WIN32
 * builds, dlsym() otherwise.
 */
void* myGetProcAddress(void* handler,char* funct)
{
void *symbol;

#ifdef WIN32
	symbol = GetProcAddress((HANDLE)handler, funct);
#else
	symbol = dlsym(handler, funct);
#endif

	return symbol;
}

/*
 * Returns the modFilter handler of the first loaded module.
 *
 * functName: function name being requested; only "modFilter" is recognised.
 *
 * Returns loadedModules[0].handler when at least one module is loaded and
 * functName equals "modFilter", otherwise NULL.  functName is not compared
 * at all while no module is loaded.
 */
void* GetModFunctionHandlerByName(char* functName)
{
	if(loadedModuleCount>0)
	{
		if(strcmp(functName,"modFilter")==0)
			return loadedModules[0].handler;
	}

	return NULL;
}

/*
 * Returns the init handler stored for the first loaded module.
 *
 * functName: function name being requested; only "modFilter" is recognised.
 *
 * Returns loadedModules[0].initHandler (which may itself be NULL) when at
 * least one module is loaded and functName equals "modFilter", otherwise
 * NULL.  functName is not compared at all while no module is loaded.
 */
void* GetInitModFunctionHandlerByName(char* functName)
{
	if(loadedModuleCount>0)
	{
		if(strcmp(functName,"modFilter")==0)
			return loadedModules[0].initHandler;
	}

	return NULL;
}

/*
 * Returns how many handlers exist for the named function.
 *
 * functName: function name being requested; only "modFilter" is recognised.
 *
 * Returns loadedModuleCount for "modFilter" and 0 for any other name.
 */
unsigned int GetModFunctionHandlerCountByName(char* functName)
{
	if(strcmp(functName,"modFilter")!=0)
		return 0;

	return loadedModuleCount;
}

/*
 * Returns the modFilter handler of the module at a given position.
 *
 * functName: function name being requested; only "modFilter" is recognised.
 * index:     zero-based position in the loadedModules table.
 *
 * Returns loadedModules[index].handler, or NULL when functName is not
 * "modFilter" or index is not below loadedModuleCount.
 */
void* GetModFunctionHandlerByNameAt(char* functName,unsigned int index)
{
	if(strcmp(functName,"modFilter")==0 && index<loadedModuleCount)
		return loadedModules[index].handler;

	return NULL;
}

/*
 * Returns the stored file name of the module at a given position.
 *
 * index: zero-based position in the loadedModules table.
 *
 * Returns a pointer to loadedModules[index].filename (storage owned by
 * the table), or NULL when index is not below loadedModuleCount.
 */
char* GetLoadedModuleFilenameByIndex(unsigned int index)
{
	return (index<loadedModuleCount) ? loadedModules[index].filename : NULL;
}

/*
 * Counts the loaded modules that have an init handler.
 *
 * functName: function name being requested; only "modFilter" is recognised.
 *
 * Returns the number of loadedModules entries whose initHandler is set,
 * or 0 when functName is not "modFilter".
 */
unsigned int GetInitModFunctionHandlerCountByName(char* functName)
{
unsigned int slot=0;
unsigned int withInit=0;

	if(strcmp(functName,"modFilter")!=0)
		return 0;

	while(slot<loadedModuleCount)
	{
		if(loadedModules[slot].initHandler)
			withInit++;
		slot++;
	}

	return withInit;
}

/*
 * Returns the index-th init handler, counting only modules that have one.
 *
 * functName: function name being requested; only "modFilter" is recognised.
 * index:     zero-based position among the loaded modules whose
 *            initHandler is set (modules without one are skipped).
 *
 * Returns the matching init handler, or NULL when functName is not
 * "modFilter" or fewer than index+1 modules have an init handler.
 */
void* GetInitModFunctionHandlerByNameAt(char* functName,unsigned int index)
{
unsigned int slot;
unsigned int remaining=index;	/* init handlers still to skip */

	if(strcmp(functName,"modFilter")!=0)
		return NULL;

	for(slot=0;slot<loadedModuleCount;slot++)
	{
		if(!loadedModules[slot].initHandler)
			continue;

		if(remaining==0)
			return loadedModules[slot].initHandler;

		remaining--;
	}

	return NULL;
}


#endif
