Java: Simple Parser for Apache HTTP Server access.log
Parser para o access.log do Apache HTTP Server
English: Simple Parser for Apache HTTP Server access.log
To parse logs written through rsyslog instead of the Apache combined format, change the variable accessLogType to "rsyslog".
Código / Code
import java.io.BufferedReader;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.InputStreamReader;
import java.util.ArrayList;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
/**
*
* @author ebasso
*/
public class SimpleParserApacheLog {
private static String fileAccesslog = "/var/logs/apache/access.log";
private static String accessLogType = "apache"; // For Apache combined
//private static String accessLogType = "rsyslog"; // In case of rsyslog
private static int REQUEST_TIME_GROUP = 0;
private static int CLIENT_REQUESTGROUP = 0;
private static int HTTP_STATUS_CODE_GROUP = 0;
public static void main(String[] args) {
try {
System.out.println("SimpleParserApacheLog: start");
SimpleParserApacheLog myApp = new SimpleParserApacheLog();
// Faz a carga das variaveis de configuração
myApp.processFileAccessLog();
System.out.println("SimpleParserApacheLog: end");
} catch (Exception e) {
e.printStackTrace();
}
}
private void processFileAccessLog() throws Exception {
File file = new File(fileAccesslog);
FileInputStream fis = null;
BufferedReader reader = null;
String lineinput = "";
String response = "";
System.out.println("Read file [" + fileAccesslog + "]");
int countLines = 0;
int countLinesWithDevice = 0;
int countLinesUpdates = 0;
ArrayList<String> listaFull = null;
try {
fis = new FileInputStream(file);
reader = new BufferedReader(new InputStreamReader(fis));
while ((lineinput = reader.readLine()) != null) {
processLine(lineinput);
countLines++;
}
System.out.println("Total Lines: [" + countLines + "]");
} catch (FileNotFoundException e) {
System.out.println("Arquivo [" + fileAccesslog + "] não existe");
} catch (IOException e) {
e.printStackTrace();
} finally {
if (reader != null) {
try {
reader.close();
} catch (IOException e) {
e.printStackTrace();
}
}
}
}
private void processLine(String lineInput) throws Exception {
//String clientHost = null;
String requestTime = null;
String clientRequest = null;
String httpStatusCode = null;
//String numOfBytes = null;
//String referer = null;
//String agent = null;
int pos = 0;
String deviceId = null;
Pattern accessLogPattern = Pattern.compile(getAccessLogRegex(), Pattern.CASE_INSENSITIVE | Pattern.DOTALL);
Matcher accessLogEntryMatcher;
accessLogEntryMatcher = accessLogPattern.matcher(lineInput);
if (!accessLogEntryMatcher.matches()) {
System.out.println(" : couldn't be parsed");
}
// String regexA;
// System.out.print("[");
// for (int i = 1; i < 12; i++) {
// regexA = (String) accessLogEntryMatcher.group(i);
// System.out.print("regex" + i + "=" + regexA + "||");
// }
// System.out.println("]");
System.out.print("[");
System.out.print((String) accessLogEntryMatcher.group(REQUEST_TIME_GROUP) + "; ");
System.out.print( (String) accessLogEntryMatcher.group(CLIENT_REQUESTGROUP)+ "; ");
System.out.print( (String) accessLogEntryMatcher.group(HTTP_STATUS_CODE_GROUP));
System.out.println("]");
}
private String getAccessLogRegex() throws Exception {
String myRegex = "";
if (accessLogType.equals("apache")) {
// 172.18.10.37 - - [12/Aug/2015:07:18:45 -0300] "GET
String regex1 = "^([\\d.]+)"; // Client IP
String regex2 = " (\\S+)"; // -
String regex3 = " (\\S+)"; // -
String regex4 = " \\[([\\w:/]+\\s[+\\-]\\d{4})\\]"; // Date
String regex5 = " \"(.+?)\""; // request method and url
String regex6 = " (\\d{3})"; // HTTP code
String regex7 = " (\\d+|(.+?))"; // Number of bytes
String regex8 = " \"([^\"]+|(.+?))\""; // Referer
String regex9 = " \"([^\"]+|(.+?))\""; // Agent
REQUEST_TIME_GROUP = 4;
CLIENT_REQUESTGROUP = 5;
HTTP_STATUS_CODE_GROUP = 6;
myRegex = regex1 + regex2 + regex3 + regex4 + regex5 + regex6 + regex7 + regex8 + regex9;
} else if (accessLogType.equals("rsyslog")) {
//Aug 11 03:33:50 server1 servidoreshttp: 186.204.167.161 - [email protected] - "POST
String regex1 = "^(\\S+ \\d{2} \\S+)"; // Date
String regex2 = " (\\S+)"; // server1:
String regex3 = " (\\S+)"; // servidoreshttp:
String regex4 = " ([\\d.]+)"; // Client IP
String regex5 = " (\\S+)"; // -
String regex6 = " (\\S+)"; // usuario
String regex7 = " (\\S+)"; // -
String regex8 = " \"(.+?)\""; // request method and url
String regex9 = " (\\d{3})"; // HTTP code
String regex10 = " (\\d+|(.+?))"; // Number of bytes
String regex11 = " \"([^\"]+|(.+?))\""; // Referer
String regex12 = " \"([^\"]+|(.+?))\""; // Agent
REQUEST_TIME_GROUP = 1;
CLIENT_REQUESTGROUP = 8;
HTTP_STATUS_CODE_GROUP = 9;
myRegex = regex1 + regex2 + regex3 + regex4 + regex5 + regex6 + regex7 + regex8 + regex9 + regex10 + regex11 + regex12;
}
return myRegex;
}
}
Running
java SimpleParserApacheLog
SimpleParserApacheLog: start
Read file [/var/logs/apache/access.log]
[Aug 11 03:33:50; GET /foo.html HTTP/1.1; 200]
[Aug 11 03:33:50; POST /[email protected]&DeviceId=RP3NVFADFADFADFADFDSAFAFA8 HTTP/1.1; 200]
...
[Aug 11 03:34:50; POST /index.php?action=update&deviceId=Android_f570dlkjadlfncc0 HTTP/1.1; 200]
Total Lines: [10]
SimpleParserApacheLog: end