restructure parser

This commit is contained in:
Christian Schabesberger 2016-02-02 18:43:20 +01:00
parent bad576c23d
commit d097363b24
12 changed files with 365 additions and 274 deletions

View file

@ -72,7 +72,7 @@ public class VideoItemDetailActivity extends AppCompatActivity {
StreamingService[] serviceList = ServiceList.getServices();
//VideoExtractor videoExtractor = null;
for (int i = 0; i < serviceList.length; i++) {
if (serviceList[i].acceptUrl(videoUrl)) {
if (serviceList[i].getUrlIdHandler().acceptUrl(videoUrl)) {
arguments.putInt(VideoItemDetailFragment.STREAMING_SERVICE, i);
currentStreamingService = i;
//videoExtractor = ServiceList.getService(i).getExtractorInstance();

View file

@ -112,7 +112,7 @@ public class VideoItemDetailFragment extends Fragment {
public void run() {
try {
videoExtractor = service.getExtractorInstance(videoUrl, new Downloader());
VideoInfo videoInfo = videoExtractor.getVideoInfo();
VideoInfo videoInfo = VideoInfo.getVideoInfo(videoExtractor, new Downloader());
h.post(new VideoResultReturnedRunnable(videoInfo));
h.post(new SetThumbnailRunnable(
//todo: make bitmaps not bypass tor

View file

@ -0,0 +1,101 @@
package org.schabi.newpipe.crawler;
import android.util.Xml;
import org.xmlpull.v1.XmlPullParser;
import java.io.IOException;
import java.io.StringReader;
import java.util.Vector;
/**
* Created by Christian Schabesberger on 02.02.16.
*
* Copyright (C) Christian Schabesberger 2016 <chris.schabesberger@mailbox.org>
* DashMpdParser.java is part of NewPipe.
*
* NewPipe is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* NewPipe is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with NewPipe. If not, see <http://www.gnu.org/licenses/>.
*/
public class DashMpdParser {
static class DashMpdParsingException extends ParsingException {
DashMpdParsingException(String message, Exception e) {
super(message, e);
}
}
public static VideoInfo.AudioStream[] getAudioStreams(String dashManifestUrl,
Downloader downloader)
throws DashMpdParsingException {
String dashDoc;
try {
dashDoc = downloader.download(dashManifestUrl);
} catch(IOException ioe) {
throw new DashMpdParsingException("Could not get dash mpd: " + dashManifestUrl, ioe);
}
Vector<VideoInfo.AudioStream> audioStreams = new Vector<>();
try {
XmlPullParser parser = Xml.newPullParser();
parser.setInput(new StringReader(dashDoc));
String tagName = "";
String currentMimeType = "";
int currentBandwidth = -1;
int currentSamplingRate = -1;
boolean currentTagIsBaseUrl = false;
for(int eventType = parser.getEventType();
eventType != XmlPullParser.END_DOCUMENT;
eventType = parser.next() ) {
switch(eventType) {
case XmlPullParser.START_TAG:
tagName = parser.getName();
if(tagName.equals("AdaptationSet")) {
currentMimeType = parser.getAttributeValue(XmlPullParser.NO_NAMESPACE, "mimeType");
} else if(tagName.equals("Representation") && currentMimeType.contains("audio")) {
currentBandwidth = Integer.parseInt(
parser.getAttributeValue(XmlPullParser.NO_NAMESPACE, "bandwidth"));
currentSamplingRate = Integer.parseInt(
parser.getAttributeValue(XmlPullParser.NO_NAMESPACE, "audioSamplingRate"));
} else if(tagName.equals("BaseURL")) {
currentTagIsBaseUrl = true;
}
break;
case XmlPullParser.TEXT:
if(currentTagIsBaseUrl &&
(currentMimeType.contains("audio"))) {
int format = -1;
if(currentMimeType.equals(MediaFormat.WEBMA.mimeType)) {
format = MediaFormat.WEBMA.id;
} else if(currentMimeType.equals(MediaFormat.M4A.mimeType)) {
format = MediaFormat.M4A.id;
}
audioStreams.add(new VideoInfo.AudioStream(parser.getText(),
format, currentBandwidth, currentSamplingRate));
}
break;
case XmlPullParser.END_TAG:
if(tagName.equals("AdaptationSet")) {
currentMimeType = "";
} else if(tagName.equals("BaseURL")) {
currentTagIsBaseUrl = false;
}//no break needed here
}
}
} catch(Exception e) {
throw new DashMpdParsingException("Could not parse Dash mpd", e);
}
return audioStreams.toArray(new VideoInfo.AudioStream[audioStreams.size()]);
}
}

View file

@ -0,0 +1,47 @@
package org.schabi.newpipe.crawler;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
/**
* Created by Christian Schabesberger on 02.02.16.
*
* Copyright (C) Christian Schabesberger 2016 <chris.schabesberger@mailbox.org>
* RegexHelper.java is part of NewPipe.
*
* NewPipe is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* NewPipe is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with NewPipe. If not, see <http://www.gnu.org/licenses/>.
*/
/** avoid using regex !!! */
public class RegexHelper {
public static class RegexException extends ParsingException {
public RegexException(String message) {
super(message);
}
}
public static String matchGroup1(String pattern, String input) throws RegexException {
Pattern pat = Pattern.compile(pattern);
Matcher mat = pat.matcher(input);
boolean foundMatch = mat.find();
if (foundMatch) {
return mat.group(1);
}
else {
//Log.e(TAG, "failed to find pattern \""+pattern+"\" inside of \""+input+"\"");
throw new RegexException("failed to find pattern \""+pattern+" inside of "+input+"\"");
}
}
}

View file

@ -27,11 +27,11 @@ public interface StreamingService {
public String name = "";
}
ServiceInfo getServiceInfo();
VideoExtractor getExtractorInstance(String url, Downloader downloader) throws IOException, CrawlingException;
VideoExtractor getExtractorInstance(String url, Downloader downloader)
throws IOException, CrawlingException;
SearchEngine getSearchEngineInstance();
/**When a VIEW_ACTION is caught this function will test if the url delivered within the calling
Intent was meant to be watched with this Service.
Return false if this service shall not allow to be called through ACTIONs.*/
boolean acceptUrl(String videoUrl);
UrlIdHandler getUrlIdHandler();
}

View file

@ -0,0 +1,32 @@
package org.schabi.newpipe.crawler;
/**
* Created by Christian Schabesberger on 02.02.16.
*
* Copyright (C) Christian Schabesberger 2016 <chris.schabesberger@mailbox.org>
* UrlIdHandler.java is part of NewPipe.
*
* NewPipe is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* NewPipe is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with NewPipe. If not, see <http://www.gnu.org/licenses/>.
*/
public interface UrlIdHandler {
String getVideoUrl(String videoId);
String getVideoId(String siteUrl) throws ParsingException;
String cleanUrl(String siteUrl) throws ParsingException;
/**When a VIEW_ACTION is caught this function will test if the url delivered within the calling
Intent was meant to be watched with this Service.
Return false if this service shall not allow to be called through ACTIONs.*/
boolean acceptUrl(String videoUrl);
}

View file

@ -20,14 +20,14 @@ package org.schabi.newpipe.crawler;
* along with NewPipe. If not, see <http://www.gnu.org/licenses/>.
*/
import java.util.List;
import java.net.URL;
import java.util.Vector;
/**Scrapes information from a video streaming service (eg, YouTube).*/
@SuppressWarnings("ALL")
public abstract class VideoExtractor {
public interface VideoExtractor {
public class ExctractorInitException extends CrawlingException {
public ExctractorInitException() {}
@ -42,13 +42,6 @@ public abstract class VideoExtractor {
}
}
public class RegexException extends ParsingException {
public RegexException() {}
public RegexException(String message) {
super(message);
}
}
public class ContentNotAvailableException extends ParsingException {
public ContentNotAvailableException() {}
public ContentNotAvailableException(String message) {
@ -62,111 +55,6 @@ public abstract class VideoExtractor {
}
}
protected final String pageUrl;
protected VideoInfo videoInfo;
@SuppressWarnings("WeakerAccess")
public VideoExtractor(String url, Downloader dl) {
this.pageUrl = url;
}
/**Fills out the video info fields which are common to all services.
* Probably needs to be overridden by subclasses*/
public VideoInfo getVideoInfo() throws CrawlingException
{
if(videoInfo == null) {
videoInfo = new VideoInfo();
}
if(videoInfo.webpage_url.isEmpty()) {
videoInfo.webpage_url = pageUrl;
}
if (videoInfo.title.isEmpty()) {
videoInfo.title = getTitle();
}
if (videoInfo.duration < 1) {
videoInfo.duration = getLength();
}
if (videoInfo.uploader.isEmpty()) {
videoInfo.uploader = getUploader();
}
if (videoInfo.description.isEmpty()) {
videoInfo.description = getDescription();
}
if (videoInfo.view_count == -1) {
videoInfo.view_count = getViews();
}
if (videoInfo.upload_date.isEmpty()) {
videoInfo.upload_date = getUploadDate();
}
if (videoInfo.thumbnail_url.isEmpty()) {
videoInfo.thumbnail_url = getThumbnailUrl();
}
if (videoInfo.id.isEmpty()) {
videoInfo.id = getVideoId(pageUrl);
}
/** Load and extract audio*/
if (videoInfo.audioStreams == null) {
videoInfo.audioStreams = getAudioStreams();
}
/** Extract video stream url*/
if (videoInfo.videoStreams == null) {
videoInfo.videoStreams = getVideoStreams();
}
if (videoInfo.uploader_thumbnail_url.isEmpty()) {
videoInfo.uploader_thumbnail_url = getUploaderThumbnailUrl();
}
if (videoInfo.startPosition < 0) {
videoInfo.startPosition = getTimeStamp();
}
if(videoInfo.dashMpdUrl.isEmpty()) {
videoInfo.dashMpdUrl = getDashMpdUrl();
}
if(videoInfo.average_rating.isEmpty()) {
videoInfo.average_rating = getAverageRating();
}
if(videoInfo.like_count == -1) {
videoInfo.like_count = getLikeCount();
}
if(videoInfo.dislike_count == -1) {
videoInfo.dislike_count = getDislikeCount();
}
if(videoInfo.nextVideo == null) {
videoInfo.nextVideo = getNextVideo();
}
if(videoInfo.relatedVideos == null) {
videoInfo.relatedVideos = getRelatedVideos();
}
//Bitmap thumbnail = null;
//Bitmap uploader_thumbnail = null;
//int videoAvailableStatus = VIDEO_AVAILABLE;
return videoInfo;
}
//todo: remove these functions, or make them static, otherwise its useles, to have them here
public abstract String getVideoUrl(String videoId);
public abstract String getVideoId(String siteUrl) throws ParsingException;
///////////////////////////////////////////////////////////////////////////////////////////
public abstract int getTimeStamp() throws ParsingException;
public abstract String getTitle() throws ParsingException;
public abstract String getDescription() throws ParsingException;
@ -185,4 +73,6 @@ public abstract class VideoExtractor {
public abstract int getDislikeCount() throws ParsingException;
public abstract VideoPreviewInfo getNextVideo() throws ParsingException;
public abstract Vector<VideoPreviewInfo> getRelatedVideos() throws ParsingException;
public abstract UrlIdHandler getUrlIdConverter();
public abstract String getPageUrl();
}

View file

@ -1,5 +1,6 @@
package org.schabi.newpipe.crawler;
import java.io.IOException;
import java.util.List;
/**
@ -26,8 +27,52 @@ import java.util.List;
@SuppressWarnings("ALL")
public class VideoInfo extends AbstractVideoInfo {
/**Fills out the video info fields which are common to all services.
* Probably needs to be overridden by subclasses*/
public static VideoInfo getVideoInfo(VideoExtractor extractor, Downloader downloader)
throws CrawlingException, IOException {
VideoInfo videoInfo = new VideoInfo();
UrlIdHandler uiconv = extractor.getUrlIdConverter();
videoInfo.webpage_url = extractor.getPageUrl();
videoInfo.title = extractor.getTitle();
videoInfo.duration = extractor.getLength();
videoInfo.uploader = extractor.getUploader();
videoInfo.description = extractor.getDescription();
videoInfo.view_count = extractor.getViews();
videoInfo.upload_date = extractor.getUploadDate();
videoInfo.thumbnail_url = extractor.getThumbnailUrl();
videoInfo.id = uiconv.getVideoId(extractor.getPageUrl());
videoInfo.dashMpdUrl = extractor.getDashMpdUrl();
/** Load and extract audio*/
videoInfo.audioStreams = extractor.getAudioStreams();
if(videoInfo.dashMpdUrl != null && !videoInfo.dashMpdUrl.isEmpty()) {
if(videoInfo.audioStreams == null || videoInfo.audioStreams.length == 0) {
videoInfo.audioStreams =
DashMpdParser.getAudioStreams(videoInfo.dashMpdUrl, downloader);
}
}
/** Extract video stream url*/
videoInfo.videoStreams = extractor.getVideoStreams();
videoInfo.uploader_thumbnail_url = extractor.getUploaderThumbnailUrl();
videoInfo.startPosition = extractor.getTimeStamp();
videoInfo.average_rating = extractor.getAverageRating();
videoInfo.like_count = extractor.getLikeCount();
videoInfo.dislike_count = extractor.getDislikeCount();
videoInfo.nextVideo = extractor.getNextVideo();
videoInfo.relatedVideos = extractor.getRelatedVideos();
//Bitmap thumbnail = null;
//Bitmap uploader_thumbnail = null;
//int videoAvailableStatus = VIDEO_AVAILABLE;
return videoInfo;
}
public String uploader_thumbnail_url = "";
public String description = "";
/*todo: make this lists over vectors*/
public VideoStream[] videoStreams = null;
public AudioStream[] audioStreams = null;
// video streams provided by the dash mpd do not need to be provided as VideoStream.

View file

@ -52,7 +52,8 @@ public class YoutubeSearchEngine implements SearchEngine {
private static final String TAG = YoutubeSearchEngine.class.toString();
@Override
public Result search(String query, int page, String languageCode, Downloader downloader) throws IOException, ParsingException {
public Result search(String query, int page, String languageCode, Downloader downloader)
throws IOException, ParsingException {
Result result = new Result();
Uri.Builder builder = new Uri.Builder();
builder.scheme("https")
@ -171,7 +172,8 @@ public class YoutubeSearchEngine implements SearchEngine {
try {
dBuilder = dbFactory.newDocumentBuilder();
doc = dBuilder.parse(new InputSource(new ByteArrayInputStream(response.getBytes("utf-8"))));
doc = dBuilder.parse(new InputSource(
new ByteArrayInputStream(response.getBytes("utf-8"))));
doc.getDocumentElement().normalize();
} catch (ParserConfigurationException | SAXException | IOException e) {
e.printStackTrace();

View file

@ -3,6 +3,7 @@ package org.schabi.newpipe.crawler.services.youtube;
import org.schabi.newpipe.crawler.CrawlingException;
import org.schabi.newpipe.crawler.Downloader;
import org.schabi.newpipe.crawler.StreamingService;
import org.schabi.newpipe.crawler.UrlIdHandler;
import org.schabi.newpipe.crawler.VideoExtractor;
import org.schabi.newpipe.crawler.SearchEngine;
@ -37,8 +38,10 @@ public class YoutubeService implements StreamingService {
return serviceInfo;
}
@Override
public VideoExtractor getExtractorInstance(String url, Downloader downloader) throws CrawlingException, IOException {
if(acceptUrl(url)) {
public VideoExtractor getExtractorInstance(String url, Downloader downloader)
throws CrawlingException, IOException {
UrlIdHandler urlIdHandler = new YoutubeUrlIdHandler();
if(urlIdHandler.acceptUrl(url)) {
return new YoutubeVideoExtractor(url, downloader) ;
}
else {
@ -49,9 +52,9 @@ public class YoutubeService implements StreamingService {
public SearchEngine getSearchEngineInstance() {
return new YoutubeSearchEngine();
}
@Override
public boolean acceptUrl(String videoUrl) {
return videoUrl.contains("youtube") ||
videoUrl.contains("youtu.be");
public UrlIdHandler getUrlIdHandler() {
return new YoutubeUrlIdHandler();
}
}

View file

@ -0,0 +1,68 @@
package org.schabi.newpipe.crawler.services.youtube;
import org.schabi.newpipe.crawler.ParsingException;
import org.schabi.newpipe.crawler.RegexHelper;
import org.schabi.newpipe.crawler.UrlIdHandler;
/**
* Created by Christian Schabesberger on 02.02.16.
*
* Copyright (C) Christian Schabesberger 2016 <chris.schabesberger@mailbox.org>
* YoutubeUrlIdHandler.java is part of NewPipe.
*
* NewPipe is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* NewPipe is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with NewPipe. If not, see <http://www.gnu.org/licenses/>.
*/
public class YoutubeUrlIdHandler implements UrlIdHandler {
@SuppressWarnings("WeakerAccess")
@Override
public String getVideoUrl(String videoId) {
return "https://www.youtube.com/watch?v=" + videoId;
}
@SuppressWarnings("WeakerAccess")
@Override
public String getVideoId(String url) throws ParsingException {
String id;
String pat;
if(url.contains("youtube")) {
pat = "youtube\\.com/watch\\?v=([\\-a-zA-Z0-9_]{11})";
}
else if(url.contains("youtu.be")) {
pat = "youtu\\.be/([a-zA-Z0-9_-]{11})";
}
else {
throw new ParsingException("Error no suitable url: " + url);
}
id = RegexHelper.matchGroup1(pat, url);
if(!id.isEmpty()){
//Log.i(TAG, "string \""+url+"\" matches!");
return id;
} else {
throw new ParsingException("Error could not parse url: " + url);
}
}
public String cleanUrl(String complexUrl) throws ParsingException {
return getVideoUrl(getVideoId(complexUrl));
}
@Override
public boolean acceptUrl(String videoUrl) {
return videoUrl.contains("youtube") ||
videoUrl.contains("youtu.be");
}
}

View file

@ -15,6 +15,8 @@ import org.mozilla.javascript.ScriptableObject;
import org.schabi.newpipe.crawler.CrawlingException;
import org.schabi.newpipe.crawler.Downloader;
import org.schabi.newpipe.crawler.ParsingException;
import org.schabi.newpipe.crawler.RegexHelper;
import org.schabi.newpipe.crawler.UrlIdHandler;
import org.schabi.newpipe.crawler.VideoExtractor;
import org.schabi.newpipe.crawler.MediaFormat;
import org.schabi.newpipe.crawler.VideoInfo;
@ -25,11 +27,8 @@ import java.io.IOException;
import java.io.StringReader;
import java.net.URLDecoder;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Vector;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
/**
* Created by Christian Schabesberger on 06.08.15.
@ -51,7 +50,7 @@ import java.util.regex.Pattern;
* along with NewPipe. If not, see <http://www.gnu.org/licenses/>.
*/
public class YoutubeVideoExtractor extends VideoExtractor {
public class YoutubeVideoExtractor implements VideoExtractor {
public class DecryptException extends ParsingException {
DecryptException(Throwable cause) {
@ -75,7 +74,6 @@ public class YoutubeVideoExtractor extends VideoExtractor {
private static final String TAG = YoutubeVideoExtractor.class.toString();
private final Document doc;
private JSONObject playerArgs;
private String errorMessage = "";
// static values
private static final String DECRYPTION_FUNC_NAME="decrypt";
@ -83,23 +81,27 @@ public class YoutubeVideoExtractor extends VideoExtractor {
// cached values
private static volatile String decryptionCode = "";
UrlIdHandler urlidhandler = new YoutubeUrlIdHandler();
String pageUrl = "";
private Downloader downloader;
public YoutubeVideoExtractor(String pageUrl, Downloader dl) throws CrawlingException, IOException {
//most common videoInfo fields are now set in our superclass, for all services
super(pageUrl, dl);
downloader = dl;
String pageContent = downloader.download(cleanUrl(pageUrl));
this.pageUrl = urlidhandler.cleanUrl(pageUrl);
String pageContent = downloader.download(this.pageUrl);
doc = Jsoup.parse(pageContent, pageUrl);
String ytPlayerConfigRaw;
JSONObject ytPlayerConfig;
//attempt to load the youtube js player JSON arguments
try {
ytPlayerConfigRaw = matchGroup1("ytplayer.config\\s*=\\s*(\\{.*?\\});", pageContent);
ytPlayerConfigRaw =
RegexHelper.matchGroup1("ytplayer.config\\s*=\\s*(\\{.*?\\});", pageContent);
ytPlayerConfig = new JSONObject(ytPlayerConfigRaw);
playerArgs = ytPlayerConfig.getJSONObject("args");
} catch (RegexException e) {
} catch (RegexHelper.RegexException e) {
String errorReason = findErrorReason(doc);
switch(errorReason) {
case "GEMA":
@ -233,7 +235,16 @@ public class YoutubeVideoExtractor extends VideoExtractor {
@Override
public String getDashMpdUrl() throws ParsingException {
try {
return playerArgs.getString("dashmpd");
String dashManifest = playerArgs.getString("dashmpd");
if(!dashManifest.contains("/signature/")) {
String encryptedSig = RegexHelper.matchGroup1("/s/([a-fA-F0-9\\.]+)", dashManifest);
String decryptedSig;
decryptedSig = decryptSignature(encryptedSig, decryptionCode);
dashManifest = dashManifest.replace("/s/" + encryptedSig, "/signature/" + decryptedSig);
}
return dashManifest;
} catch(NullPointerException e) {
throw new ParsingException(
"Could not find \"dashmpd\" upon the player args (maybe no dash manifest available).", e);
@ -244,15 +255,8 @@ public class YoutubeVideoExtractor extends VideoExtractor {
@Override
public VideoInfo.AudioStream[] getAudioStreams() throws ParsingException {
try {
String dashManifest = playerArgs.getString("dashmpd");
return parseDashManifest(dashManifest, decryptionCode);
} catch (NullPointerException e) {
throw new ParsingException(
"Could not find \"dashmpd\" upon the player args (maybe no dash manifest available).", e);
} catch (Exception e) {
throw new ParsingException(e);
}
/* If we provide a valid dash manifest, we don't need to provide audio streams extra */
return null;
}
@Override
@ -300,37 +304,6 @@ public class YoutubeVideoExtractor extends VideoExtractor {
return videoStreams.toArray(new VideoInfo.VideoStream[videoStreams.size()]);
}
@SuppressWarnings("WeakerAccess")
@Override
public String getVideoId(String url) throws ParsingException {
String id;
String pat;
if(url.contains("youtube")) {
pat = "youtube\\.com/watch\\?v=([\\-a-zA-Z0-9_]{11})";
}
else if(url.contains("youtu.be")) {
pat = "youtu\\.be/([a-zA-Z0-9_-]{11})";
}
else {
throw new ParsingException("Error no suitable url: " + url);
}
id = matchGroup1(pat, url);
if(!id.isEmpty()){
//Log.i(TAG, "string \""+url+"\" matches!");
return id;
} else {
throw new ParsingException("Error could not parse url: " + url);
}
}
@SuppressWarnings("WeakerAccess")
@Override
public String getVideoUrl(String videoId) {
return "https://www.youtube.com/watch?v=" + videoId;
}
/**Attempts to parse (and return) the offset to start playing the video from.
* @return the offset (in seconds), or 0 if no timestamp is found.*/
@Override
@ -338,8 +311,8 @@ public class YoutubeVideoExtractor extends VideoExtractor {
//todo: add unit test for timestamp
String timeStamp;
try {
timeStamp = matchGroup1("((#|&|\\?)t=\\d{0,3}h?\\d{0,3}m?\\d{1,3}s?)", pageUrl);
} catch (RegexException e) {
timeStamp = RegexHelper.matchGroup1("((#|&|\\?)t=\\d{0,3}h?\\d{0,3}m?\\d{1,3}s?)", pageUrl);
} catch (RegexHelper.RegexException e) {
// catch this instantly since an url does not necessarily have to have a time stamp
// -2 because well the testing system will then know its the regex that failed :/
@ -354,15 +327,15 @@ public class YoutubeVideoExtractor extends VideoExtractor {
String minutesString = "";
String hoursString = "";
try {
secondsString = matchGroup1("(\\d{1,3})s", timeStamp);
minutesString = matchGroup1("(\\d{1,3})m", timeStamp);
hoursString = matchGroup1("(\\d{1,3})h", timeStamp);
secondsString = RegexHelper.matchGroup1("(\\d{1,3})s", timeStamp);
minutesString = RegexHelper.matchGroup1("(\\d{1,3})m", timeStamp);
hoursString = RegexHelper.matchGroup1("(\\d{1,3})h", timeStamp);
} catch (Exception e) {
//it could be that time is given in another method
if (secondsString.isEmpty() //if nothing was got,
&& minutesString.isEmpty()//treat as unlabelled seconds
&& hoursString.isEmpty()) {
secondsString = matchGroup1("t=(\\d{1,3})", timeStamp);
secondsString = RegexHelper.matchGroup1("t=(\\d{1,3})", timeStamp);
}
}
@ -455,72 +428,14 @@ public class YoutubeVideoExtractor extends VideoExtractor {
}
}
private VideoInfo.AudioStream[] parseDashManifest(String dashManifest, String decryptoinCode) throws RegexException, DecryptException {
if(!dashManifest.contains("/signature/")) {
String encryptedSig = matchGroup1("/s/([a-fA-F0-9\\.]+)", dashManifest);
String decryptedSig;
@Override
public UrlIdHandler getUrlIdConverter() {
return new YoutubeUrlIdHandler();
}
decryptedSig = decryptSignature(encryptedSig, decryptoinCode);
dashManifest = dashManifest.replace("/s/" + encryptedSig, "/signature/" + decryptedSig);
}
String dashDoc;
try {
dashDoc = downloader.download(dashManifest);
} catch(IOException ioe) {
throw new DecryptException("Could not get dash mpd", ioe);
}
Vector<VideoInfo.AudioStream> audioStreams = new Vector<>();
try {
XmlPullParser parser = Xml.newPullParser();
parser.setInput(new StringReader(dashDoc));
String tagName = "";
String currentMimeType = "";
int currentBandwidth = -1;
int currentSamplingRate = -1;
boolean currentTagIsBaseUrl = false;
for(int eventType = parser.getEventType();
eventType != XmlPullParser.END_DOCUMENT;
eventType = parser.next() ) {
switch(eventType) {
case XmlPullParser.START_TAG:
tagName = parser.getName();
if(tagName.equals("AdaptationSet")) {
currentMimeType = parser.getAttributeValue(XmlPullParser.NO_NAMESPACE, "mimeType");
} else if(tagName.equals("Representation") && currentMimeType.contains("audio")) {
currentBandwidth = Integer.parseInt(
parser.getAttributeValue(XmlPullParser.NO_NAMESPACE, "bandwidth"));
currentSamplingRate = Integer.parseInt(
parser.getAttributeValue(XmlPullParser.NO_NAMESPACE, "audioSamplingRate"));
} else if(tagName.equals("BaseURL")) {
currentTagIsBaseUrl = true;
}
break;
case XmlPullParser.TEXT:
if(currentTagIsBaseUrl &&
(currentMimeType.contains("audio"))) {
int format = -1;
if(currentMimeType.equals(MediaFormat.WEBMA.mimeType)) {
format = MediaFormat.WEBMA.id;
} else if(currentMimeType.equals(MediaFormat.M4A.mimeType)) {
format = MediaFormat.M4A.id;
}
audioStreams.add(new VideoInfo.AudioStream(parser.getText(),
format, currentBandwidth, currentSamplingRate));
}
//missing break here?
case XmlPullParser.END_TAG:
if(tagName.equals("AdaptationSet")) {
currentMimeType = "";
} else if(tagName.equals("BaseURL")) {
currentTagIsBaseUrl = false;
}//no break needed here
}
}
} catch(Exception e) {
e.printStackTrace();
}
return audioStreams.toArray(new VideoInfo.AudioStream[audioStreams.size()]);
@Override
public String getPageUrl() {
return pageUrl;
}
/**Provides information about links to other videos on the video page, such as related videos.
@ -533,7 +448,7 @@ public class YoutubeVideoExtractor extends VideoExtractor {
info.webpage_url = li.select("a.content-link").first()
.attr("abs:href");
info.id = matchGroup1("v=([0-9a-zA-Z-]*)", info.webpage_url);
info.id = RegexHelper.matchGroup1("v=([0-9a-zA-Z-]*)", info.webpage_url);
//todo: check NullPointerException causing
info.title = li.select("span.title").first().text();
@ -584,15 +499,20 @@ public class YoutubeVideoExtractor extends VideoExtractor {
try {
String playerCode = downloader.download(playerUrl);
decryptionFuncName = matchGroup1("\\.sig\\|\\|([a-zA-Z0-9$]+)\\(", playerCode);
decryptionFuncName =
RegexHelper.matchGroup1("\\.sig\\|\\|([a-zA-Z0-9$]+)\\(", playerCode);
String functionPattern = "(" + decryptionFuncName.replace("$", "\\$") + "=function\\([a-zA-Z0-9_]*\\)\\{.+?\\})";
decryptionFunc = "var " + matchGroup1(functionPattern, playerCode) + ";";
String functionPattern = "("
+ decryptionFuncName.replace("$", "\\$")
+ "=function\\([a-zA-Z0-9_]*\\)\\{.+?\\})";
decryptionFunc = "var " + RegexHelper.matchGroup1(functionPattern, playerCode) + ";";
helperObjectName = matchGroup1(";([A-Za-z0-9_\\$]{2})\\...\\(", decryptionFunc);
helperObjectName = RegexHelper
.matchGroup1(";([A-Za-z0-9_\\$]{2})\\...\\(", decryptionFunc);
String helperPattern = "(var " + helperObjectName.replace("$", "\\$") + "=\\{.+?\\}\\};)";
helperObject = matchGroup1(helperPattern, playerCode);
String helperPattern = "(var "
+ helperObjectName.replace("$", "\\$") + "=\\{.+?\\}\\};)";
helperObject = RegexHelper.matchGroup1(helperPattern, playerCode);
callerFunc = callerFunc.replace("%%", decryptionFuncName);
@ -624,25 +544,8 @@ public class YoutubeVideoExtractor extends VideoExtractor {
return (result == null ? "" : result.toString());
}
private String cleanUrl(String complexUrl) throws ParsingException {
return getVideoUrl(getVideoId(complexUrl));
}
private String matchGroup1(String pattern, String input) throws RegexException {
Pattern pat = Pattern.compile(pattern);
Matcher mat = pat.matcher(input);
boolean foundMatch = mat.find();
if (foundMatch) {
return mat.group(1);
}
else {
//Log.e(TAG, "failed to find pattern \""+pattern+"\" inside of \""+input+"\"");
throw new RegexException("failed to find pattern \""+pattern+" inside of "+input+"\"");
}
}
private String findErrorReason(Document doc) {
errorMessage = doc.select("h1[id=\"unavailable-message\"]").first().text();
String errorMessage = doc.select("h1[id=\"unavailable-message\"]").first().text();
if(errorMessage.contains("GEMA")) {
// Gema sometimes blocks youtube music content in germany:
// https://www.gema.de/en/